diff --git a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
index 7f09ca053a..a608d3954c 100644
--- a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
@@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
   qemu-user \
   make \
   file \
-  clang-15 \
+  clang-19 \
   lld
 
 ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
diff --git a/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile
new file mode 100644
index 0000000000..5562638646
--- /dev/null
+++ b/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile
@@ -0,0 +1,29 @@
+FROM ubuntu:24.04
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+  gcc \
+  g++ \
+  ca-certificates \
+  libc6-dev \
+  libc6-dev-arm64-cross \
+  qemu-user \
+  make \
+  file \
+  clang-19 \
+  curl \
+  xz-utils \
+  lld
+
+ENV TOOLCHAIN="arm-gnu-toolchain-14.2.rel1-x86_64-aarch64_be-none-linux-gnu"
+
+# Download the aarch64_be gcc toolchain
+RUN curl -L "https://developer.arm.com/-/media/Files/downloads/gnu/14.2.rel1/binrel/${TOOLCHAIN}.tar.xz" -o "${TOOLCHAIN}.tar.xz"
+RUN tar -xvf "${TOOLCHAIN}.tar.xz"
+RUN mkdir /toolchains && mv "./${TOOLCHAIN}" /toolchains
+
+ENV AARCH64_BE_TOOLCHAIN="/toolchains/${TOOLCHAIN}"
+ENV AARCH64_BE_LIBC="${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc"
+
+ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-gcc"
+ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64_be -L ${AARCH64_BE_LIBC}"
+ENV OBJDUMP="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-objdump"
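
The runner variable set above means every test binary built for this target executes under `qemu-aarch64_be`. A quick endianness probe — illustrative only, not part of this patch — confirms what that emulation provides:

```rust
// Prints "big" under qemu-aarch64_be and "little" on ordinary aarch64.
fn main() {
    // On a big-endian target the most significant byte is stored first.
    let first = 0x0102u16.to_ne_bytes()[0];
    println!("{}", if first == 0x01 { "big" } else { "little" });
}
```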
diff --git a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
index be9959240b..401164c19b 100644
--- a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
+++ b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
@@ -1,4 +1,4 @@
-FROM ubuntu:22.04
+FROM ubuntu:24.04
 RUN apt-get update && apt-get install -y --no-install-recommends \
   gcc \
   g++ \
@@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
   qemu-user \
   make \
   file \
-  clang-15 \
+  clang-19 \
   lld
 ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
     CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
diff --git a/ci/run.sh b/ci/run.sh
index 28d53c5375..73fd24a070 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -108,6 +108,7 @@ if [ "$NOSTD" != "1" ]; then
     cargo_test "${STDARCH_EXAMPLES} ${PROFILE}"
 fi
 
+
 # Test targets compiled with extra features.
 case ${TARGET} in
     x86*)
@@ -134,26 +135,60 @@ case ${TARGET} in
         export RUSTFLAGS="${OLD_RUSTFLAGS} -C target-feature=+vsx"
         cargo_test "${PROFILE}"
         ;;
+
+    # Set up per-target variables for the Arm targets: C++ flags, the test
+    # runner, and the list of intrinsics to skip.
+    aarch64-unknown-linux-gnu*)
+        TEST_CPPFLAGS="-fuse-ld=lld -I/usr/aarch64-linux-gnu/include/ -I/usr/aarch64-linux-gnu/include/c++/9/aarch64-linux-gnu/"
+        TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
+        TEST_CXX_COMPILER="clang++-19"
+        TEST_RUNNER="${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}"
+        ;;
+
+    aarch64_be-unknown-linux-gnu*)
+        TEST_CPPFLAGS="-fuse-ld=lld"
+        TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
+        TEST_CXX_COMPILER="clang++-19"
+        TEST_RUNNER="${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER}"
+        ;;
+
+    armv7-unknown-linux-gnueabihf*)
+        TEST_CPPFLAGS="-fuse-ld=lld -I/usr/arm-linux-gnueabihf/include/ -I/usr/arm-linux-gnueabihf/include/c++/9/arm-linux-gnueabihf/"
+        TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_arm.txt
+        TEST_CXX_COMPILER="clang++-19"
+        TEST_RUNNER="${CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER}"
+        ;;
     *)
         ;;
 
 esac
 
-if [ "${TARGET}" = "aarch64-unknown-linux-gnu" ]; then
-    (
-        CPPFLAGS="-fuse-ld=lld -I/usr/aarch64-linux-gnu/include/ -I/usr/aarch64-linux-gnu/include/c++/9/aarch64-linux-gnu/" \
-            RUSTFLAGS="$HOST_RUSTFLAGS" \
-            RUST_LOG=warn \
-            cargo run ${INTRINSIC_TEST} "${PROFILE}" --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json --runner "${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}" --cppcompiler "clang++-15" --skip crates/intrinsic-test/missing_aarch64.txt
-    )
-elif [ "${TARGET}" = "armv7-unknown-linux-gnueabihf" ]; then
-    (
-        CPPFLAGS="-fuse-ld=lld -I/usr/arm-linux-gnueabihf/include/ -I/usr/arm-linux-gnueabihf/include/c++/9/arm-linux-gnueabihf/" \
-            RUSTFLAGS="$HOST_RUSTFLAGS" \
-            RUST_LOG=warn \
-            cargo run ${INTRINSIC_TEST} "${PROFILE}" --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json --runner "${CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER}" --cppcompiler "clang++-15" --skip crates/intrinsic-test/missing_arm.txt --a32
-    )
-fi
+# Arm-specific intrinsic-test runs
+case "${TARGET}" in
+    aarch64-unknown-linux-gnu*|armv7-unknown-linux-gnueabihf*)
+        CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \
+            cargo run "${INTRINSIC_TEST}" "${PROFILE}" \
+            --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
+            --runner "${TEST_RUNNER}" \
+            --cppcompiler "${TEST_CXX_COMPILER}" \
+            --skip "${TEST_SKIP_INTRINSICS}" \
+            --target "${TARGET}"
+        ;;
+
+    aarch64_be-unknown-linux-gnu*)
+        CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \
+            cargo run "${INTRINSIC_TEST}" "${PROFILE}" \
+            --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
+            --runner "${TEST_RUNNER}" \
+            --cppcompiler "${TEST_CXX_COMPILER}" \
+            --skip "${TEST_SKIP_INTRINSICS}" \
+            --target "${TARGET}" \
+            --linker "${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER}" \
+            --cxx-toolchain-dir "${AARCH64_BE_TOOLCHAIN}"
+        ;;
+    *)
+        ;;
+esac
 
 if [ "$NORUN" != "1" ] && [ "$NOSTD" != 1 ]; then
     # Test examples
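
The consolidated case statement now drives a single intrinsic-test invocation per Arm target; only the big-endian branch needs the extra `--linker` and `--cxx-toolchain-dir` flags. As a reading aid — an assumed, simplified summary, not intrinsic-test's actual code — the options being passed are:

```rust
// Hypothetical struct mirroring the command-line flags used in run.sh above;
// the struct itself is illustrative, only the flag names come from the patch.
struct Options {
    json: std::path::PathBuf,          // intrinsics_data/arm_intrinsics.json
    runner: String,                    // --runner: qemu command wrapping each test
    cppcompiler: String,               // --cppcompiler: clang++ building the C++ side
    skip: std::path::PathBuf,          // --skip: intrinsics known to be missing
    target: String,                    // --target: triple under test (replaces the old --a32 flag)
    linker: Option<String>,            // --linker: cross-gcc, aarch64_be only
    cxx_toolchain_dir: Option<String>, // --cxx-toolchain-dir: aarch64_be sysroot
}
```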
diff --git a/crates/core_arch/src/aarch64/mod.rs b/crates/core_arch/src/aarch64/mod.rs
index 594c6d18c6..0defde52fd 100644
--- a/crates/core_arch/src/aarch64/mod.rs
+++ b/crates/core_arch/src/aarch64/mod.rs
@@ -10,10 +10,7 @@ mod mte;
 #[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
 pub use self::mte::*;
 
-// NEON intrinsics are currently broken on big-endian, so don't expose them. (#1484)
-#[cfg(target_endian = "little")]
 mod neon;
-#[cfg(target_endian = "little")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub use self::neon::*;
 
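With the `target_endian = "little"` gate gone, the NEON module now builds for aarch64_be as well (the caveat in the removed comment was tracked in #1484). The contract the intrinsics must honor is lane-order, not byte-order, semantics — a minimal sketch, not from this patch, of a reduction whose result must be identical on both endiannesses:

```rust
// Lane-wise reduction: the answer cannot depend on how lanes are laid out
// in memory, which is the property big-endian support relies on.
#[cfg(target_arch = "aarch64")]
fn widened_lane_sum() -> i16 {
    use core::arch::aarch64::{vaddlvq_s8, vdupq_n_s8};
    // Safety: NEON is mandatory on AArch64 Linux targets, either endianness.
    unsafe { vaddlvq_s8(vdupq_n_s8(3)) } // sixteen lanes of 3 -> 48 either way
}
```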
diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs
index 90a70ee4d7..03c56ff0cb 100644
--- a/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/crates/core_arch/src/aarch64/neon/generated.rs
@@ -12,6 +12,44 @@ use stdarch_test::assert_instr;
 
 use super::*;
 
+#[doc = "CRC32-C single round checksum for quad words (64 bits)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32cd)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "crc")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(crc32cx))]
+#[stable(feature = "stdarch_aarch64_crc32", since = "1.80.0")]
+pub unsafe fn __crc32cd(crc: u32, data: u64) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crc32cx"
+        )]
+        fn ___crc32cd(crc: i32, data: i64) -> i32;
+    }
+    ___crc32cd(crc.as_signed(), data.as_signed()).as_unsigned()
+}
+#[doc = "CRC32 single round checksum for quad words (64 bits)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32d)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "crc")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(crc32x))]
+#[stable(feature = "stdarch_aarch64_crc32", since = "1.80.0")]
+pub unsafe fn __crc32d(crc: u32, data: u64) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crc32x"
+        )]
+        fn ___crc32d(crc: i32, data: i64) -> i32;
+    }
+    ___crc32d(crc.as_signed(), data.as_signed()).as_unsigned()
+}
 #[doc = "Signed Absolute difference and Accumulate Long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_high_s8)"]
 #[doc = "## Safety"]
@@ -260,6 +298,60 @@ pub unsafe fn vabs_f64(a: float64x1_t) -> float64x1_t {
 pub unsafe fn vabsq_f64(a: float64x2_t) -> float64x2_t {
     simd_fabs(a)
 }
+#[doc = "Absolute Value (wrapping)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(abs))]
+pub unsafe fn vabs_s64(a: int64x1_t) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.abs.v1i64"
+        )]
+        fn _vabs_s64(a: int64x1_t) -> int64x1_t;
+    }
+    _vabs_s64(a)
+}
+#[doc = "Absolute Value (wrapping)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsd_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(abs))]
+pub unsafe fn vabsd_s64(a: i64) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.abs.i64"
+        )]
+        fn _vabsd_s64(a: i64) -> i64;
+    }
+    _vabsd_s64(a)
+}
+#[doc = "Absolute Value (wrapping)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(abs))]
+pub unsafe fn vabsq_s64(a: int64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.abs.v2i64"
+        )]
+        fn _vabsq_s64(a: int64x2_t) -> int64x2_t;
+    }
+    _vabsq_s64(a)
+}
 #[doc = "Add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddd_s64)"]
 #[doc = "## Safety"]
@@ -354,6 +446,42 @@ pub unsafe fn vaddlv_s32(a: int32x2_t) -> i64 {
     }
     _vaddlv_s32(a)
 }
+#[doc = "Signed Add Long across Vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddlv_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(saddlv))]
+pub unsafe fn vaddlv_s8(a: int8x8_t) -> i16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.saddlv.i32.v8i8"
+        )]
+        fn _vaddlv_s8(a: int8x8_t) -> i32;
+    }
+    _vaddlv_s8(a) as i16
+}
+#[doc = "Signed Add Long across Vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddlvq_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(saddlv))]
+pub unsafe fn vaddlvq_s8(a: int8x16_t) -> i16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.saddlv.i32.v16i8"
+        )]
+        fn _vaddlvq_s8(a: int8x16_t) -> i32;
+    }
+    _vaddlvq_s8(a) as i16
+}
 #[doc = "Unsigned Add Long across Vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddlv_u16)"]
 #[doc = "## Safety"]
@@ -426,6 +554,42 @@ pub unsafe fn vaddlv_u32(a: uint32x2_t) -> u64 {
     }
     _vaddlv_u32(a.as_signed()).as_unsigned()
 }
+#[doc = "Unsigned Add Long across Vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddlv_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(uaddlv))]
+pub unsafe fn vaddlv_u8(a: uint8x8_t) -> u16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uaddlv.i32.v8i8"
+        )]
+        fn _vaddlv_u8(a: int8x8_t) -> i32;
+    }
+    _vaddlv_u8(a.as_signed()).as_unsigned() as u16
+}
+#[doc = "Unsigned Add Long across Vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddlvq_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(uaddlv))]
+pub unsafe fn vaddlvq_u8(a: uint8x16_t) -> u16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uaddlv.i32.v16i8"
+        )]
+        fn _vaddlvq_u8(a: int8x16_t) -> i32;
+    }
+    _vaddlvq_u8(a.as_signed()).as_unsigned() as u16
+}
 #[doc = "Floating-point add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_f32)"]
 #[doc = "## Safety"]
@@ -480,255 +644,507 @@ pub unsafe fn vaddvq_f64(a: float64x2_t) -> f64 {
     }
     _vaddvq_f64(a)
 }
-#[doc = "Bit clear and exclusive OR"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_s8)"]
+#[doc = "Add across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-#[cfg_attr(test, assert_instr(bcax))]
-pub unsafe fn vbcaxq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vaddv_s32(a: int32x2_t) -> i32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.bcaxs.v16i8"
+            link_name = "llvm.aarch64.neon.saddv.i32.v2i32"
         )]
-        fn _vbcaxq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
+        fn _vaddv_s32(a: int32x2_t) -> i32;
     }
-    _vbcaxq_s8(a, b, c)
+    _vaddv_s32(a)
 }
-#[doc = "Bit clear and exclusive OR"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_s16)"]
+#[doc = "Add across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-#[cfg_attr(test, assert_instr(bcax))]
-pub unsafe fn vbcaxq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addv))]
+pub unsafe fn vaddv_s8(a: int8x8_t) -> i8 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.bcaxs.v8i16"
+            link_name = "llvm.aarch64.neon.saddv.i32.v8i8"
         )]
-        fn _vbcaxq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
+        fn _vaddv_s8(a: int8x8_t) -> i8;
     }
-    _vbcaxq_s16(a, b, c)
+    _vaddv_s8(a)
 }
-#[doc = "Bit clear and exclusive OR"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_s32)"]
+#[doc = "Add across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-#[cfg_attr(test, assert_instr(bcax))]
-pub unsafe fn vbcaxq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addv))]
+pub unsafe fn vaddvq_s8(a: int8x16_t) -> i8 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.bcaxs.v4i32"
+            link_name = "llvm.aarch64.neon.saddv.i32.v16i8"
         )]
-        fn _vbcaxq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+        fn _vaddvq_s8(a: int8x16_t) -> i8;
     }
-    _vbcaxq_s32(a, b, c)
+    _vaddvq_s8(a)
 }
-#[doc = "Bit clear and exclusive OR"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_s64)"]
+#[doc = "Add across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-#[cfg_attr(test, assert_instr(bcax))]
-pub unsafe fn vbcaxq_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addv))]
+pub unsafe fn vaddv_s16(a: int16x4_t) -> i16 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.bcaxs.v2i64"
+            link_name = "llvm.aarch64.neon.saddv.i32.v4i16"
         )]
-        fn _vbcaxq_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+        fn _vaddv_s16(a: int16x4_t) -> i16;
     }
-    _vbcaxq_s64(a, b, c)
+    _vaddv_s16(a)
 }
-#[doc = "Bit clear and exclusive OR"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_u8)"]
+#[doc = "Add across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-#[cfg_attr(test, assert_instr(bcax))]
-pub unsafe fn vbcaxq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addv))]
+pub unsafe fn vaddvq_s16(a: int16x8_t) -> i16 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.bcaxu.v16i8"
+            link_name = "llvm.aarch64.neon.saddv.i32.v8i16"
         )]
-        fn _vbcaxq_u8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
+        fn _vaddvq_s16(a: int16x8_t) -> i16;
     }
-    _vbcaxq_u8(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+    _vaddvq_s16(a)
 }
-#[doc = "Bit clear and exclusive OR"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_u16)"]
+#[doc = "Add across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-#[cfg_attr(test, assert_instr(bcax))]
-pub unsafe fn vbcaxq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addv))]
+pub unsafe fn vaddvq_s32(a: int32x4_t) -> i32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.bcaxu.v8i16"
+            link_name = "llvm.aarch64.neon.saddv.i32.v4i32"
         )]
-        fn _vbcaxq_u16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
+        fn _vaddvq_s32(a: int32x4_t) -> i32;
     }
-    _vbcaxq_u16(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+    _vaddvq_s32(a)
 }
-#[doc = "Bit clear and exclusive OR"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_u32)"]
+#[doc = "Add across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-#[cfg_attr(test, assert_instr(bcax))]
-pub unsafe fn vbcaxq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vaddv_u32(a: uint32x2_t) -> u32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.bcaxu.v4i32"
+            link_name = "llvm.aarch64.neon.uaddv.i32.v2i32"
         )]
-        fn _vbcaxq_u32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+        fn _vaddv_u32(a: int32x2_t) -> i32;
     }
-    _vbcaxq_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+    _vaddv_u32(a.as_signed()).as_unsigned()
 }
-#[doc = "Bit clear and exclusive OR"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_u64)"]
+#[doc = "Add across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-#[cfg_attr(test, assert_instr(bcax))]
-pub unsafe fn vbcaxq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addv))]
+pub unsafe fn vaddv_u8(a: uint8x8_t) -> u8 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.bcaxu.v2i64"
+            link_name = "llvm.aarch64.neon.uaddv.i32.v8i8"
         )]
-        fn _vbcaxq_u64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+        fn _vaddv_u8(a: int8x8_t) -> i8;
     }
-    _vbcaxq_u64(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+    _vaddv_u8(a.as_signed()).as_unsigned()
 }
-#[doc = "Floating-point complex add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot270_f32)"]
+#[doc = "Add across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,fcma")]
-#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
-#[cfg_attr(test, assert_instr(fcadd))]
-pub unsafe fn vcadd_rot270_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addv))]
+pub unsafe fn vaddvq_u8(a: uint8x16_t) -> u8 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcadd.rot270.v2f32"
+            link_name = "llvm.aarch64.neon.uaddv.i32.v16i8"
         )]
-        fn _vcadd_rot270_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+        fn _vaddvq_u8(a: int8x16_t) -> i8;
     }
-    _vcadd_rot270_f32(a, b)
+    _vaddvq_u8(a.as_signed()).as_unsigned()
 }
-#[doc = "Floating-point complex add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f32)"]
+#[doc = "Add across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,fcma")]
-#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
-#[cfg_attr(test, assert_instr(fcadd))]
-pub unsafe fn vcaddq_rot270_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addv))]
+pub unsafe fn vaddv_u16(a: uint16x4_t) -> u16 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcadd.rot270.v4f32"
+            link_name = "llvm.aarch64.neon.uaddv.i32.v4i16"
         )]
-        fn _vcaddq_rot270_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+        fn _vaddv_u16(a: int16x4_t) -> i16;
     }
-    _vcaddq_rot270_f32(a, b)
+    _vaddv_u16(a.as_signed()).as_unsigned()
 }
-#[doc = "Floating-point complex add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f64)"]
+#[doc = "Add across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,fcma")]
-#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
-#[cfg_attr(test, assert_instr(fcadd))]
-pub unsafe fn vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addv))]
+pub unsafe fn vaddvq_u16(a: uint16x8_t) -> u16 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcadd.rot270.v2f64"
+            link_name = "llvm.aarch64.neon.uaddv.i32.v8i16"
         )]
-        fn _vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+        fn _vaddvq_u16(a: int16x8_t) -> i16;
     }
-    _vcaddq_rot270_f64(a, b)
+    _vaddvq_u16(a.as_signed()).as_unsigned()
 }
-#[doc = "Floating-point complex add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot90_f32)"]
+#[doc = "Add across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,fcma")]
-#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
-#[cfg_attr(test, assert_instr(fcadd))]
-pub unsafe fn vcadd_rot90_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addv))]
+pub unsafe fn vaddvq_u32(a: uint32x4_t) -> u32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcadd.rot90.v2f32"
+            link_name = "llvm.aarch64.neon.uaddv.i32.v4i32"
         )]
-        fn _vcadd_rot90_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+        fn _vaddvq_u32(a: int32x4_t) -> i32;
     }
-    _vcadd_rot90_f32(a, b)
+    _vaddvq_u32(a.as_signed()).as_unsigned()
 }
-#[doc = "Floating-point complex add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f32)"]
+#[doc = "Add across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,fcma")]
-#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
-#[cfg_attr(test, assert_instr(fcadd))]
-pub unsafe fn vcaddq_rot90_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vaddvq_s64(a: int64x2_t) -> i64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcadd.rot90.v4f32"
+            link_name = "llvm.aarch64.neon.saddv.i64.v2i64"
         )]
-        fn _vcaddq_rot90_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+        fn _vaddvq_s64(a: int64x2_t) -> i64;
     }
-    _vcaddq_rot90_f32(a, b)
+    _vaddvq_s64(a)
 }
-#[doc = "Floating-point complex add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f64)"]
+#[doc = "Add across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,fcma")]
-#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
-#[cfg_attr(test, assert_instr(fcadd))]
-pub unsafe fn vcaddq_rot90_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vaddvq_u64(a: uint64x2_t) -> u64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcadd.rot90.v2f64"
+            link_name = "llvm.aarch64.neon.uaddv.i64.v2i64"
         )]
-        fn _vcaddq_rot90_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+        fn _vaddvq_u64(a: int64x2_t) -> i64;
+    }
+    _vaddvq_u64(a.as_signed()).as_unsigned()
+}
+#[doc = "Bit clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.bcaxs.v16i8"
+        )]
+        fn _vbcaxq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
+    }
+    _vbcaxq_s8(a, b, c)
+}
+#[doc = "Bit clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.bcaxs.v8i16"
+        )]
+        fn _vbcaxq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
+    }
+    _vbcaxq_s16(a, b, c)
+}
+#[doc = "Bit clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.bcaxs.v4i32"
+        )]
+        fn _vbcaxq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+    }
+    _vbcaxq_s32(a, b, c)
+}
+#[doc = "Bit clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.bcaxs.v2i64"
+        )]
+        fn _vbcaxq_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+    }
+    _vbcaxq_s64(a, b, c)
+}
+#[doc = "Bit clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.bcaxu.v16i8"
+        )]
+        fn _vbcaxq_u8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
+    }
+    _vbcaxq_u8(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+}
+#[doc = "Bit clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.bcaxu.v8i16"
+        )]
+        fn _vbcaxq_u16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
+    }
+    _vbcaxq_u16(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+}
+#[doc = "Bit clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.bcaxu.v4i32"
+        )]
+        fn _vbcaxq_u32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+    }
+    _vbcaxq_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+}
+#[doc = "Bit clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vbcaxq_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub unsafe fn vbcaxq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.bcaxu.v2i64"
+        )]
+        fn _vbcaxq_u64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+    }
+    _vbcaxq_u64(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+}
+#[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot270_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub unsafe fn vcadd_rot270_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot270.v2f32"
+        )]
+        fn _vcadd_rot270_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    _vcadd_rot270_f32(a, b)
+}
+#[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub unsafe fn vcaddq_rot270_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot270.v4f32"
+        )]
+        fn _vcaddq_rot270_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    _vcaddq_rot270_f32(a, b)
+}
+#[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot270_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub unsafe fn vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot270.v2f64"
+        )]
+        fn _vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    _vcaddq_rot270_f64(a, b)
+}
+#[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcadd_rot90_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub unsafe fn vcadd_rot90_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot90.v2f32"
+        )]
+        fn _vcadd_rot90_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    _vcadd_rot90_f32(a, b)
+}
+#[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub unsafe fn vcaddq_rot90_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot90.v4f32"
+        )]
+        fn _vcaddq_rot90_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    _vcaddq_rot90_f32(a, b)
+}
+#[doc = "Floating-point complex add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcaddq_rot90_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,fcma")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
+#[cfg_attr(test, assert_instr(fcadd))]
+pub unsafe fn vcaddq_rot90_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcadd.rot90.v2f64"
+        )]
+        fn _vcaddq_rot90_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
     }
     _vcaddq_rot90_f64(a, b)
 }
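The hunks above add, among others, the previously missing 64-bit CRC32 intrinsics and the across-vector sums. A hedged usage sketch — illustrative, not part of the generated file — showing `__crc32d` behind runtime feature detection:

```rust
// __crc32d folds one 64-bit chunk into a running CRC32 checksum. The `crc`
// extension is optional in the ISA, so gate the call on runtime detection.
#[cfg(target_arch = "aarch64")]
fn crc32_of_u64(seed: u32, chunk: u64) -> Option<u32> {
    use core::arch::aarch64::__crc32d;
    if std::arch::is_aarch64_feature_detected!("crc") {
        // Safety: the `crc` feature was detected just above.
        Some(unsafe { __crc32d(seed, chunk) })
    } else {
        None
    }
}
```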
@@ -3201,7 +3617,7 @@ pub unsafe fn vcopy_lane_s8<const LANE1: i32, const LANE2: i32>(
     }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -3209,371 +3625,70 @@ pub unsafe fn vcopy_lane_s8<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_lane_s8<const LANE1: i32, const LANE2: i32>(
-    a: int8x16_t,
-    b: int8x8_t,
-) -> int8x16_t {
-    static_assert_uimm_bits!(LANE1, 4);
+pub unsafe fn vcopy_lane_s16<const LANE1: i32, const LANE2: i32>(
+    a: int16x4_t,
+    b: int16x4_t,
+) -> int16x4_t {
+    static_assert_uimm_bits!(LANE1, 2);
+    static_assert_uimm_bits!(LANE2, 2);
+    match LANE1 & 0b11 {
+        0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
+        1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
+        2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
+        3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
+        _ => unreachable_unchecked(),
+    }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_lane_s32<const LANE1: i32, const LANE2: i32>(
+    a: int32x2_t,
+    b: int32x2_t,
+) -> int32x2_t {
+    static_assert_uimm_bits!(LANE1, 1);
+    static_assert_uimm_bits!(LANE2, 1);
+    match LANE1 & 0b1 {
+        0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
+        1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
+        _ => unreachable_unchecked(),
+    }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_lane_u8<const LANE1: i32, const LANE2: i32>(
+    a: uint8x8_t,
+    b: uint8x8_t,
+) -> uint8x8_t {
+    static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 3);
-    let b: int8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
-    match LANE1 & 0b1111 {
-        0 => simd_shuffle!(
-            a,
-            b,
-            [
-                16 + LANE2 as u32,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        1 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                16 + LANE2 as u32,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        2 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                16 + LANE2 as u32,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        3 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                16 + LANE2 as u32,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        4 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                16 + LANE2 as u32,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        5 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                16 + LANE2 as u32,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        6 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                16 + LANE2 as u32,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        7 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                16 + LANE2 as u32,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        8 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                16 + LANE2 as u32,
-                9,
-                10,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        9 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                16 + LANE2 as u32,
-                10,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        10 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                16 + LANE2 as u32,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        11 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                16 + LANE2 as u32,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        12 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                16 + LANE2 as u32,
-                13,
-                14,
-                15
-            ]
-        ),
-        13 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                16 + LANE2 as u32,
-                14,
-                15
-            ]
-        ),
-        14 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                16 + LANE2 as u32,
-                15
-            ]
-        ),
-        15 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                14,
-                16 + LANE2 as u32
-            ]
-        ),
+    match LANE1 & 0b111 {
+        0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
+        1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
+        2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
+        3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
+        4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
+        5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
+        6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
+        7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
         _ => unreachable_unchecked(),
     }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -3581,10 +3696,10 @@ pub unsafe fn vcopyq_lane_s8<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_lane_s16<const LANE1: i32, const LANE2: i32>(
-    a: int16x4_t,
-    b: int16x4_t,
-) -> int16x4_t {
+pub unsafe fn vcopy_lane_u16<const LANE1: i32, const LANE2: i32>(
+    a: uint16x4_t,
+    b: uint16x4_t,
+) -> uint16x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 2);
     match LANE1 & 0b11 {
@@ -3596,7 +3711,7 @@ pub unsafe fn vcopy_lane_s16<const LANE1: i32, const LANE2: i32>(
     }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -3604,13 +3719,33 @@ pub unsafe fn vcopy_lane_s16<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_lane_s16<const LANE1: i32, const LANE2: i32>(
-    a: int16x8_t,
-    b: int16x4_t,
-) -> int16x8_t {
+pub unsafe fn vcopy_lane_u32<const LANE1: i32, const LANE2: i32>(
+    a: uint32x2_t,
+    b: uint32x2_t,
+) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE1, 1);
+    static_assert_uimm_bits!(LANE2, 1);
+    match LANE1 & 0b1 {
+        0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
+        1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
+        _ => unreachable_unchecked(),
+    }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopy_lane_p8<const LANE1: i32, const LANE2: i32>(
+    a: poly8x8_t,
+    b: poly8x8_t,
+) -> poly8x8_t {
     static_assert_uimm_bits!(LANE1, 3);
-    static_assert_uimm_bits!(LANE2, 2);
-    let b: int16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);
+    static_assert_uimm_bits!(LANE2, 3);
     match LANE1 & 0b111 {
         0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
         1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
@@ -3624,7 +3759,7 @@ pub unsafe fn vcopyq_lane_s16<const LANE1: i32, const LANE2: i32>(
     }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -3632,20 +3767,22 @@ pub unsafe fn vcopyq_lane_s16<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_lane_s32<const LANE1: i32, const LANE2: i32>(
-    a: int32x2_t,
-    b: int32x2_t,
-) -> int32x2_t {
-    static_assert_uimm_bits!(LANE1, 1);
-    static_assert_uimm_bits!(LANE2, 1);
-    match LANE1 & 0b1 {
-        0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-        1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
+pub unsafe fn vcopy_lane_p16<const LANE1: i32, const LANE2: i32>(
+    a: poly16x4_t,
+    b: poly16x4_t,
+) -> poly16x4_t {
+    static_assert_uimm_bits!(LANE1, 2);
+    static_assert_uimm_bits!(LANE2, 2);
+    match LANE1 & 0b11 {
+        0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
+        1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
+        2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
+        3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
         _ => unreachable_unchecked(),
     }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -3653,23 +3790,21 @@ pub unsafe fn vcopy_lane_s32<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_lane_s32<const LANE1: i32, const LANE2: i32>(
-    a: int32x4_t,
-    b: int32x2_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(LANE1, 2);
-    static_assert_uimm_bits!(LANE2, 1);
-    let b: int32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]);
-    match LANE1 & 0b11 {
-        0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-        1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-        2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
+pub unsafe fn vcopy_laneq_f32<const LANE1: i32, const LANE2: i32>(
+    a: float32x2_t,
+    b: float32x4_t,
+) -> float32x2_t {
+    static_assert_uimm_bits!(LANE1, 1);
+    static_assert_uimm_bits!(LANE2, 2);
+    let a: float32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);
+    match LANE1 & 0b1 {
+        0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]),
+        1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]),
         _ => unreachable_unchecked(),
     }
 }
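// Sketch of the `laneq` shape (hypothetical example fn; same assumptions):
// vcopy_laneq_f32 first widens `a` from float32x2_t to float32x4_t so both
// shuffle operands have four lanes; the two-element index array then narrows
// the result back to float32x2_t, with `4 + LANE2` addressing lanes of `b`.
#[cfg(target_arch = "aarch64")]
fn copy_laneq_f32_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_f32(0.5); // 64-bit: [0.5, 0.5]
        let b = vdupq_n_f32(2.0); // 128-bit: [2.0, 2.0, 2.0, 2.0]
        // Copy lane 3 of the 128-bit `b` into lane 1 of the 64-bit `a`.
        let r = vcopy_laneq_f32::<1, 3>(a, b);
        assert_eq!(vget_lane_f32::<1>(r), 2.0);
        assert_eq!(vget_lane_f32::<0>(r), 0.5);
    }
}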
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -3677,26 +3812,27 @@ pub unsafe fn vcopyq_lane_s32<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_lane_u8<const LANE1: i32, const LANE2: i32>(
-    a: uint8x8_t,
-    b: uint8x8_t,
-) -> uint8x8_t {
-    static_assert_uimm_bits!(LANE1, 3);
-    static_assert_uimm_bits!(LANE2, 3);
+pub unsafe fn vcopy_laneq_s8<const LANE1: i32, const LANE2: i32>(
+    a: int8x8_t,
+    b: int8x16_t,
+) -> int8x8_t {
+    static_assert_uimm_bits!(LANE1, 3);
+    static_assert_uimm_bits!(LANE2, 4);
+    let a: int8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
     match LANE1 & 0b111 {
-        0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-        1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-        2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-        4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-        5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-        6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-        7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
+        0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
+        1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
+        2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]),
+        3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]),
+        4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]),
+        5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]),
+        6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]),
+        7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]),
         _ => unreachable_unchecked(),
     }
 }
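// With a 16-lane source, LANE2 gets four index bits and the offset becomes
// `16 + LANE2`, as in vcopy_laneq_s8 above. Hypothetical sketch:
#[cfg(target_arch = "aarch64")]
fn copy_laneq_s8_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_s8(0);
        let b = vdupq_n_s8(5);
        // Copy lane 15 of the 128-bit `b` into lane 7 of the 64-bit `a`.
        let r = vcopy_laneq_s8::<7, 15>(a, b);
        assert_eq!(vget_lane_s8::<7>(r), 5);
    }
}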
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -3704,371 +3840,23 @@ pub unsafe fn vcopy_lane_u8<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_lane_u8<const LANE1: i32, const LANE2: i32>(
-    a: uint8x16_t,
-    b: uint8x8_t,
-) -> uint8x16_t {
-    static_assert_uimm_bits!(LANE1, 4);
+pub unsafe fn vcopy_laneq_s16<const LANE1: i32, const LANE2: i32>(
+    a: int16x4_t,
+    b: int16x8_t,
+) -> int16x4_t {
+    static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 3);
-    let b: uint8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
-    match LANE1 & 0b1111 {
-        0 => simd_shuffle!(
-            a,
-            b,
-            [
-                16 + LANE2 as u32,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        1 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                16 + LANE2 as u32,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        2 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                16 + LANE2 as u32,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        3 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                16 + LANE2 as u32,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        4 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                16 + LANE2 as u32,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        5 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                16 + LANE2 as u32,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        6 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                16 + LANE2 as u32,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        7 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                16 + LANE2 as u32,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        8 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                16 + LANE2 as u32,
-                9,
-                10,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        9 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                16 + LANE2 as u32,
-                10,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        10 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                16 + LANE2 as u32,
-                11,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        11 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                16 + LANE2 as u32,
-                12,
-                13,
-                14,
-                15
-            ]
-        ),
-        12 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                16 + LANE2 as u32,
-                13,
-                14,
-                15
-            ]
-        ),
-        13 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                16 + LANE2 as u32,
-                14,
-                15
-            ]
-        ),
-        14 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                16 + LANE2 as u32,
-                15
-            ]
-        ),
-        15 => simd_shuffle!(
-            a,
-            b,
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                14,
-                16 + LANE2 as u32
-            ]
-        ),
+    let a: int16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+    match LANE1 & 0b11 {
+        0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]),
+        1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]),
+        2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]),
+        3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]),
         _ => unreachable_unchecked(),
     }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -4076,22 +3864,21 @@ pub unsafe fn vcopyq_lane_u8<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_lane_u16<const LANE1: i32, const LANE2: i32>(
-    a: uint16x4_t,
-    b: uint16x4_t,
-) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE1, 2);
+pub unsafe fn vcopy_laneq_s32<const LANE1: i32, const LANE2: i32>(
+    a: int32x2_t,
+    b: int32x4_t,
+) -> int32x2_t {
+    static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 2);
-    match LANE1 & 0b11 {
-        0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-        1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-        2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
+    let a: int32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);
+    match LANE1 & 0b1 {
+        0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]),
+        1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]),
         _ => unreachable_unchecked(),
     }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -4099,27 +3886,27 @@ pub unsafe fn vcopy_lane_u16<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_lane_u16<const LANE1: i32, const LANE2: i32>(
-    a: uint16x8_t,
-    b: uint16x4_t,
-) -> uint16x8_t {
+pub unsafe fn vcopy_laneq_u8<const LANE1: i32, const LANE2: i32>(
+    a: uint8x8_t,
+    b: uint8x16_t,
+) -> uint8x8_t {
     static_assert_uimm_bits!(LANE1, 3);
-    static_assert_uimm_bits!(LANE2, 2);
-    let b: uint16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);
+    static_assert_uimm_bits!(LANE2, 4);
+    let a: uint8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
     match LANE1 & 0b111 {
-        0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-        1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-        2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-        4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-        5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-        6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-        7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
+        0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
+        1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
+        2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]),
+        3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]),
+        4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]),
+        5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]),
+        6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]),
+        7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]),
         _ => unreachable_unchecked(),
     }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -4127,20 +3914,23 @@ pub unsafe fn vcopyq_lane_u16<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_lane_u32<const LANE1: i32, const LANE2: i32>(
-    a: uint32x2_t,
-    b: uint32x2_t,
-) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE1, 1);
-    static_assert_uimm_bits!(LANE2, 1);
-    match LANE1 & 0b1 {
-        0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-        1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
+pub unsafe fn vcopy_laneq_u16<const LANE1: i32, const LANE2: i32>(
+    a: uint16x4_t,
+    b: uint16x8_t,
+) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE1, 2);
+    static_assert_uimm_bits!(LANE2, 3);
+    let a: uint16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+    match LANE1 & 0b11 {
+        0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]),
+        1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]),
+        2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]),
+        3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]),
         _ => unreachable_unchecked(),
     }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -4148,23 +3938,21 @@ pub unsafe fn vcopy_lane_u32<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_lane_u32<const LANE1: i32, const LANE2: i32>(
-    a: uint32x4_t,
-    b: uint32x2_t,
-) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE1, 2);
-    static_assert_uimm_bits!(LANE2, 1);
-    let b: uint32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]);
-    match LANE1 & 0b11 {
-        0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-        1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-        2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
+pub unsafe fn vcopy_laneq_u32<const LANE1: i32, const LANE2: i32>(
+    a: uint32x2_t,
+    b: uint32x4_t,
+) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE1, 1);
+    static_assert_uimm_bits!(LANE2, 2);
+    let a: uint32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);
+    match LANE1 & 0b1 {
+        0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]),
+        1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]),
         _ => unreachable_unchecked(),
     }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -4172,26 +3960,27 @@ pub unsafe fn vcopyq_lane_u32<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_lane_p8<const LANE1: i32, const LANE2: i32>(
+pub unsafe fn vcopy_laneq_p8<const LANE1: i32, const LANE2: i32>(
     a: poly8x8_t,
-    b: poly8x8_t,
+    b: poly8x16_t,
 ) -> poly8x8_t {
     static_assert_uimm_bits!(LANE1, 3);
-    static_assert_uimm_bits!(LANE2, 3);
+    static_assert_uimm_bits!(LANE2, 4);
+    let a: poly8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
     match LANE1 & 0b111 {
-        0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-        1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-        2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-        4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-        5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-        6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-        7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
+        0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
+        1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
+        2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]),
+        3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]),
+        4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]),
+        5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]),
+        6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]),
+        7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]),
+        _ => unreachable_unchecked(),
+    }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -4199,13 +3988,149 @@ pub unsafe fn vcopy_lane_p8<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_lane_p8<const LANE1: i32, const LANE2: i32>(
-    a: poly8x16_t,
-    b: poly8x8_t,
-) -> poly8x16_t {
+pub unsafe fn vcopy_laneq_p16<const LANE1: i32, const LANE2: i32>(
+    a: poly16x4_t,
+    b: poly16x8_t,
+) -> poly16x4_t {
+    static_assert_uimm_bits!(LANE1, 2);
+    static_assert_uimm_bits!(LANE2, 3);
+    let a: poly16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
+    match LANE1 & 0b11 {
+        0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]),
+        1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]),
+        2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]),
+        3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]),
+        _ => unreachable_unchecked(),
+    }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_lane_f32<const LANE1: i32, const LANE2: i32>(
+    a: float32x4_t,
+    b: float32x2_t,
+) -> float32x4_t {
+    static_assert_uimm_bits!(LANE1, 2);
+    static_assert_uimm_bits!(LANE2, 1);
+    let b: float32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]);
+    match LANE1 & 0b11 {
+        0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
+        1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
+        2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
+        3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
+        _ => unreachable_unchecked(),
+    }
+}
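// The q-destination variants mirror this: here the 64-bit `b` is widened to
// four lanes before the shuffle so `4 + LANE2` can address it. Hypothetical
// sketch under the same assumptions:
#[cfg(target_arch = "aarch64")]
fn copyq_lane_f32_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdupq_n_f32(1.0);
        let b = vdup_n_f32(8.0);
        // Copy lane 1 of the 64-bit `b` into lane 0 of the 128-bit `a`.
        let r = vcopyq_lane_f32::<0, 1>(a, b);
        assert_eq!(vgetq_lane_f32::<0>(r), 8.0);
        assert_eq!(vgetq_lane_f32::<1>(r), 1.0);
    }
}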
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_lane_f64<const LANE1: i32, const LANE2: i32>(
+    a: float64x2_t,
+    b: float64x1_t,
+) -> float64x2_t {
+    static_assert_uimm_bits!(LANE1, 1);
+    static_assert!(LANE2 == 0);
+    let b: float64x2_t = simd_shuffle!(b, b, [0, 1]);
+    match LANE1 & 0b1 {
+        0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
+        1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
+        _ => unreachable_unchecked(),
+    }
+}
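// One-lane sources (float64x1_t here, and the int64x1_t/uint64x1_t/poly64x1_t
// variants below) only admit LANE2 == 0, which the static_assert! enforces at
// compile time. Hypothetical sketch:
#[cfg(target_arch = "aarch64")]
fn copyq_lane_f64_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdupq_n_f64(1.0);
        let b = vdup_n_f64(3.0); // single-lane vector, so LANE2 must be 0
        let r = vcopyq_lane_f64::<1, 0>(a, b);
        assert_eq!(vgetq_lane_f64::<1>(r), 3.0);
    }
}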
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_lane_s64<const LANE1: i32, const LANE2: i32>(
+    a: int64x2_t,
+    b: int64x1_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(LANE1, 1);
+    static_assert!(LANE2 == 0);
+    let b: int64x2_t = simd_shuffle!(b, b, [0, 1]);
+    match LANE1 & 0b1 {
+        0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
+        1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
+        _ => unreachable_unchecked(),
+    }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_lane_u64<const LANE1: i32, const LANE2: i32>(
+    a: uint64x2_t,
+    b: uint64x1_t,
+) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE1, 1);
+    static_assert!(LANE2 == 0);
+    let b: uint64x2_t = simd_shuffle!(b, b, [0, 1]);
+    match LANE1 & 0b1 {
+        0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
+        1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
+        _ => unreachable_unchecked(),
+    }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_lane_p64<const LANE1: i32, const LANE2: i32>(
+    a: poly64x2_t,
+    b: poly64x1_t,
+) -> poly64x2_t {
+    static_assert_uimm_bits!(LANE1, 1);
+    static_assert!(LANE2 == 0);
+    let b: poly64x2_t = simd_shuffle!(b, b, [0, 1]);
+    match LANE1 & 0b1 {
+        0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
+        1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
+        _ => unreachable_unchecked(),
+    }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_lane_s8<const LANE1: i32, const LANE2: i32>(
+    a: int8x16_t,
+    b: int8x8_t,
+) -> int8x16_t {
     static_assert_uimm_bits!(LANE1, 4);
     static_assert_uimm_bits!(LANE2, 3);
-    let b: poly8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+    let b: int8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
     match LANE1 & 0b1111 {
         0 => simd_shuffle!(
             a,
@@ -4563,30 +4488,7 @@ pub unsafe fn vcopyq_lane_p8<const LANE1: i32, const LANE2: i32>(
     }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_lane_p16<const LANE1: i32, const LANE2: i32>(
-    a: poly16x4_t,
-    b: poly16x4_t,
-) -> poly16x4_t {
-    static_assert_uimm_bits!(LANE1, 2);
-    static_assert_uimm_bits!(LANE2, 2);
-    match LANE1 & 0b11 {
-        0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-        1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-        2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
-}
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -4594,13 +4496,13 @@ pub unsafe fn vcopy_lane_p16<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_lane_p16<const LANE1: i32, const LANE2: i32>(
-    a: poly16x8_t,
-    b: poly16x4_t,
-) -> poly16x8_t {
+pub unsafe fn vcopyq_lane_s16<const LANE1: i32, const LANE2: i32>(
+    a: int16x8_t,
+    b: int16x4_t,
+) -> int16x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 2);
-    let b: poly16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);
+    let b: int16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);
     match LANE1 & 0b111 {
         0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
         1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
@@ -4614,29 +4516,7 @@ pub unsafe fn vcopyq_lane_p16<const LANE1: i32, const LANE2: i32>(
     }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_laneq_f32<const LANE1: i32, const LANE2: i32>(
-    a: float32x2_t,
-    b: float32x4_t,
-) -> float32x2_t {
-    static_assert_uimm_bits!(LANE1, 1);
-    static_assert_uimm_bits!(LANE2, 2);
-    let a: float32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);
-    match LANE1 & 0b1 {
-        0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]),
-        1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
-}
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -4644,12 +4524,13 @@ pub unsafe fn vcopy_laneq_f32<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_laneq_f32<const LANE1: i32, const LANE2: i32>(
-    a: float32x4_t,
-    b: float32x4_t,
-) -> float32x4_t {
+pub unsafe fn vcopyq_lane_s32<const LANE1: i32, const LANE2: i32>(
+    a: int32x4_t,
+    b: int32x2_t,
+) -> int32x4_t {
     static_assert_uimm_bits!(LANE1, 2);
-    static_assert_uimm_bits!(LANE2, 2);
+    static_assert_uimm_bits!(LANE2, 1);
+    let b: int32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]);
     match LANE1 & 0b11 {
         0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
         1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
@@ -4659,28 +4540,7 @@ pub unsafe fn vcopyq_laneq_f32<const LANE1: i32, const LANE2: i32>(
     }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_f64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_laneq_f64<const LANE1: i32, const LANE2: i32>(
-    a: float64x2_t,
-    b: float64x2_t,
-) -> float64x2_t {
-    static_assert_uimm_bits!(LANE1, 1);
-    static_assert_uimm_bits!(LANE2, 1);
-    match LANE1 & 0b1 {
-        0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-        1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
-}
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -4688,40 +4548,13 @@ pub unsafe fn vcopyq_laneq_f64<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_laneq_s8<const LANE1: i32, const LANE2: i32>(
-    a: int8x8_t,
-    b: int8x16_t,
-) -> int8x8_t {
-    static_assert_uimm_bits!(LANE1, 3);
-    static_assert_uimm_bits!(LANE2, 4);
-    let a: int8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
-    match LANE1 & 0b111 {
-        0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-        1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-        2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]),
-        4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]),
-        5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]),
-        6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]),
-        7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
-}
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_laneq_s8<const LANE1: i32, const LANE2: i32>(
-    a: int8x16_t,
-    b: int8x16_t,
-) -> int8x16_t {
+pub unsafe fn vcopyq_lane_u8<const LANE1: i32, const LANE2: i32>(
+    a: uint8x16_t,
+    b: uint8x8_t,
+) -> uint8x16_t {
     static_assert_uimm_bits!(LANE1, 4);
-    static_assert_uimm_bits!(LANE2, 4);
+    static_assert_uimm_bits!(LANE2, 3);
+    let b: uint8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
     match LANE1 & 0b1111 {
         0 => simd_shuffle!(
             a,
@@ -5079,31 +4912,7 @@ pub unsafe fn vcopyq_laneq_s8<const LANE1: i32, const LANE2: i32>(
     }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_laneq_s16<const LANE1: i32, const LANE2: i32>(
-    a: int16x4_t,
-    b: int16x8_t,
-) -> int16x4_t {
-    static_assert_uimm_bits!(LANE1, 2);
-    static_assert_uimm_bits!(LANE2, 3);
-    let a: int16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
-    match LANE1 & 0b11 {
-        0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]),
-        1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]),
-        2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
-}
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -5111,12 +4920,13 @@ pub unsafe fn vcopy_laneq_s16<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_laneq_s16<const LANE1: i32, const LANE2: i32>(
-    a: int16x8_t,
-    b: int16x8_t,
-) -> int16x8_t {
+pub unsafe fn vcopyq_lane_u16<const LANE1: i32, const LANE2: i32>(
+    a: uint16x8_t,
+    b: uint16x4_t,
+) -> uint16x8_t {
     static_assert_uimm_bits!(LANE1, 3);
-    static_assert_uimm_bits!(LANE2, 3);
+    static_assert_uimm_bits!(LANE2, 2);
+    let b: uint16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);
     match LANE1 & 0b111 {
         0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
         1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
@@ -5130,29 +4940,7 @@ pub unsafe fn vcopyq_laneq_s16<const LANE1: i32, const LANE2: i32>(
     }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_laneq_s32<const LANE1: i32, const LANE2: i32>(
-    a: int32x2_t,
-    b: int32x4_t,
-) -> int32x2_t {
-    static_assert_uimm_bits!(LANE1, 1);
-    static_assert_uimm_bits!(LANE2, 2);
-    let a: int32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);
-    match LANE1 & 0b1 {
-        0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]),
-        1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
-}
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -5160,12 +4948,13 @@ pub unsafe fn vcopy_laneq_s32<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_laneq_s32<const LANE1: i32, const LANE2: i32>(
-    a: int32x4_t,
-    b: int32x4_t,
-) -> int32x4_t {
+pub unsafe fn vcopyq_lane_u32<const LANE1: i32, const LANE2: i32>(
+    a: uint32x4_t,
+    b: uint32x2_t,
+) -> uint32x4_t {
     static_assert_uimm_bits!(LANE1, 2);
-    static_assert_uimm_bits!(LANE2, 2);
+    static_assert_uimm_bits!(LANE2, 1);
+    let b: uint32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]);
     match LANE1 & 0b11 {
         0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
         1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
@@ -5175,56 +4964,7 @@ pub unsafe fn vcopyq_laneq_s32<const LANE1: i32, const LANE2: i32>(
     }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_laneq_s64<const LANE1: i32, const LANE2: i32>(
-    a: int64x2_t,
-    b: int64x2_t,
-) -> int64x2_t {
-    static_assert_uimm_bits!(LANE1, 1);
-    static_assert_uimm_bits!(LANE2, 1);
-    match LANE1 & 0b1 {
-        0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-        1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
-}
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_laneq_u8<const LANE1: i32, const LANE2: i32>(
-    a: uint8x8_t,
-    b: uint8x16_t,
-) -> uint8x8_t {
-    static_assert_uimm_bits!(LANE1, 3);
-    static_assert_uimm_bits!(LANE2, 4);
-    let a: uint8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
-    match LANE1 & 0b111 {
-        0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-        1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-        2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]),
-        4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]),
-        5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]),
-        6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]),
-        7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
-}
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -5232,12 +4972,13 @@ pub unsafe fn vcopy_laneq_u8<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_laneq_u8<const LANE1: i32, const LANE2: i32>(
-    a: uint8x16_t,
-    b: uint8x16_t,
-) -> uint8x16_t {
+pub unsafe fn vcopyq_lane_p8<const LANE1: i32, const LANE2: i32>(
+    a: poly8x16_t,
+    b: poly8x8_t,
+) -> poly8x16_t {
     static_assert_uimm_bits!(LANE1, 4);
-    static_assert_uimm_bits!(LANE2, 4);
+    static_assert_uimm_bits!(LANE2, 3);
+    let b: poly8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
     match LANE1 & 0b1111 {
         0 => simd_shuffle!(
             a,
@@ -5595,31 +5336,7 @@ pub unsafe fn vcopyq_laneq_u8<const LANE1: i32, const LANE2: i32>(
     }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_laneq_u16<const LANE1: i32, const LANE2: i32>(
-    a: uint16x4_t,
-    b: uint16x8_t,
-) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE1, 2);
-    static_assert_uimm_bits!(LANE2, 3);
-    let a: uint16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
-    match LANE1 & 0b11 {
-        0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]),
-        1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]),
-        2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
-}
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -5627,12 +5344,13 @@ pub unsafe fn vcopy_laneq_u16<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_laneq_u16<const LANE1: i32, const LANE2: i32>(
-    a: uint16x8_t,
-    b: uint16x8_t,
-) -> uint16x8_t {
+pub unsafe fn vcopyq_lane_p16<const LANE1: i32, const LANE2: i32>(
+    a: poly16x8_t,
+    b: poly16x4_t,
+) -> poly16x8_t {
     static_assert_uimm_bits!(LANE1, 3);
-    static_assert_uimm_bits!(LANE2, 3);
+    static_assert_uimm_bits!(LANE2, 2);
+    let b: poly16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);
     match LANE1 & 0b111 {
         0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
         1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
@@ -5646,29 +5364,7 @@ pub unsafe fn vcopyq_laneq_u16<const LANE1: i32, const LANE2: i32>(
     }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_laneq_u32<const LANE1: i32, const LANE2: i32>(
-    a: uint32x2_t,
-    b: uint32x4_t,
-) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE1, 1);
-    static_assert_uimm_bits!(LANE2, 2);
-    let a: uint32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);
-    match LANE1 & 0b1 {
-        0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]),
-        1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
-}
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -5676,10 +5372,10 @@ pub unsafe fn vcopy_laneq_u32<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_laneq_u32<const LANE1: i32, const LANE2: i32>(
-    a: uint32x4_t,
-    b: uint32x4_t,
-) -> uint32x4_t {
+pub unsafe fn vcopyq_laneq_f32<const LANE1: i32, const LANE2: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+) -> float32x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 2);
     match LANE1 & 0b11 {
@@ -5691,7 +5387,7 @@ pub unsafe fn vcopyq_laneq_u32<const LANE1: i32, const LANE2: i32>(
     }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -5699,10 +5395,10 @@ pub unsafe fn vcopyq_laneq_u32<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_laneq_u64<const LANE1: i32, const LANE2: i32>(
-    a: uint64x2_t,
-    b: uint64x2_t,
-) -> uint64x2_t {
+pub unsafe fn vcopyq_laneq_f64<const LANE1: i32, const LANE2: i32>(
+    a: float64x2_t,
+    b: float64x2_t,
+) -> float64x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 1);
     match LANE1 & 0b1 {
@@ -5712,7 +5408,7 @@ pub unsafe fn vcopyq_laneq_u64<const LANE1: i32, const LANE2: i32>(
     }
 }
 #[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -5720,38 +5416,10 @@ pub unsafe fn vcopyq_laneq_u64<const LANE1: i32, const LANE2: i32>(
 #[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_laneq_p8<const LANE1: i32, const LANE2: i32>(
-    a: poly8x8_t,
-    b: poly8x16_t,
-) -> poly8x8_t {
-    static_assert_uimm_bits!(LANE1, 3);
-    static_assert_uimm_bits!(LANE2, 4);
-    let a: poly8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
-    match LANE1 & 0b111 {
-        0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-        1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-        2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]),
-        4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]),
-        5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]),
-        6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]),
-        7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
-}
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_laneq_p8<const LANE1: i32, const LANE2: i32>(
-    a: poly8x16_t,
-    b: poly8x16_t,
-) -> poly8x16_t {
+pub unsafe fn vcopyq_laneq_s8<const LANE1: i32, const LANE2: i32>(
+    a: int8x16_t,
+    b: int8x16_t,
+) -> int8x16_t {
     static_assert_uimm_bits!(LANE1, 4);
     static_assert_uimm_bits!(LANE2, 4);
     match LANE1 & 0b1111 {
@@ -6023,13324 +5691,20134 @@ pub unsafe fn vcopyq_laneq_p8<const LANE1: i32, const LANE2: i32>(
             a,
             b,
             [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                16 + LANE2 as u32,
-                13,
-                14,
-                15
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                16 + LANE2 as u32,
+                13,
+                14,
+                15
+            ]
+        ),
+        13 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                16 + LANE2 as u32,
+                14,
+                15
+            ]
+        ),
+        14 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                16 + LANE2 as u32,
+                15
+            ]
+        ),
+        15 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                16 + LANE2 as u32
+            ]
+        ),
+        _ => unreachable_unchecked(),
+    }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_s16<const LANE1: i32, const LANE2: i32>(
+    a: int16x8_t,
+    b: int16x8_t,
+) -> int16x8_t {
+    static_assert_uimm_bits!(LANE1, 3);
+    static_assert_uimm_bits!(LANE2, 3);
+    match LANE1 & 0b111 {
+        0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
+        1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
+        2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
+        3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
+        4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
+        5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
+        6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
+        7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
+        _ => unreachable_unchecked(),
+    }
+}
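+// Editorial note, not part of the generated file: a hypothetical usage
+// sketch for the lane-copy intrinsics above. `vcopyq_laneq_s16::<2, 5>`
+// replaces lane 2 of `a` with lane 5 of `b` and keeps every other lane of
+// `a`. Assuming a == [0, 1, 2, 3, 4, 5, 6, 7] and
+// b == [10, 11, 12, 13, 14, 15, 16, 17]:
+//
+//     let r = vcopyq_laneq_s16::<2, 5>(a, b);
+//     // r == [0, 1, 15, 3, 4, 5, 6, 7]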
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_s32<const LANE1: i32, const LANE2: i32>(
+    a: int32x4_t,
+    b: int32x4_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE1, 2);
+    static_assert_uimm_bits!(LANE2, 2);
+    match LANE1 & 0b11 {
+        0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
+        1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
+        2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
+        3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
+        _ => unreachable_unchecked(),
+    }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_s64<const LANE1: i32, const LANE2: i32>(
+    a: int64x2_t,
+    b: int64x2_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(LANE1, 1);
+    static_assert_uimm_bits!(LANE2, 1);
+    match LANE1 & 0b1 {
+        0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
+        1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
+        _ => unreachable_unchecked(),
+    }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_u8<const LANE1: i32, const LANE2: i32>(
+    a: uint8x16_t,
+    b: uint8x16_t,
+) -> uint8x16_t {
+    static_assert_uimm_bits!(LANE1, 4);
+    static_assert_uimm_bits!(LANE2, 4);
+    match LANE1 & 0b1111 {
+        0 => simd_shuffle!(
+            a,
+            b,
+            [
+                16 + LANE2 as u32,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        1 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                16 + LANE2 as u32,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        2 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                16 + LANE2 as u32,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        3 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                16 + LANE2 as u32,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        4 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                16 + LANE2 as u32,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        5 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                16 + LANE2 as u32,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        6 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                16 + LANE2 as u32,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        7 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                16 + LANE2 as u32,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        8 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                16 + LANE2 as u32,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        9 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                16 + LANE2 as u32,
+                10,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        10 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                16 + LANE2 as u32,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        11 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                16 + LANE2 as u32,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        12 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                16 + LANE2 as u32,
+                13,
+                14,
+                15
+            ]
+        ),
+        13 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                16 + LANE2 as u32,
+                14,
+                15
+            ]
+        ),
+        14 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                16 + LANE2 as u32,
+                15
+            ]
+        ),
+        15 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                16 + LANE2 as u32
+            ]
+        ),
+        _ => unreachable_unchecked(),
+    }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_u16<const LANE1: i32, const LANE2: i32>(
+    a: uint16x8_t,
+    b: uint16x8_t,
+) -> uint16x8_t {
+    static_assert_uimm_bits!(LANE1, 3);
+    static_assert_uimm_bits!(LANE2, 3);
+    match LANE1 & 0b111 {
+        0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
+        1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
+        2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
+        3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
+        4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
+        5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
+        6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
+        7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
+        _ => unreachable_unchecked(),
+    }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_u32<const LANE1: i32, const LANE2: i32>(
+    a: uint32x4_t,
+    b: uint32x4_t,
+) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE1, 2);
+    static_assert_uimm_bits!(LANE2, 2);
+    match LANE1 & 0b11 {
+        0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
+        1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
+        2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
+        3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
+        _ => unreachable_unchecked(),
+    }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_u64<const LANE1: i32, const LANE2: i32>(
+    a: uint64x2_t,
+    b: uint64x2_t,
+) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE1, 1);
+    static_assert_uimm_bits!(LANE2, 1);
+    match LANE1 & 0b1 {
+        0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
+        1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
+        _ => unreachable_unchecked(),
+    }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_p8<const LANE1: i32, const LANE2: i32>(
+    a: poly8x16_t,
+    b: poly8x16_t,
+) -> poly8x16_t {
+    static_assert_uimm_bits!(LANE1, 4);
+    static_assert_uimm_bits!(LANE2, 4);
+    match LANE1 & 0b1111 {
+        0 => simd_shuffle!(
+            a,
+            b,
+            [
+                16 + LANE2 as u32,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        1 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                16 + LANE2 as u32,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        2 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                16 + LANE2 as u32,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        3 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                16 + LANE2 as u32,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        4 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                16 + LANE2 as u32,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        5 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                16 + LANE2 as u32,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        6 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                16 + LANE2 as u32,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        7 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                16 + LANE2 as u32,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        8 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                16 + LANE2 as u32,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        9 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                16 + LANE2 as u32,
+                10,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        10 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                16 + LANE2 as u32,
+                11,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        11 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                16 + LANE2 as u32,
+                12,
+                13,
+                14,
+                15
+            ]
+        ),
+        12 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                16 + LANE2 as u32,
+                13,
+                14,
+                15
+            ]
+        ),
+        13 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                16 + LANE2 as u32,
+                14,
+                15
+            ]
+        ),
+        14 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                16 + LANE2 as u32,
+                15
+            ]
+        ),
+        15 => simd_shuffle!(
+            a,
+            b,
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                6,
+                7,
+                8,
+                9,
+                10,
+                11,
+                12,
+                13,
+                14,
+                16 + LANE2 as u32
+            ]
+        ),
+        _ => unreachable_unchecked(),
+    }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_p16<const LANE1: i32, const LANE2: i32>(
+    a: poly16x8_t,
+    b: poly16x8_t,
+) -> poly16x8_t {
+    static_assert_uimm_bits!(LANE1, 3);
+    static_assert_uimm_bits!(LANE2, 3);
+    match LANE1 & 0b111 {
+        0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
+        1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
+        2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
+        3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
+        4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
+        5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
+        6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
+        7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
+        _ => unreachable_unchecked(),
+    }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcopyq_laneq_p64<const LANE1: i32, const LANE2: i32>(
+    a: poly64x2_t,
+    b: poly64x2_t,
+) -> poly64x2_t {
+    static_assert_uimm_bits!(LANE1, 1);
+    static_assert_uimm_bits!(LANE2, 1);
+    match LANE1 & 0b1 {
+        0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
+        1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
+        _ => unreachable_unchecked(),
+    }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcreate_f64(a: u64) -> float64x1_t {
+    transmute(a)
+}
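+// Editorial note, not part of the generated file: `vcreate_f64` is a bit
+// reinterpretation, not a numeric conversion. For example:
+//
+//     let v = vcreate_f64(0x3FF0000000000000); // the IEEE-754 bits of 1.0f64
+//     // v is a float64x1_t holding 1.0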
+#[doc = "Floating-point convert to lower precision narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_f32_f64(a: float64x2_t) -> float32x2_t {
+    simd_cast(a)
+}
+#[doc = "Floating-point convert to higher precision long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f64_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_f64_f32(a: float32x2_t) -> float64x2_t {
+    simd_cast(a)
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f64_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(scvtf))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_f64_s64(a: int64x1_t) -> float64x1_t {
+    simd_cast(a)
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f64_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(scvtf))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_f64_s64(a: int64x2_t) -> float64x2_t {
+    simd_cast(a)
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f64_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ucvtf))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_f64_u64(a: uint64x1_t) -> float64x1_t {
+    simd_cast(a)
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f64_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ucvtf))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_f64_u64(a: uint64x2_t) -> float64x2_t {
+    simd_cast(a)
+}
+#[doc = "Floating-point convert to lower precision narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f32_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t {
+    simd_shuffle!(a, simd_cast(b), [0, 1, 2, 3])
+}
+#[doc = "Floating-point convert to higher precision long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f64_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_high_f64_f32(a: float32x4_t) -> float64x2_t {
+    let b: float32x2_t = simd_shuffle!(a, a, [2, 3]);
+    simd_cast(b)
+}
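+// Editorial note, not part of the generated file: the `_high_` variants
+// above operate on vector halves. `vcvt_high_f64_f32` widens lanes 2 and 3
+// of its f32x4 input; `vcvt_high_f32_f64` narrows `b` into lanes 2 and 3 of
+// the result while passing `a` through as lanes 0 and 1. A hypothetical
+// round trip (exact, because the data started as f32):
+//
+//     let hi: float64x2_t = vcvt_high_f64_f32(q);     // [q[2], q[3]] widened
+//     let r: float32x4_t = vcvt_high_f32_f64(lo, hi); // [lo[0], lo[1], q[2], q[3]]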
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f64_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(scvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_n_f64_s64<const N: i32>(a: int64x1_t) -> float64x1_t {
+    static_assert!(N >= 1 && N <= 64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64"
+        )]
+        fn _vcvt_n_f64_s64(a: int64x1_t, n: i32) -> float64x1_t;
+    }
+    _vcvt_n_f64_s64(a, N)
+}
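+// Editorial note, not part of the generated file: the `_n_` conversions to
+// floating point treat the integer input as fixed point with N fractional
+// bits, so the result is `a as f64 / 2^N`. A hypothetical example, using
+// `vcreate_s64` (defined elsewhere in this crate) to build the input:
+//
+//     let x = vcreate_s64(12);        // int64x1_t holding 12
+//     let f = vcvt_n_f64_s64::<2>(x); // 12 / 2^2 == 3.0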
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f64_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(scvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_n_f64_s64<const N: i32>(a: int64x2_t) -> float64x2_t {
+    static_assert!(N >= 1 && N <= 64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64"
+        )]
+        fn _vcvtq_n_f64_s64(a: int64x2_t, n: i32) -> float64x2_t;
+    }
+    _vcvtq_n_f64_s64(a, N)
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f64_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ucvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_n_f64_u64<const N: i32>(a: uint64x1_t) -> float64x1_t {
+    static_assert!(N >= 1 && N <= 64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64"
+        )]
+        fn _vcvt_n_f64_u64(a: int64x1_t, n: i32) -> float64x1_t;
+    }
+    _vcvt_n_f64_u64(a.as_signed(), N)
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f64_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ucvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_n_f64_u64<const N: i32>(a: uint64x2_t) -> float64x2_t {
+    static_assert!(N >= 1 && N <= 64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64"
+        )]
+        fn _vcvtq_n_f64_u64(a: int64x2_t, n: i32) -> float64x2_t;
+    }
+    _vcvtq_n_f64_u64(a.as_signed(), N)
+}
+#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzs, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_n_s64_f64<const N: i32>(a: float64x1_t) -> int64x1_t {
+    static_assert!(N >= 1 && N <= 64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64"
+        )]
+        fn _vcvt_n_s64_f64(a: float64x1_t, n: i32) -> int64x1_t;
+    }
+    _vcvt_n_s64_f64(a, N)
+}
+#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzs, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_n_s64_f64<const N: i32>(a: float64x2_t) -> int64x2_t {
+    static_assert!(N >= 1 && N <= 64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64"
+        )]
+        fn _vcvtq_n_s64_f64(a: float64x2_t, n: i32) -> int64x2_t;
+    }
+    _vcvtq_n_s64_f64(a, N)
+}
+#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_n_u64_f64<const N: i32>(a: float64x1_t) -> uint64x1_t {
+    static_assert!(N >= 1 && N <= 64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64"
+        )]
+        fn _vcvt_n_u64_f64(a: float64x1_t, n: i32) -> int64x1_t;
+    }
+    _vcvt_n_u64_f64(a, N).as_unsigned()
+}
+#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_n_u64_f64<const N: i32>(a: float64x2_t) -> uint64x2_t {
+    static_assert!(N >= 1 && N <= 64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64"
+        )]
+        fn _vcvtq_n_u64_f64(a: float64x2_t, n: i32) -> int64x2_t;
+    }
+    _vcvtq_n_u64_f64(a, N).as_unsigned()
+}
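+// Editorial note, not part of the generated file: the `_n_` conversions in
+// the other direction multiply by 2^N and then truncate toward zero, i.e.
+// they produce a fixed-point value with N fractional bits:
+//
+//     // vcvt_n_s64_f64::<2> of [1.75] yields [7], since 1.75 * 2^2 == 7.0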
+#[doc = "Floating-point convert to signed fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_s64_f64(a: float64x1_t) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.fptosi.sat.v1i64.v1f64"
+        )]
+        fn _vcvt_s64_f64(a: float64x1_t) -> int64x1_t;
+    }
+    _vcvt_s64_f64(a)
+}
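+// Editorial note, not part of the generated file: this conversion and the
+// three that follow lower to `llvm.fptosi.sat`/`llvm.fptoui.sat`, so they
+// saturate instead of invoking undefined behaviour: inputs above the
+// integer maximum clamp to the maximum, inputs below the minimum clamp to
+// the minimum, and NaN maps to 0. For example, vcvt_s64_f64 of
+// [f64::INFINITY] yields [i64::MAX].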
+#[doc = "Floating-point convert to signed fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_s64_f64(a: float64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.fptosi.sat.v2i64.v2f64"
+        )]
+        fn _vcvtq_s64_f64(a: float64x2_t) -> int64x2_t;
+    }
+    _vcvtq_s64_f64(a)
+}
+#[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_u64_f64(a: float64x1_t) -> uint64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.fptoui.sat.v1i64.v1f64"
+        )]
+        fn _vcvt_u64_f64(a: float64x1_t) -> int64x1_t;
+    }
+    _vcvt_u64_f64(a).as_unsigned()
+}
+#[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_u64_f64(a: float64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.fptoui.sat.v2i64.v2f64"
+        )]
+        fn _vcvtq_u64_f64(a: float64x2_t) -> int64x2_t;
+    }
+    _vcvtq_u64_f64(a).as_unsigned()
+}
+#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtas))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvta_s32_f32(a: float32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtas.v2i32.v2f32"
+        )]
+        fn _vcvta_s32_f32(a: float32x2_t) -> int32x2_t;
+    }
+    _vcvta_s32_f32(a)
+}
+#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtaq_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtas))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtaq_s32_f32(a: float32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtas.v4i32.v4f32"
+        )]
+        fn _vcvtaq_s32_f32(a: float32x4_t) -> int32x4_t;
+    }
+    _vcvtaq_s32_f32(a)
+}
+#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtas))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvta_s64_f64(a: float64x1_t) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtas.v1i64.v1f64"
+        )]
+        fn _vcvta_s64_f64(a: float64x1_t) -> int64x1_t;
+    }
+    _vcvta_s64_f64(a)
+}
+#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtaq_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtas))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtaq_s64_f64(a: float64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtas.v2i64.v2f64"
+        )]
+        fn _vcvtaq_s64_f64(a: float64x2_t) -> int64x2_t;
+    }
+    _vcvtaq_s64_f64(a)
+}
+#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtau))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvta_u32_f32(a: float32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtau.v2i32.v2f32"
+        )]
+        fn _vcvta_u32_f32(a: float32x2_t) -> int32x2_t;
+    }
+    _vcvta_u32_f32(a).as_unsigned()
+}
+#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtaq_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtau))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtaq_u32_f32(a: float32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtau.v4i32.v4f32"
+        )]
+        fn _vcvtaq_u32_f32(a: float32x4_t) -> int32x4_t;
+    }
+    _vcvtaq_u32_f32(a).as_unsigned()
+}
+#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtau))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvta_u64_f64(a: float64x1_t) -> uint64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtau.v1i64.v1f64"
+        )]
+        fn _vcvta_u64_f64(a: float64x1_t) -> int64x1_t;
+    }
+    _vcvta_u64_f64(a).as_unsigned()
+}
+#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtaq_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtau))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtaq_u64_f64(a: float64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtau.v2i64.v2f64"
+        )]
+        fn _vcvtaq_u64_f64(a: float64x2_t) -> int64x2_t;
+    }
+    _vcvtaq_u64_f64(a).as_unsigned()
+}
+#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtas_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtas))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtas_s32_f32(a: f32) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtas.i32.f32"
+        )]
+        fn _vcvtas_s32_f32(a: f32) -> i32;
+    }
+    _vcvtas_s32_f32(a)
+}
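+// Editorial note, not part of the generated file: "ties to away" rounds
+// halfway cases away from zero, unlike Rust's truncating `as` casts:
+//
+//     // vcvtas_s32_f32(2.5)  == 3
+//     // vcvtas_s32_f32(-2.5) == -3
+//     // vcvtas_s32_f32(2.4)  == 2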
+#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtad_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtas))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtad_s64_f64(a: f64) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtas.i64.f64"
+        )]
+        fn _vcvtad_s64_f64(a: f64) -> i64;
+    }
+    _vcvtad_s64_f64(a)
+}
+#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtas_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtau))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtas_u32_f32(a: f32) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtau.i32.f32"
+        )]
+        fn _vcvtas_u32_f32(a: f32) -> i32;
+    }
+    _vcvtas_u32_f32(a).as_unsigned()
+}
+#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtad_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtau))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtad_u64_f64(a: f64) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtau.i64.f64"
+        )]
+        fn _vcvtad_u64_f64(a: f64) -> i64;
+    }
+    _vcvtad_u64_f64(a).as_unsigned()
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_f64_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(scvtf))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtd_f64_s64(a: i64) -> f64 {
+    a as f64
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_f32_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(scvtf))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvts_f32_s32(a: i32) -> f32 {
+    a as f32
+}
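+// Editorial note, not part of the generated file: the two scalar forms
+// above are plain numeric casts; `vcvtd_f64_s64(1)` is `1.0` and
+// `vcvts_f32_s32(-3)` is `-3.0`, exactly as `as f64` / `as f32` behave.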
+#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtms))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtm_s32_f32(a: float32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtms.v2i32.v2f32"
+        )]
+        fn _vcvtm_s32_f32(a: float32x2_t) -> int32x2_t;
+    }
+    _vcvtm_s32_f32(a)
+}
+#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtms))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtmq_s32_f32(a: float32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtms.v4i32.v4f32"
+        )]
+        fn _vcvtmq_s32_f32(a: float32x4_t) -> int32x4_t;
+    }
+    _vcvtmq_s32_f32(a)
+}
+#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtms))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtm_s64_f64(a: float64x1_t) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtms.v1i64.v1f64"
+        )]
+        fn _vcvtm_s64_f64(a: float64x1_t) -> int64x1_t;
+    }
+    _vcvtm_s64_f64(a)
+}
+#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtms))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtmq_s64_f64(a: float64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtms.v2i64.v2f64"
+        )]
+        fn _vcvtmq_s64_f64(a: float64x2_t) -> int64x2_t;
+    }
+    _vcvtmq_s64_f64(a)
+}
+#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtmu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtm_u32_f32(a: float32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtmu.v2i32.v2f32"
+        )]
+        fn _vcvtm_u32_f32(a: float32x2_t) -> int32x2_t;
+    }
+    _vcvtm_u32_f32(a).as_unsigned()
+}
+#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtmu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtmq_u32_f32(a: float32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtmu.v4i32.v4f32"
+        )]
+        fn _vcvtmq_u32_f32(a: float32x4_t) -> int32x4_t;
+    }
+    _vcvtmq_u32_f32(a).as_unsigned()
+}
+#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtmu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtm_u64_f64(a: float64x1_t) -> uint64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtmu.v1i64.v1f64"
+        )]
+        fn _vcvtm_u64_f64(a: float64x1_t) -> int64x1_t;
+    }
+    _vcvtm_u64_f64(a).as_unsigned()
+}
+#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtmu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtmq_u64_f64(a: float64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtmu.v2i64.v2f64"
+        )]
+        fn _vcvtmq_u64_f64(a: float64x2_t) -> int64x2_t;
+    }
+    _vcvtmq_u64_f64(a).as_unsigned()
+}
+#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtms_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtms))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtms_s32_f32(a: f32) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtms.i32.f32"
+        )]
+        fn _vcvtms_s32_f32(a: f32) -> i32;
+    }
+    _vcvtms_s32_f32(a)
+}
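+// Editorial note, not part of the generated file: rounding toward minus
+// infinity is a floor operation:
+//
+//     // vcvtms_s32_f32(2.9)  == 2
+//     // vcvtms_s32_f32(-0.1) == -1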
+#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmd_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtms))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtmd_s64_f64(a: f64) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtms.i64.f64"
+        )]
+        fn _vcvtmd_s64_f64(a: f64) -> i64;
+    }
+    _vcvtmd_s64_f64(a)
+}
+#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtms_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtmu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtms_u32_f32(a: f32) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtmu.i32.f32"
+        )]
+        fn _vcvtms_u32_f32(a: f32) -> i32;
+    }
+    _vcvtms_u32_f32(a).as_unsigned()
+}
+#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmd_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtmu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtmd_u64_f64(a: f64) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtmu.i64.f64"
+        )]
+        fn _vcvtmd_u64_f64(a: f64) -> i64;
+    }
+    _vcvtmd_u64_f64(a).as_unsigned()
+}
+#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtns))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtn_s32_f32(a: float32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtns.v2i32.v2f32"
+        )]
+        fn _vcvtn_s32_f32(a: float32x2_t) -> int32x2_t;
+    }
+    _vcvtn_s32_f32(a)
+}
+#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnq_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtns))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtnq_s32_f32(a: float32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtns.v4i32.v4f32"
+        )]
+        fn _vcvtnq_s32_f32(a: float32x4_t) -> int32x4_t;
+    }
+    _vcvtnq_s32_f32(a)
+}
+#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtns))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtn_s64_f64(a: float64x1_t) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtns.v1i64.v1f64"
+        )]
+        fn _vcvtn_s64_f64(a: float64x1_t) -> int64x1_t;
+    }
+    _vcvtn_s64_f64(a)
+}
+#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnq_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtns))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtnq_s64_f64(a: float64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtns.v2i64.v2f64"
+        )]
+        fn _vcvtnq_s64_f64(a: float64x2_t) -> int64x2_t;
+    }
+    _vcvtnq_s64_f64(a)
+}
+#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtnu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtn_u32_f32(a: float32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtnu.v2i32.v2f32"
+        )]
+        fn _vcvtn_u32_f32(a: float32x2_t) -> int32x2_t;
+    }
+    _vcvtn_u32_f32(a).as_unsigned()
+}
+#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnq_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtnu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtnq_u32_f32(a: float32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtnu.v4i32.v4f32"
+        )]
+        fn _vcvtnq_u32_f32(a: float32x4_t) -> int32x4_t;
+    }
+    _vcvtnq_u32_f32(a).as_unsigned()
+}
+#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtnu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtn_u64_f64(a: float64x1_t) -> uint64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtnu.v1i64.v1f64"
+        )]
+        fn _vcvtn_u64_f64(a: float64x1_t) -> int64x1_t;
+    }
+    _vcvtn_u64_f64(a).as_unsigned()
+}
+#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnq_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtnu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtnq_u64_f64(a: float64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtnu.v2i64.v2f64"
+        )]
+        fn _vcvtnq_u64_f64(a: float64x2_t) -> int64x2_t;
+    }
+    _vcvtnq_u64_f64(a).as_unsigned()
+}
+#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtns_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtns))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtns_s32_f32(a: f32) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtns.i32.f32"
+        )]
+        fn _vcvtns_s32_f32(a: f32) -> i32;
+    }
+    _vcvtns_s32_f32(a)
+}
+#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnd_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtns))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtnd_s64_f64(a: f64) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtns.i64.f64"
+        )]
+        fn _vcvtnd_s64_f64(a: f64) -> i64;
+    }
+    _vcvtnd_s64_f64(a)
+}
+#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtns_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtnu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtns_u32_f32(a: f32) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtnu.i32.f32"
+        )]
+        fn _vcvtns_u32_f32(a: f32) -> i32;
+    }
+    _vcvtns_u32_f32(a).as_unsigned()
+}
+#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnd_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtnu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtnd_u64_f64(a: f64) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtnu.i64.f64"
+        )]
+        fn _vcvtnd_u64_f64(a: f64) -> i64;
+    }
+    _vcvtnd_u64_f64(a).as_unsigned()
+}
+#[doc = "Floating-point convert to signed integer, rounding toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtps))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtp_s32_f32(a: float32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtps.v2i32.v2f32"
+        )]
+        fn _vcvtp_s32_f32(a: float32x2_t) -> int32x2_t;
+    }
+    _vcvtp_s32_f32(a)
+}
+#[doc = "Floating-point convert to signed integer, rounding toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpq_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtps))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtpq_s32_f32(a: float32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtps.v4i32.v4f32"
+        )]
+        fn _vcvtpq_s32_f32(a: float32x4_t) -> int32x4_t;
+    }
+    _vcvtpq_s32_f32(a)
+}
+#[doc = "Floating-point convert to signed integer, rounding toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtps))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtp_s64_f64(a: float64x1_t) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtps.v1i64.v1f64"
+        )]
+        fn _vcvtp_s64_f64(a: float64x1_t) -> int64x1_t;
+    }
+    _vcvtp_s64_f64(a)
+}
+#[doc = "Floating-point convert to signed integer, rounding toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpq_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtps))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtpq_s64_f64(a: float64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtps.v2i64.v2f64"
+        )]
+        fn _vcvtpq_s64_f64(a: float64x2_t) -> int64x2_t;
+    }
+    _vcvtpq_s64_f64(a)
+}
+#[doc = "Floating-point convert to unsigned integer, rounding toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtpu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtp_u32_f32(a: float32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtpu.v2i32.v2f32"
+        )]
+        fn _vcvtp_u32_f32(a: float32x2_t) -> int32x2_t;
+    }
+    _vcvtp_u32_f32(a).as_unsigned()
+}
+#[doc = "Floating-point convert to unsigned integer, rounding toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpq_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtpu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtpq_u32_f32(a: float32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtpu.v4i32.v4f32"
+        )]
+        fn _vcvtpq_u32_f32(a: float32x4_t) -> int32x4_t;
+    }
+    _vcvtpq_u32_f32(a).as_unsigned()
+}
+#[doc = "Floating-point convert to unsigned integer, rounding toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtpu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtp_u64_f64(a: float64x1_t) -> uint64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtpu.v1i64.v1f64"
+        )]
+        fn _vcvtp_u64_f64(a: float64x1_t) -> int64x1_t;
+    }
+    _vcvtp_u64_f64(a).as_unsigned()
+}
+#[doc = "Floating-point convert to unsigned integer, rounding toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpq_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtpu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtpu.v2i64.v2f64"
+        )]
+        fn _vcvtpq_u64_f64(a: float64x2_t) -> int64x2_t;
+    }
+    _vcvtpq_u64_f64(a).as_unsigned()
+}
+#[doc = "Floating-point convert to signed integer, rounding toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtps_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtps))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtps_s32_f32(a: f32) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtps.i32.f32"
+        )]
+        fn _vcvtps_s32_f32(a: f32) -> i32;
+    }
+    _vcvtps_s32_f32(a)
+}
+#[doc = "Floating-point convert to signed integer, rounding toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpd_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtps))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtpd_s64_f64(a: f64) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtps.i64.f64"
+        )]
+        fn _vcvtpd_s64_f64(a: f64) -> i64;
+    }
+    _vcvtpd_s64_f64(a)
+}
+#[doc = "Floating-point convert to unsigned integer, rounding toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtps_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtpu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtps_u32_f32(a: f32) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtpu.i32.f32"
+        )]
+        fn _vcvtps_u32_f32(a: f32) -> i32;
+    }
+    _vcvtps_u32_f32(a).as_unsigned()
+}
+#[doc = "Floating-point convert to unsigned integer, rounding toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpd_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtpu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtpd_u64_f64(a: f64) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtpu.i64.f64"
+        )]
+        fn _vcvtpd_u64_f64(a: f64) -> i64;
+    }
+    _vcvtpd_u64_f64(a).as_unsigned()
+}
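+// Worked example (editorial comment, not generated code): the fcvtm/fcvtn/
+// fcvtp families above differ only in rounding mode. For example:
+//
+//     unsafe {
+//         assert_eq!(vcvtms_s32_f32(1.5), 1); // toward minus infinity
+//         assert_eq!(vcvtns_s32_f32(1.5), 2); // nearest, ties to even
+//         assert_eq!(vcvtns_s32_f32(2.5), 2); // ties resolve to even
+//         assert_eq!(vcvtps_s32_f32(1.5), 2); // toward plus infinity
+//     }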
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_f32_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ucvtf))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvts_f32_u32(a: u32) -> f32 {
+    a as f32
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_f64_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ucvtf))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtd_f64_u64(a: u64) -> f64 {
+    a as f64
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_n_f32_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(scvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvts_n_f32_s32<const N: i32>(a: i32) -> f32 {
+    static_assert!(N >= 1 && N <= 64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfxs2fp.f32.i32"
+        )]
+        fn _vcvts_n_f32_s32(a: i32, n: i32) -> f32;
+    }
+    _vcvts_n_f32_s32(a, N)
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_n_f64_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(scvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtd_n_f64_s64<const N: i32>(a: i64) -> f64 {
+    static_assert!(N >= 1 && N <= 64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfxs2fp.f64.i64"
+        )]
+        fn _vcvtd_n_f64_s64(a: i64, n: i32) -> f64;
+    }
+    _vcvtd_n_f64_s64(a, N)
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_n_f32_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ucvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvts_n_f32_u32<const N: i32>(a: u32) -> f32 {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfxu2fp.f32.i32"
+        )]
+        fn _vcvts_n_f32_u32(a: i32, n: i32) -> f32;
+    }
+    _vcvts_n_f32_u32(a.as_signed(), N)
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_n_f64_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ucvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtd_n_f64_u64<const N: i32>(a: u64) -> f64 {
+    static_assert!(N >= 1 && N <= 64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfxu2fp.f64.i64"
+        )]
+        fn _vcvtd_n_f64_u64(a: i64, n: i32) -> f64;
+    }
+    _vcvtd_n_f64_u64(a.as_signed(), N)
+}
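+// Illustrative sketch (editorial comment, not generated code): in the `_n_`
+// conversions above, N is the number of fractional bits, so the result is
+// a / 2^N. For example:
+//
+//     unsafe {
+//         assert_eq!(vcvts_n_f32_s32::<2>(6), 1.5); // 6 / 2^2
+//         assert_eq!(vcvtd_n_f64_u64::<3>(12), 1.5); // 12 / 2^3
+//     }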
+#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_n_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzs, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvts_n_s32_f32<const N: i32>(a: f32) -> i32 {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfp2fxs.i32.f32"
+        )]
+        fn _vcvts_n_s32_f32(a: f32, n: i32) -> i32;
+    }
+    _vcvts_n_s32_f32(a, N)
+}
+#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_n_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzs, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtd_n_s64_f64<const N: i32>(a: f64) -> i64 {
+    static_assert!(N >= 1 && N <= 64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfp2fxs.i64.f64"
+        )]
+        fn _vcvtd_n_s64_f64(a: f64, n: i32) -> i64;
+    }
+    _vcvtd_n_s64_f64(a, N)
+}
+#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_n_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvts_n_u32_f32<const N: i32>(a: f32) -> u32 {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfp2fxu.i32.f32"
+        )]
+        fn _vcvts_n_u32_f32(a: f32, n: i32) -> i32;
+    }
+    _vcvts_n_u32_f32(a, N).as_unsigned()
+}
+#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_n_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtd_n_u64_f64<const N: i32>(a: f64) -> u64 {
+    static_assert!(N >= 1 && N <= 64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfp2fxu.i64.f64"
+        )]
+        fn _vcvtd_n_u64_f64(a: f64, n: i32) -> i64;
+    }
+    _vcvtd_n_u64_f64(a, N).as_unsigned()
+}
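+// Illustrative sketch (editorial comment, not generated code): the
+// float-to-fixed-point direction multiplies by 2^N and rounds toward zero.
+// For example:
+//
+//     unsafe {
+//         assert_eq!(vcvts_n_s32_f32::<2>(1.6), 6); // trunc(1.6 * 2^2)
+//     }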
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvts_s32_f32(a: f32) -> i32 {
+    a as i32
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_s64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzs))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtd_s64_f64(a: f64) -> i64 {
+    a as i64
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvts_u32_f32(a: f32) -> u32 {
+    a as u32
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_u64_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtzu))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtd_u64_f64(a: f64) -> u64 {
+    a as u64
+}
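+// Note (editorial comment): the four scalar conversions above lower to plain
+// `as` casts, which is correct because Rust's float-to-int `as` saturates and
+// maps NaN to 0, exactly matching fcvtzs/fcvtzu. For example:
+//
+//     unsafe {
+//         assert_eq!(vcvts_u32_f32(-1.0), 0); // saturates at the lower bound
+//         assert_eq!(vcvtd_s64_f64(f64::NAN), 0); // NaN becomes 0
+//     }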
+#[doc = "Floating-point convert to lower precision narrow, rounding to odd"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtx_f32_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtxn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtx_f32_f64(a: float64x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtxn.v2f32.v2f64"
+        )]
+        fn _vcvtx_f32_f64(a: float64x2_t) -> float32x2_t;
+    }
+    _vcvtx_f32_f64(a)
+}
+#[doc = "Floating-point convert to lower precision narrow, rounding to odd"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtx_high_f32_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtxn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t {
+    simd_shuffle!(a, vcvtx_f32_f64(b), [0, 1, 2, 3])
+}
+#[doc = "Floating-point convert to lower precision narrow, rounding to odd"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtxd_f32_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fcvtxn))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtxd_f32_f64(a: f64) -> f32 {
+    simd_extract!(vcvtx_f32_f64(vdupq_n_f64(a)), 0)
+}
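+// Note (editorial comment): "rounding to odd" forces the lowest mantissa bit
+// to 1 whenever the narrowing is inexact, so a further rounding step (e.g.
+// f64 -> f32 -> f16) cannot double-round. The scalar form above is modeled by
+// duplicating the input into both lanes and extracting lane 0.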
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdiv_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fdiv))]
+pub unsafe fn vdiv_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    simd_div(a, b)
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdivq_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fdiv))]
+pub unsafe fn vdivq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    simd_div(a, b)
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdiv_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fdiv))]
+pub unsafe fn vdiv_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+    simd_div(a, b)
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdivq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fdiv))]
+pub unsafe fn vdivq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    simd_div(a, b)
+}
+#[doc = "Dot product arithmetic (indexed)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(sdot, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
+pub unsafe fn vdot_laneq_s32<const LANE: i32>(
+    a: int32x2_t,
+    b: int8x8_t,
+    c: int8x16_t,
+) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c: int32x4_t = transmute(c);
+    let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
+    vdot_s32(a, b, transmute(c))
+}
+#[doc = "Dot product arithmetic (indexed)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(sdot, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
+pub unsafe fn vdotq_laneq_s32<const LANE: i32>(
+    a: int32x4_t,
+    b: int8x16_t,
+    c: int8x16_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c: int32x4_t = transmute(c);
+    let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vdotq_s32(a, b, transmute(c))
+}
+#[doc = "Dot product arithmetic (indexed)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(udot, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
+pub unsafe fn vdot_laneq_u32<const LANE: i32>(
+    a: uint32x2_t,
+    b: uint8x8_t,
+    c: uint8x16_t,
+) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c: uint32x4_t = transmute(c);
+    let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
+    vdot_u32(a, b, transmute(c))
+}
+#[doc = "Dot product arithmetic (indexed)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(test, assert_instr(udot, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
+pub unsafe fn vdotq_laneq_u32<const LANE: i32>(
+    a: uint32x4_t,
+    b: uint8x16_t,
+    c: uint8x16_t,
+) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c: uint32x4_t = transmute(c);
+    let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vdotq_u32(a, b, transmute(c))
+}
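+// Illustrative sketch (editorial comment, not generated code): each 32-bit
+// lane of the result accumulates a 4-byte dot product; LANE picks which
+// 4-byte group of `c` is broadcast. A scalar model of one output lane:
+//
+//     fn dot_lane(acc: i32, b: [i8; 4], c: [i8; 4]) -> i32 {
+//         acc + b.iter().zip(c).map(|(&x, y)| x as i32 * y as i32).sum::<i32>()
+//     }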
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 0))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdup_lane_f64<const N: i32>(a: float64x1_t) -> float64x1_t {
+    static_assert!(N == 0);
+    a
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 0))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdup_lane_p64<const N: i32>(a: poly64x1_t) -> poly64x1_t {
+    static_assert!(N == 0);
+    a
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdup_laneq_f64<const N: i32>(a: float64x2_t) -> float64x1_t {
+    static_assert_uimm_bits!(N, 1);
+    transmute::<f64, _>(simd_extract!(a, N as u32))
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdup_laneq_p64<const N: i32>(a: poly64x2_t) -> poly64x1_t {
+    static_assert_uimm_bits!(N, 1);
+    transmute::<u64, _>(simd_extract!(a, N as u32))
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupb_lane_s8<const N: i32>(a: int8x8_t) -> i8 {
+    static_assert_uimm_bits!(N, 3);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vduph_laneq_s16<const N: i32>(a: int16x8_t) -> i16 {
+    static_assert_uimm_bits!(N, 3);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_lane_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupb_lane_u8<const N: i32>(a: uint8x8_t) -> u8 {
+    static_assert_uimm_bits!(N, 3);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vduph_laneq_u16<const N: i32>(a: uint16x8_t) -> u16 {
+    static_assert_uimm_bits!(N, 3);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_lane_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupb_lane_p8<const N: i32>(a: poly8x8_t) -> p8 {
+    static_assert_uimm_bits!(N, 3);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vduph_laneq_p16<const N: i32>(a: poly16x8_t) -> p16 {
+    static_assert_uimm_bits!(N, 3);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 8))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupb_laneq_s8<const N: i32>(a: int8x16_t) -> i8 {
+    static_assert_uimm_bits!(N, 4);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 8))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupb_laneq_u8<const N: i32>(a: uint8x16_t) -> u8 {
+    static_assert_uimm_bits!(N, 4);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 8))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupb_laneq_p8<const N: i32>(a: poly8x16_t) -> p8 {
+    static_assert_uimm_bits!(N, 4);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 0))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupd_lane_f64<const N: i32>(a: float64x1_t) -> f64 {
+    static_assert!(N == 0);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_lane_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 0))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupd_lane_s64<const N: i32>(a: int64x1_t) -> i64 {
+    static_assert!(N == 0);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_lane_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 0))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupd_lane_u64<const N: i32>(a: uint64x1_t) -> u64 {
+    static_assert!(N == 0);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(dup, N = 0))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupq_lane_f64<const N: i32>(a: float64x1_t) -> float64x2_t {
+    static_assert!(N == 0);
+    simd_shuffle!(a, a, [N as u32, N as u32])
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(dup, N = 0))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupq_lane_p64<const N: i32>(a: poly64x1_t) -> poly64x2_t {
+    static_assert!(N == 0);
+    simd_shuffle!(a, a, [N as u32, N as u32])
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(dup, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupq_laneq_f64<const N: i32>(a: float64x2_t) -> float64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    simd_shuffle!(a, a, [N as u32, N as u32])
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(dup, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupq_laneq_p64<const N: i32>(a: poly64x2_t) -> poly64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    simd_shuffle!(a, a, [N as u32, N as u32])
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdups_lane_f32<const N: i32>(a: float32x2_t) -> f32 {
+    static_assert_uimm_bits!(N, 1);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupd_laneq_f64<const N: i32>(a: float64x2_t) -> f64 {
+    static_assert_uimm_bits!(N, 1);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdups_lane_s32<const N: i32>(a: int32x2_t) -> i32 {
+    static_assert_uimm_bits!(N, 1);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupd_laneq_s64<const N: i32>(a: int64x2_t) -> i64 {
+    static_assert_uimm_bits!(N, 1);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdups_lane_u32<const N: i32>(a: uint32x2_t) -> u32 {
+    static_assert_uimm_bits!(N, 1);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdupd_laneq_u64<const N: i32>(a: uint64x2_t) -> u64 {
+    static_assert_uimm_bits!(N, 1);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_laneq_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdups_laneq_f32<const N: i32>(a: float32x4_t) -> f32 {
+    static_assert_uimm_bits!(N, 2);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vduph_lane_s16<const N: i32>(a: int16x4_t) -> i16 {
+    static_assert_uimm_bits!(N, 2);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdups_laneq_s32<const N: i32>(a: int32x4_t) -> i32 {
+    static_assert_uimm_bits!(N, 2);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vduph_lane_u16<const N: i32>(a: uint16x4_t) -> u16 {
+    static_assert_uimm_bits!(N, 2);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_laneq_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vdups_laneq_u32<const N: i32>(a: uint32x4_t) -> u32 {
+    static_assert_uimm_bits!(N, 2);
+    simd_extract!(a, N as u32)
+}
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vduph_lane_p16<const N: i32>(a: poly16x4_t) -> p16 {
+    static_assert_uimm_bits!(N, 2);
+    simd_extract!(a, N as u32)
+}
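+// Note (editorial comment, not generated code): despite the "set all lanes"
+// description, the scalar vdup*_lane* forms above simply return lane N, which
+// is why most of them compile to `nop`. For example:
+//
+//     unsafe {
+//         let v = vld1q_u32([1, 2, 3, 4].as_ptr());
+//         assert_eq!(vdups_laneq_u32::<2>(v), 3);
+//     }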
+#[doc = "Three-way exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub unsafe fn veor3q_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.eor3s.v16i8"
+        )]
+        fn _veor3q_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
+    }
+    _veor3q_s8(a, b, c)
+}
+#[doc = "Three-way exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub unsafe fn veor3q_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.eor3s.v8i16"
+        )]
+        fn _veor3q_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
+    }
+    _veor3q_s16(a, b, c)
+}
+#[doc = "Three-way exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub unsafe fn veor3q_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.eor3s.v4i32"
+        )]
+        fn _veor3q_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+    }
+    _veor3q_s32(a, b, c)
+}
+#[doc = "Three-way exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub unsafe fn veor3q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.eor3s.v2i64"
+        )]
+        fn _veor3q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+    }
+    _veor3q_s64(a, b, c)
+}
+#[doc = "Three-way exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub unsafe fn veor3q_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.eor3u.v16i8"
+        )]
+        fn _veor3q_u8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
+    }
+    _veor3q_u8(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+}
+#[doc = "Three-way exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub unsafe fn veor3q_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.eor3u.v8i16"
+        )]
+        fn _veor3q_u16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
+    }
+    _veor3q_u16(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+}
+#[doc = "Three-way exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub unsafe fn veor3q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.eor3u.v4i32"
+        )]
+        fn _veor3q_u32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+    }
+    _veor3q_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+}
+#[doc = "Three-way exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub unsafe fn veor3q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.eor3u.v2i64"
+        )]
+        fn _veor3q_u64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+    }
+    _veor3q_u64(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+}
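+// Illustrative sketch, not generator output: EOR3 fuses a three-way XOR into
+// one instruction, so (inside an `unsafe` block, with the `sha3` feature
+// available at run time) the fused and chained forms agree lane-for-lane:
+//
+//     let a = vdupq_n_u64(0b1010);
+//     let b = vdupq_n_u64(0b0110);
+//     let c = vdupq_n_u64(0b0001);
+//     let fused = veor3q_u64(a, b, c);             // a ^ b ^ c
+//     let chained = veorq_u64(veorq_u64(a, b), c); // same lanes as `fused`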
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ext, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vextq_f64<const N: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    match N & 0b1 {
+        0 => simd_shuffle!(a, b, [0, 1]),
+        1 => simd_shuffle!(a, b, [1, 2]),
+        _ => unreachable_unchecked(),
+    }
+}
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ext, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vextq_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    match N & 0b1 {
+        0 => simd_shuffle!(a, b, [0, 1]),
+        1 => simd_shuffle!(a, b, [1, 2]),
+        _ => unreachable_unchecked(),
+    }
+}
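+// Illustrative sketch, not generator output: for the two-lane types,
+// `vextq_*::<N>` returns the window starting `N` lanes into the
+// concatenation `a:b`, so `N = 0` yields `a` itself and `N = 1` yields
+// `[a[1], b[0]]`:
+//
+//     let x = [1.0f64, 2.0];
+//     let y = [3.0f64, 4.0];
+//     let r = vextq_f64::<1>(vld1q_f64(x.as_ptr()), vld1q_f64(y.as_ptr()));
+//     // `r` holds lanes [2.0, 3.0]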
+#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fmadd))]
+pub unsafe fn vfma_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.fma.v1f64"
+        )]
+        fn _vfma_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t;
+    }
+    _vfma_f64(b, c, a)
+}
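+// Note on argument order: `llvm.fma.*(x, y, z)` computes `x * y + z`, while
+// the Arm intrinsic takes the accumulator first, hence the rotated call
+// `_vfma_f64(b, c, a)` above. In scalar terms:
+//
+//     vfma_f64(a, b, c) == a + (b * c)   // single rounding (FMADD)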
+#[doc = "Floating-point fused multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfma_lane_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x2_t,
+) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vfma_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32)))
+}
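+// Illustrative note: the `*_lane*` FMA variants all follow one pattern:
+// extract lane `LANE` of `c`, broadcast it with `vdup`, and defer to the
+// plain vector FMA. For example, with an in-range lane:
+//
+//     vfma_lane_f32::<1>(a, b, c)   // each lane i: a[i] + b[i] * c[1]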
+#[doc = "Floating-point fused multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_laneq_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfma_laneq_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x4_t,
+) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vfma_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32)))
+}
+#[doc = "Floating-point fused multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmaq_lane_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x2_t,
+) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vfmaq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32)))
+}
+#[doc = "Floating-point fused multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_laneq_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmaq_laneq_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x4_t,
+) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vfmaq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32)))
+}
+#[doc = "Floating-point fused multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_laneq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmaq_laneq_f64<const LANE: i32>(
+    a: float64x2_t,
+    b: float64x2_t,
+    c: float64x2_t,
+) -> float64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vfmaq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32)))
+}
+#[doc = "Floating-point fused multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfma_lane_f64<const LANE: i32>(
+    a: float64x1_t,
+    b: float64x1_t,
+    c: float64x1_t,
+) -> float64x1_t {
+    static_assert!(LANE == 0);
+    vfma_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32)))
+}
+#[doc = "Floating-point fused multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_laneq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfma_laneq_f64<const LANE: i32>(
+    a: float64x1_t,
+    b: float64x1_t,
+    c: float64x2_t,
+) -> float64x1_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vfma_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32)))
+}
+#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fmadd))]
+pub unsafe fn vfma_n_f64(a: float64x1_t, b: float64x1_t, c: f64) -> float64x1_t {
+    vfma_f64(a, b, vdup_n_f64(c))
+}
+#[doc = "Floating-point fused multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmad_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmad_lane_f64<const LANE: i32>(a: f64, b: f64, c: float64x1_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.fma.f64"
+        )]
+        fn _vfmad_lane_f64(a: f64, b: f64, c: f64) -> f64;
+    }
+    static_assert!(LANE == 0);
+    let c: f64 = simd_extract!(c, LANE as u32);
+    _vfmad_lane_f64(b, c, a)
+}
+#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fmla))]
+pub unsafe fn vfmaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.fma.v2f64"
+        )]
+        fn _vfmaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t;
+    }
+    _vfmaq_f64(b, c, a)
+}
+#[doc = "Floating-point fused multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmaq_lane_f64<const LANE: i32>(
+    a: float64x2_t,
+    b: float64x2_t,
+    c: float64x1_t,
+) -> float64x2_t {
+    static_assert!(LANE == 0);
+    vfmaq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32)))
+}
+#[doc = "Floating-point fused Multiply-Add to accumulator (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_n_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fmla))]
+pub unsafe fn vfmaq_n_f64(a: float64x2_t, b: float64x2_t, c: f64) -> float64x2_t {
+    vfmaq_f64(a, b, vdupq_n_f64(c))
+}
+#[doc = "Floating-point fused multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmas_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmas_lane_f32<const LANE: i32>(a: f32, b: f32, c: float32x2_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.fma.f32"
+        )]
+        fn _vfmas_lane_f32(a: f32, b: f32, c: f32) -> f32;
+    }
+    static_assert_uimm_bits!(LANE, 1);
+    let c: f32 = simd_extract!(c, LANE as u32);
+    _vfmas_lane_f32(b, c, a)
+}
+#[doc = "Floating-point fused multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmas_laneq_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmas_laneq_f32<const LANE: i32>(a: f32, b: f32, c: float32x4_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.fma.f32"
+        )]
+        fn _vfmas_laneq_f32(a: f32, b: f32, c: f32) -> f32;
+    }
+    static_assert_uimm_bits!(LANE, 2);
+    let c: f32 = simd_extract!(c, LANE as u32);
+    _vfmas_laneq_f32(b, c, a)
+}
+#[doc = "Floating-point fused multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmad_laneq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmad_laneq_f64<const LANE: i32>(a: f64, b: f64, c: float64x2_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.fma.f64"
+        )]
+        fn _vfmad_laneq_f64(a: f64, b: f64, c: f64) -> f64;
+    }
+    static_assert_uimm_bits!(LANE, 1);
+    let c: f64 = simd_extract!(c, LANE as u32);
+    _vfmad_laneq_f64(b, c, a)
+}
+#[doc = "Floating-point fused multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmsub))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfms_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
+    let b: float64x1_t = simd_neg(b);
+    vfma_f64(a, b, c)
+}
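+// Illustrative note: there is no separate fused-multiply-subtract binding
+// here; the `vfms*` vector forms negate `b` and reuse the FMA path, since
+// `a + (-b) * c == a - b * c`. The scalar `vfmss_*`/`vfmsd_*` wrappers
+// further below apply the same trick by passing `-b` to their `vfma*`
+// counterparts.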
+#[doc = "Floating-point fused multiply-subtract to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfms_lane_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x2_t,
+) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vfms_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32)))
+}
+#[doc = "Floating-point fused multiply-subtract to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_laneq_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfms_laneq_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x4_t,
+) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vfms_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32)))
+}
+#[doc = "Floating-point fused multiply-subtract to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmsq_lane_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x2_t,
+) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vfmsq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32)))
+}
+#[doc = "Floating-point fused multiply-subtract to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_laneq_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmsq_laneq_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x4_t,
+) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vfmsq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32)))
+}
+#[doc = "Floating-point fused multiply-subtract to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_laneq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmsq_laneq_f64<const LANE: i32>(
+    a: float64x2_t,
+    b: float64x2_t,
+    c: float64x2_t,
+) -> float64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vfmsq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32)))
+}
+#[doc = "Floating-point fused multiply-subtract to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfms_lane_f64<const LANE: i32>(
+    a: float64x1_t,
+    b: float64x1_t,
+    c: float64x1_t,
+) -> float64x1_t {
+    static_assert!(LANE == 0);
+    vfms_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32)))
+}
+#[doc = "Floating-point fused multiply-subtract to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_laneq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfms_laneq_f64<const LANE: i32>(
+    a: float64x1_t,
+    b: float64x1_t,
+    c: float64x2_t,
+) -> float64x1_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vfms_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32)))
+}
+#[doc = "Floating-point fused Multiply-subtract to accumulator (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmsub))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfms_n_f64(a: float64x1_t, b: float64x1_t, c: f64) -> float64x1_t {
+    vfms_f64(a, b, vdup_n_f64(c))
+}
+#[doc = "Floating-point fused multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmls))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+    let b: float64x2_t = simd_neg(b);
+    vfmaq_f64(a, b, c)
+}
+#[doc = "Floating-point fused multiply-subtract to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmsq_lane_f64<const LANE: i32>(
+    a: float64x2_t,
+    b: float64x2_t,
+    c: float64x1_t,
+) -> float64x2_t {
+    static_assert!(LANE == 0);
+    vfmsq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32)))
+}
+#[doc = "Floating-point fused Multiply-subtract to accumulator (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_n_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmls))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmsq_n_f64(a: float64x2_t, b: float64x2_t, c: f64) -> float64x2_t {
+    vfmsq_f64(a, b, vdupq_n_f64(c))
+}
+#[doc = "Floating-point fused multiply-subtract to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmss_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmss_lane_f32<const LANE: i32>(a: f32, b: f32, c: float32x2_t) -> f32 {
+    vfmas_lane_f32::<LANE>(a, -b, c)
+}
+#[doc = "Floating-point fused multiply-subtract to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmss_laneq_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmss_laneq_f32<const LANE: i32>(a: f32, b: f32, c: float32x4_t) -> f32 {
+    vfmas_laneq_f32::<LANE>(a, -b, c)
+}
+#[doc = "Floating-point fused multiply-subtract to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsd_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmsd_lane_f64<const LANE: i32>(a: f64, b: f64, c: float64x1_t) -> f64 {
+    vfmad_lane_f64::<LANE>(a, -b, c)
+}
+#[doc = "Floating-point fused multiply-subtract to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsd_laneq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vfmsd_laneq_f64<const LANE: i32>(a: f64, b: f64, c: float64x2_t) -> f64 {
+    vfmad_laneq_f64::<LANE>(a, -b, c)
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_f64(ptr: *const f64) -> float64x1_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ldr))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t {
+    crate::ptr::read_unaligned(ptr.cast())
+}
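+// Illustrative note: each single-vector `vld1*` above is a plain unaligned
+// read of the whole vector, which is why the instruction assertions expect
+// `ldr` rather than `ld1`. A usage sketch (baseline `neon` only, inside an
+// `unsafe` block):
+//
+//     let data = [1.0f32, 2.0, 3.0, 4.0];
+//     let v = vld1q_f32(data.as_ptr()); // lanes [1.0, 2.0, 3.0, 4.0]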
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f64_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld1))]
+pub unsafe fn vld1_f64_x2(a: *const f64) -> float64x1x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x2.v1f64.p0f64"
+        )]
+        fn _vld1_f64_x2(a: *const f64) -> float64x1x2_t;
+    }
+    _vld1_f64_x2(a)
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f64_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld1))]
+pub unsafe fn vld1_f64_x3(a: *const f64) -> float64x1x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x3.v1f64.p0f64"
+        )]
+        fn _vld1_f64_x3(a: *const f64) -> float64x1x3_t;
+    }
+    _vld1_f64_x3(a)
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f64_x4)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld1))]
+pub unsafe fn vld1_f64_x4(a: *const f64) -> float64x1x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x4.v1f64.p0f64"
+        )]
+        fn _vld1_f64_x4(a: *const f64) -> float64x1x4_t;
+    }
+    _vld1_f64_x4(a)
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f64_x2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld1))]
+pub unsafe fn vld1q_f64_x2(a: *const f64) -> float64x2x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x2.v2f64.p0f64"
+        )]
+        fn _vld1q_f64_x2(a: *const f64) -> float64x2x2_t;
+    }
+    _vld1q_f64_x2(a)
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f64_x3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld1))]
+pub unsafe fn vld1q_f64_x3(a: *const f64) -> float64x2x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x3.v2f64.p0f64"
+        )]
+        fn _vld1q_f64_x3(a: *const f64) -> float64x2x3_t;
+    }
+    _vld1q_f64_x3(a)
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f64_x4)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld1))]
+pub unsafe fn vld1q_f64_x4(a: *const f64) -> float64x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x4.v2f64.p0f64"
+        )]
+        fn _vld1q_f64_x4(a: *const f64) -> float64x2x4_t;
+    }
+    _vld1q_f64_x4(a)
+}
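+// Illustrative note: unlike the single-vector loads above, the `_x2`/`_x3`/
+// `_x4` forms fill several consecutive registers from one pointer and so
+// lower through the dedicated `llvm.aarch64.neon.ld1x{2,3,4}` intrinsics.
+// For example, `vld1q_f64_x2(p)` reads four consecutive `f64`s: `p[0]`,
+// `p[1]` into field `.0` and `p[2]`, `p[3]` into field `.1`.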
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2_dup_f64(a: *const f64) -> float64x1x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v1f64.p0f64"
+        )]
+        fn _vld2_dup_f64(ptr: *const f64) -> float64x1x2_t;
+    }
+    _vld2_dup_f64(a as _)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2q_dup_f64(a: *const f64) -> float64x2x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v2f64.p0f64"
+        )]
+        fn _vld2q_dup_f64(ptr: *const f64) -> float64x2x2_t;
+    }
+    _vld2q_dup_f64(a as _)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2q_dup_s64(a: *const i64) -> int64x2x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v2i64.p0i64"
+        )]
+        fn _vld2q_dup_s64(ptr: *const i64) -> int64x2x2_t;
+    }
+    _vld2q_dup_s64(a as _)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld2_f64(a: *const f64) -> float64x1x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v1f64.p0v1f64"
+        )]
+        fn _vld2_f64(ptr: *const float64x1_t) -> float64x1x2_t;
+    }
+    _vld2_f64(a as _)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_lane_f64<const LANE: i32>(a: *const f64, b: float64x1x2_t) -> float64x1x2_t {
+    static_assert!(LANE == 0);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v1f64.p0i8"
+        )]
+        fn _vld2_lane_f64(a: float64x1_t, b: float64x1_t, n: i64, ptr: *const i8) -> float64x1x2_t;
+    }
+    _vld2_lane_f64(b.0, b.1, LANE as i64, a as _)
+}
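+// Illustrative note: `llvm.aarch64.neon.ld2lane.*` takes the current pair of
+// vectors, a lane index, and a byte pointer, and returns the pair with only
+// that lane of each vector replaced by the two freshly loaded elements.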
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_lane_s64<const LANE: i32>(a: *const i64, b: int64x1x2_t) -> int64x1x2_t {
+    static_assert!(LANE == 0);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v1i64.p0i8"
+        )]
+        fn _vld2_lane_s64(a: int64x1_t, b: int64x1_t, n: i64, ptr: *const i8) -> int64x1x2_t;
+    }
+    _vld2_lane_s64(b.0, b.1, LANE as i64, a as _)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_lane_p64<const LANE: i32>(a: *const p64, b: poly64x1x2_t) -> poly64x1x2_t {
+    static_assert!(LANE == 0);
+    transmute(vld2_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_lane_u64<const LANE: i32>(a: *const u64, b: uint64x1x2_t) -> uint64x1x2_t {
+    static_assert!(LANE == 0);
+    transmute(vld2_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
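+// Illustrative note: the `u64`/`p64` lane loads above are bit-identical to
+// the signed version, so they are thin `transmute` wrappers around
+// `vld2_lane_s64` rather than separate LLVM bindings; the `p64` variant is
+// additionally gated on `aes`, matching the availability of the polynomial
+// intrinsics.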
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2q_dup_p64(a: *const p64) -> poly64x2x2_t {
+    transmute(vld2q_dup_s64(transmute(a)))
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2q_dup_p64(a: *const p64) -> poly64x2x2_t {
+    let mut ret_val: poly64x2x2_t = transmute(vld2q_dup_s64(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2q_dup_u64(a: *const u64) -> uint64x2x2_t {
+    transmute(vld2q_dup_s64(transmute(a)))
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2q_dup_u64(a: *const u64) -> uint64x2x2_t {
+    let mut ret_val: uint64x2x2_t = transmute(vld2q_dup_s64(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val
+}
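+// Illustrative note: on big-endian targets the wrappers above re-order lanes
+// after the underlying `ld2r` so that callers observe the same lane
+// numbering as on little-endian; `simd_shuffle!(v, v, [1, 0])` is a two-lane
+// reversal applied to each vector of the returned pair.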
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_f64(a: *const f64) -> float64x2x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v2f64.p0v2f64"
+        )]
+        fn _vld2q_f64(ptr: *const float64x2_t) -> float64x2x2_t;
+    }
+    _vld2q_f64(a as _)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_s64(a: *const i64) -> int64x2x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v2i64.p0v2i64"
+        )]
+        fn _vld2q_s64(ptr: *const int64x2_t) -> int64x2x2_t;
+    }
+    _vld2q_s64(a as _)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_f64<const LANE: i32>(a: *const f64, b: float64x2x2_t) -> float64x2x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v2f64.p0i8"
+        )]
+        fn _vld2q_lane_f64(a: float64x2_t, b: float64x2_t, n: i64, ptr: *const i8)
+            -> float64x2x2_t;
+    }
+    _vld2q_lane_f64(b.0, b.1, LANE as i64, a as _)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x2_t) -> int8x16x2_t {
+    static_assert_uimm_bits!(LANE, 4);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v16i8.p0i8"
+        )]
+        fn _vld2q_lane_s8(a: int8x16_t, b: int8x16_t, n: i64, ptr: *const i8) -> int8x16x2_t;
+    }
+    _vld2q_lane_s8(b.0, b.1, LANE as i64, a as _)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x2_t) -> int64x2x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v2i64.p0i8"
+        )]
+        fn _vld2q_lane_s64(a: int64x2_t, b: int64x2_t, n: i64, ptr: *const i8) -> int64x2x2_t;
+    }
+    _vld2q_lane_s64(b.0, b.1, LANE as i64, a as _)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_p64<const LANE: i32>(a: *const p64, b: poly64x2x2_t) -> poly64x2x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    transmute(vld2q_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_u8<const LANE: i32>(a: *const u8, b: uint8x16x2_t) -> uint8x16x2_t {
+    static_assert_uimm_bits!(LANE, 4);
+    transmute(vld2q_lane_s8::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_u64<const LANE: i32>(a: *const u64, b: uint64x2x2_t) -> uint64x2x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    transmute(vld2q_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_p8<const LANE: i32>(a: *const p8, b: poly8x16x2_t) -> poly8x16x2_t {
+    static_assert_uimm_bits!(LANE, 4);
+    transmute(vld2q_lane_s8::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_p64(a: *const p64) -> poly64x2x2_t {
+    transmute(vld2q_s64(transmute(a)))
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_p64(a: *const p64) -> poly64x2x2_t {
+    let mut ret_val: poly64x2x2_t = transmute(vld2q_s64(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_u64(a: *const u64) -> uint64x2x2_t {
+    transmute(vld2q_s64(transmute(a)))
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_u64(a: *const u64) -> uint64x2x2_t {
+    let mut ret_val: uint64x2x2_t = transmute(vld2q_s64(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val
+}
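+// On big-endian targets the wrappers above reuse the little-endian
+// implementation and then reverse the lanes of each returned register
+// with `simd_shuffle!(v, v, [1, 0])`, so callers observe the documented
+// lane order regardless of endianness.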
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3_dup_f64(a: *const f64) -> float64x1x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3r.v1f64.p0f64"
+        )]
+        fn _vld3_dup_f64(ptr: *const f64) -> float64x1x3_t;
+    }
+    _vld3_dup_f64(a as _)
+}
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3q_dup_f64(a: *const f64) -> float64x2x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3r.v2f64.p0f64"
+        )]
+        fn _vld3q_dup_f64(ptr: *const f64) -> float64x2x3_t;
+    }
+    _vld3q_dup_f64(a as _)
+}
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3q_dup_s64(a: *const i64) -> int64x2x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3r.v2i64.p0i64"
+        )]
+        fn _vld3q_dup_s64(ptr: *const i64) -> int64x2x3_t;
+    }
+    _vld3q_dup_s64(a as _)
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld3_f64(a: *const f64) -> float64x1x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3.v1f64.p0v1f64"
+        )]
+        fn _vld3_f64(ptr: *const float64x1_t) -> float64x1x3_t;
+    }
+    _vld3_f64(a as _)
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_lane_f64<const LANE: i32>(a: *const f64, b: float64x1x3_t) -> float64x1x3_t {
+    static_assert!(LANE == 0);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3lane.v1f64.p0i8"
+        )]
+        fn _vld3_lane_f64(
+            a: float64x1_t,
+            b: float64x1_t,
+            c: float64x1_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> float64x1x3_t;
+    }
+    _vld3_lane_f64(b.0, b.1, b.2, LANE as i64, a as _)
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_lane_p64<const LANE: i32>(a: *const p64, b: poly64x1x3_t) -> poly64x1x3_t {
+    static_assert!(LANE == 0);
+    transmute(vld3_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_lane_s64<const LANE: i32>(a: *const i64, b: int64x1x3_t) -> int64x1x3_t {
+    static_assert!(LANE == 0);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3lane.v1i64.p0i8"
+        )]
+        fn _vld3_lane_s64(
+            a: int64x1_t,
+            b: int64x1_t,
+            c: int64x1_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int64x1x3_t;
+    }
+    _vld3_lane_s64(b.0, b.1, b.2, LANE as i64, a as _)
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_lane_u64<const LANE: i32>(a: *const u64, b: uint64x1x3_t) -> uint64x1x3_t {
+    static_assert!(LANE == 0);
+    transmute(vld3_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
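+// The 64-bit D-register variants above hold a single lane per register,
+// so they assert `LANE == 0` rather than a bit-width. A sketch with
+// hypothetical values:
+//
+//     let src: [u64; 3] = [1, 2, 3];
+//     let b = uint64x1x3_t(vdup_n_u64(0), vdup_n_u64(0), vdup_n_u64(0));
+//     let r = vld3_lane_u64::<0>(src.as_ptr(), b); // only LANE = 0 compiles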
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3q_dup_p64(a: *const p64) -> poly64x2x3_t {
+    transmute(vld3q_dup_s64(transmute(a)))
+}
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3q_dup_p64(a: *const p64) -> poly64x2x3_t {
+    let mut ret_val: poly64x2x3_t = transmute(vld3q_dup_s64(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
+    ret_val
+}
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3q_dup_u64(a: *const u64) -> uint64x2x3_t {
+    transmute(vld3q_dup_s64(transmute(a)))
+}
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3q_dup_u64(a: *const u64) -> uint64x2x3_t {
+    let mut ret_val: uint64x2x3_t = transmute(vld3q_dup_s64(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
+    ret_val
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3q_f64(a: *const f64) -> float64x2x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3.v2f64.p0v2f64"
+        )]
+        fn _vld3q_f64(ptr: *const float64x2_t) -> float64x2x3_t;
+    }
+    _vld3q_f64(a as _)
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3q_s64(a: *const i64) -> int64x2x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3.v2i64.p0v2i64"
+        )]
+        fn _vld3q_s64(ptr: *const int64x2_t) -> int64x2x3_t;
+    }
+    _vld3q_s64(a as _)
+}
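+// A de-interleaving sketch (hypothetical values): `vld3q_s64` splits six
+// consecutive elements `[a0, b0, c0, a1, b1, c1]` across three registers.
+//
+//     let src: [i64; 6] = [0, 10, 20, 1, 11, 21];
+//     let r = vld3q_s64(src.as_ptr());
+//     // r.0 == [0, 1], r.1 == [10, 11], r.2 == [20, 21]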
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_f64<const LANE: i32>(a: *const f64, b: float64x2x3_t) -> float64x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3lane.v2f64.p0i8"
+        )]
+        fn _vld3q_lane_f64(
+            a: float64x2_t,
+            b: float64x2_t,
+            c: float64x2_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> float64x2x3_t;
+    }
+    _vld3q_lane_f64(b.0, b.1, b.2, LANE as i64, a as _)
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_p64<const LANE: i32>(a: *const p64, b: poly64x2x3_t) -> poly64x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
+    transmute(vld3q_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x3_t) -> int8x16x3_t {
+    static_assert_uimm_bits!(LANE, 4);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3lane.v16i8.p0i8"
+        )]
+        fn _vld3q_lane_s8(
+            a: int8x16_t,
+            b: int8x16_t,
+            c: int8x16_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int8x16x3_t;
+    }
+    _vld3q_lane_s8(b.0, b.1, b.2, LANE as i64, a as _)
+}
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x3_t) -> int64x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3lane.v2i64.p0i8"
+        )]
+        fn _vld3q_lane_s64(
+            a: int64x2_t,
+            b: int64x2_t,
+            c: int64x2_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int64x2x3_t;
+    }
+    _vld3q_lane_s64(b.0, b.1, b.2, LANE as i64, a as _)
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_u8<const LANE: i32>(a: *const u8, b: uint8x16x3_t) -> uint8x16x3_t {
+    static_assert_uimm_bits!(LANE, 4);
+    transmute(vld3q_lane_s8::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_u64<const LANE: i32>(a: *const u64, b: uint64x2x3_t) -> uint64x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
+    transmute(vld3q_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_p8<const LANE: i32>(a: *const p8, b: poly8x16x3_t) -> poly8x16x3_t {
+    static_assert_uimm_bits!(LANE, 4);
+    transmute(vld3q_lane_s8::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3q_p64(a: *const p64) -> poly64x2x3_t {
+    transmute(vld3q_s64(transmute(a)))
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3q_p64(a: *const p64) -> poly64x2x3_t {
+    let mut ret_val: poly64x2x3_t = transmute(vld3q_s64(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
+    ret_val
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3q_u64(a: *const u64) -> uint64x2x3_t {
+    transmute(vld3q_s64(transmute(a)))
+}
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3q_u64(a: *const u64) -> uint64x2x3_t {
+    let mut ret_val: uint64x2x3_t = transmute(vld3q_s64(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
+    ret_val
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_dup_f64(a: *const f64) -> float64x1x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4r.v1f64.p0f64"
+        )]
+        fn _vld4_dup_f64(ptr: *const f64) -> float64x1x4_t;
+    }
+    _vld4_dup_f64(a as _)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_dup_f64(a: *const f64) -> float64x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4r.v2f64.p0f64"
+        )]
+        fn _vld4q_dup_f64(ptr: *const f64) -> float64x2x4_t;
+    }
+    _vld4q_dup_f64(a as _)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_dup_s64(a: *const i64) -> int64x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4r.v2i64.p0i64"
+        )]
+        fn _vld4q_dup_s64(ptr: *const i64) -> int64x2x4_t;
+    }
+    _vld4q_dup_s64(a as _)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld4_f64(a: *const f64) -> float64x1x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4.v1f64.p0v1f64"
+        )]
+        fn _vld4_f64(ptr: *const float64x1_t) -> float64x1x4_t;
+    }
+    _vld4_f64(a as _)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_lane_f64<const LANE: i32>(a: *const f64, b: float64x1x4_t) -> float64x1x4_t {
+    static_assert!(LANE == 0);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v1f64.p0i8"
+        )]
+        fn _vld4_lane_f64(
+            a: float64x1_t,
+            b: float64x1_t,
+            c: float64x1_t,
+            d: float64x1_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> float64x1x4_t;
+    }
+    _vld4_lane_f64(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_lane_s64<const LANE: i32>(a: *const i64, b: int64x1x4_t) -> int64x1x4_t {
+    static_assert!(LANE == 0);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v1i64.p0i8"
+        )]
+        fn _vld4_lane_s64(
+            a: int64x1_t,
+            b: int64x1_t,
+            c: int64x1_t,
+            d: int64x1_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int64x1x4_t;
+    }
+    _vld4_lane_s64(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_lane_p64<const LANE: i32>(a: *const p64, b: poly64x1x4_t) -> poly64x1x4_t {
+    static_assert!(LANE == 0);
+    transmute(vld4_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_lane_u64<const LANE: i32>(a: *const u64, b: uint64x1x4_t) -> uint64x1x4_t {
+    static_assert!(LANE == 0);
+    transmute(vld4_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_dup_p64(a: *const p64) -> poly64x2x4_t {
+    transmute(vld4q_dup_s64(transmute(a)))
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_dup_p64(a: *const p64) -> poly64x2x4_t {
+    let mut ret_val: poly64x2x4_t = transmute(vld4q_dup_s64(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [1, 0]);
+    ret_val
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_dup_u64(a: *const u64) -> uint64x2x4_t {
+    transmute(vld4q_dup_s64(transmute(a)))
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_dup_u64(a: *const u64) -> uint64x2x4_t {
+    let mut ret_val: uint64x2x4_t = transmute(vld4q_dup_s64(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [1, 0]);
+    ret_val
+}
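+// A replication sketch (hypothetical values): the `_dup` loads read one
+// whole structure and broadcast each element to every lane of its
+// register.
+//
+//     let src: [u64; 4] = [1, 2, 3, 4];
+//     let r = vld4q_dup_u64(src.as_ptr());
+//     // r.0 == [1, 1], r.1 == [2, 2], r.2 == [3, 3], r.3 == [4, 4]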
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4q_f64(a: *const f64) -> float64x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4.v2f64.p0v2f64"
+        )]
+        fn _vld4q_f64(ptr: *const float64x2_t) -> float64x2x4_t;
+    }
+    _vld4q_f64(a as _)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4q_s64(a: *const i64) -> int64x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4.v2i64.p0v2i64"
+        )]
+        fn _vld4q_s64(ptr: *const int64x2_t) -> int64x2x4_t;
+    }
+    _vld4q_s64(a as _)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_lane_f64<const LANE: i32>(a: *const f64, b: float64x2x4_t) -> float64x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v2f64.p0i8"
+        )]
+        fn _vld4q_lane_f64(
+            a: float64x2_t,
+            b: float64x2_t,
+            c: float64x2_t,
+            d: float64x2_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> float64x2x4_t;
+    }
+    _vld4q_lane_f64(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x4_t) -> int8x16x4_t {
+    static_assert_uimm_bits!(LANE, 4);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v16i8.p0i8"
+        )]
+        fn _vld4q_lane_s8(
+            a: int8x16_t,
+            b: int8x16_t,
+            c: int8x16_t,
+            d: int8x16_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int8x16x4_t;
+    }
+    _vld4q_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x4_t) -> int64x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v2i64.p0i8"
+        )]
+        fn _vld4q_lane_s64(
+            a: int64x2_t,
+            b: int64x2_t,
+            c: int64x2_t,
+            d: int64x2_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int64x2x4_t;
+    }
+    _vld4q_lane_s64(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_lane_p64<const LANE: i32>(a: *const p64, b: poly64x2x4_t) -> poly64x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    transmute(vld4q_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_lane_u8<const LANE: i32>(a: *const u8, b: uint8x16x4_t) -> uint8x16x4_t {
+    static_assert_uimm_bits!(LANE, 4);
+    transmute(vld4q_lane_s8::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_lane_u64<const LANE: i32>(a: *const u64, b: uint64x2x4_t) -> uint64x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    transmute(vld4q_lane_s64::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_lane_p8<const LANE: i32>(a: *const p8, b: poly8x16x4_t) -> poly8x16x4_t {
+    static_assert_uimm_bits!(LANE, 4);
+    transmute(vld4q_lane_s8::<LANE>(transmute(a), transmute(b)))
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4q_p64(a: *const p64) -> poly64x2x4_t {
+    transmute(vld4q_s64(transmute(a)))
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4q_p64(a: *const p64) -> poly64x2x4_t {
+    let mut ret_val: poly64x2x4_t = transmute(vld4q_s64(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [1, 0]);
+    ret_val
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4q_u64(a: *const u64) -> uint64x2x4_t {
+    transmute(vld4q_s64(transmute(a)))
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4q_u64(a: *const u64) -> uint64x2x4_t {
+    let mut ret_val: uint64x2x4_t = transmute(vld4q_s64(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [1, 0]);
+    ret_val
+}
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fmax))]
+pub unsafe fn vmax_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmax.v1f64"
+        )]
+        fn _vmax_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+    }
+    _vmax_f64(a, b)
+}
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fmax))]
+pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmax.v2f64"
+        )]
+        fn _vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    _vmaxq_f64(a, b)
+}
+#[doc = "Floating-point Maximum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fmaxnm))]
+pub unsafe fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnm.v1f64"
+        )]
+        fn _vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+    }
+    _vmaxnm_f64(a, b)
+}
+#[doc = "Floating-point Maximum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fmaxnm))]
+pub unsafe fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnm.v2f64"
+        )]
+        fn _vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    _vmaxnmq_f64(a, b)
+}
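+// `fmax` and `fmaxnm` differ only in NaN handling, sketched below with
+// hypothetical values: `vmaxq_f64` propagates a NaN operand, while
+// `vmaxnmq_f64` follows IEEE 754-2008 maxNum and returns the numeric
+// operand instead.
+//
+//     let a = vdupq_n_f64(f64::NAN);
+//     let b = vdupq_n_f64(1.0);
+//     let quiet = vmaxq_f64(a, b); // both lanes NaN
+//     let num = vmaxnmq_f64(a, b); // both lanes 1.0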
+#[doc = "Floating-point maximum number across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmv_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+pub unsafe fn vmaxnmv_f32(a: float32x2_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmv.f32.v2f32"
+        )]
+        fn _vmaxnmv_f32(a: float32x2_t) -> f32;
+    }
+    _vmaxnmv_f32(a)
+}
+#[doc = "Floating-point maximum number across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmvq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+pub unsafe fn vmaxnmvq_f64(a: float64x2_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmv.f64.v2f64"
+        )]
+        fn _vmaxnmvq_f64(a: float64x2_t) -> f64;
+    }
+    _vmaxnmvq_f64(a)
+}
+#[doc = "Floating-point maximum number across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmvq_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fmaxnmv))]
+pub unsafe fn vmaxnmvq_f32(a: float32x4_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmv.f32.v4f32"
+        )]
+        fn _vmaxnmvq_f32(a: float32x4_t) -> f32;
+    }
+    _vmaxnmvq_f32(a)
+}
+#[doc = "Horizontal vector max."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxv_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fmaxp))]
+pub unsafe fn vmaxv_f32(a: float32x2_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxv.f32.v2f32"
+        )]
+        fn _vmaxv_f32(a: float32x2_t) -> f32;
+    }
+    _vmaxv_f32(a)
+}
+#[doc = "Horizontal vector max."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxvq_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fmaxv))]
+pub unsafe fn vmaxvq_f32(a: float32x4_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxv.f32.v4f32"
+        )]
+        fn _vmaxvq_f32(a: float32x4_t) -> f32;
+    }
+    _vmaxvq_f32(a)
+}
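+// A horizontal-reduction sketch (hypothetical values):
+//
+//     let v = vld1q_f32([1.0f32, 5.0, -2.0, 3.0].as_ptr());
+//     assert_eq!(vmaxvq_f32(v), 5.0);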
+#[doc = "Horizontal vector max."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxvq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fmaxp))]
+pub unsafe fn vmaxvq_f64(a: float64x2_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxv.f64.v2f64"
+        )]
+        fn _vmaxvq_f64(a: float64x2_t) -> f64;
+    }
+    _vmaxvq_f64(a)
+}
+#[doc = "Horizontal vector max."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxv_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(smaxv))]
+pub unsafe fn vmaxv_s8(a: int8x8_t) -> i8 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smaxv.i8.v8i8"
+        )]
+        fn _vmaxv_s8(a: int8x8_t) -> i8;
+    }
+    _vmaxv_s8(a)
+}
+#[doc = "Horizontal vector max."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxvq_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(smaxv))]
+pub unsafe fn vmaxvq_s8(a: int8x16_t) -> i8 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smaxv.i8.v16i8"
+        )]
+        fn _vmaxvq_s8(a: int8x16_t) -> i8;
+    }
+    _vmaxvq_s8(a)
+}
+#[doc = "Horizontal vector max."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxv_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(smaxv))]
+pub unsafe fn vmaxv_s16(a: int16x4_t) -> i16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smaxv.i16.v4i16"
+        )]
+        fn _vmaxv_s16(a: int16x4_t) -> i16;
+    }
+    _vmaxv_s16(a)
+}
+#[doc = "Horizontal vector max."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxvq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(smaxv))]
+pub unsafe fn vmaxvq_s16(a: int16x8_t) -> i16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smaxv.i16.v8i16"
+        )]
+        fn _vmaxvq_s16(a: int16x8_t) -> i16;
+    }
+    _vmaxvq_s16(a)
+}
+#[doc = "Horizontal vector max."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxv_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(smaxp))]
+pub unsafe fn vmaxv_s32(a: int32x2_t) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smaxv.i32.v2i32"
+        )]
+        fn _vmaxv_s32(a: int32x2_t) -> i32;
+    }
+    _vmaxv_s32(a)
+}
+#[doc = "Horizontal vector max."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxvq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(smaxv))]
+pub unsafe fn vmaxvq_s32(a: int32x4_t) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smaxv.i32.v4i32"
+        )]
+        fn _vmaxvq_s32(a: int32x4_t) -> i32;
+    }
+    _vmaxvq_s32(a)
+}
+#[doc = "Horizontal vector max."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxv_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(umaxv))]
+pub unsafe fn vmaxv_u8(a: uint8x8_t) -> u8 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umaxv.i8.v8i8"
+        )]
+        fn _vmaxv_u8(a: int8x8_t) -> i8;
+    }
+    _vmaxv_u8(a.as_signed()).as_unsigned()
+}
+#[doc = "Horizontal vector max."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxvq_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(umaxv))]
+pub unsafe fn vmaxvq_u8(a: uint8x16_t) -> u8 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umaxv.i8.v16i8"
+        )]
+        fn _vmaxvq_u8(a: int8x16_t) -> i8;
+    }
+    _vmaxvq_u8(a.as_signed()).as_unsigned()
+}
+#[doc = "Horizontal vector max."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxv_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(umaxv))]
+pub unsafe fn vmaxv_u16(a: uint16x4_t) -> u16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umaxv.i16.v4i16"
+        )]
+        fn _vmaxv_u16(a: int16x4_t) -> i16;
+    }
+    _vmaxv_u16(a.as_signed()).as_unsigned()
+}
+#[doc = "Horizontal vector max."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxvq_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(umaxv))]
+pub unsafe fn vmaxvq_u16(a: uint16x8_t) -> u16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umaxv.i16.v8i16"
+        )]
+        fn _vmaxvq_u16(a: int16x8_t) -> i16;
+    }
+    _vmaxvq_u16(a.as_signed()).as_unsigned()
+}
+#[doc = "Horizontal vector max."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxv_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(umaxp))]
+pub unsafe fn vmaxv_u32(a: uint32x2_t) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umaxv.i32.v2i32"
+        )]
+        fn _vmaxv_u32(a: int32x2_t) -> i32;
+    }
+    _vmaxv_u32(a.as_signed()).as_unsigned()
+}
+#[doc = "Horizontal vector max."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxvq_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(umaxv))]
+pub unsafe fn vmaxvq_u32(a: uint32x4_t) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umaxv.i32.v4i32"
+        )]
+        fn _vmaxvq_u32(a: int32x4_t) -> i32;
+    }
+    _vmaxvq_u32(a.as_signed()).as_unsigned()
+}
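+// The unsigned reductions above reuse the signed-typed extern
+// declarations: `as_signed()` / `as_unsigned()` only reinterpret the bit
+// pattern, and the `llvm.aarch64.neon.umaxv.*` intrinsics already treat
+// those bits as unsigned, so no value conversion occurs on either side.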
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fmin))]
+pub unsafe fn vmin_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmin.v1f64"
+        )]
+        fn _vmin_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+    }
+    _vmin_f64(a, b)
+}
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fmin))]
+pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmin.v2f64"
+        )]
+        fn _vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    _vminq_f64(a, b)
+}
+#[doc = "Floating-point Minimum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fminnm))]
+pub unsafe fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.v1f64"
+        )]
+        fn _vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+    }
+    _vminnm_f64(a, b)
+}
+#[doc = "Floating-point Minimum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fminnm))]
+pub unsafe fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.v2f64"
+        )]
+        fn _vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    _vminnmq_f64(a, b)
+}
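+// Editorial note: vmin_* (fmin) and vminnm_* (fminnm) differ only in NaN
+// handling; fmin propagates a NaN operand, while fminnm implements IEEE 754
+// minNum and prefers the numeric operand. A hedged sketch:
+//
+//     let a = vdup_n_f64(f64::NAN);
+//     let b = vdup_n_f64(1.0);
+//     // vminnm_f64(a, b) yields 1.0, whereas vmin_f64(a, b) yields NaN.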
+#[doc = "Floating-point minimum number across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmv_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminnmv_f32(a: float32x2_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmv.f32.v2f32"
+        )]
+        fn _vminnmv_f32(a: float32x2_t) -> f32;
+    }
+    _vminnmv_f32(a)
+}
+#[doc = "Floating-point minimum number across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmvq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminnmvq_f64(a: float64x2_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmv.f64.v2f64"
+        )]
+        fn _vminnmvq_f64(a: float64x2_t) -> f64;
+    }
+    _vminnmvq_f64(a)
+}
+#[doc = "Floating-point minimum number across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmvq_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fminnmv))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vminnmvq_f32(a: float32x4_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmv.f32.v4f32"
+        )]
+        fn _vminnmvq_f32(a: float32x4_t) -> f32;
+    }
+    _vminnmvq_f32(a)
+}
+#[doc = "Horizontal vector min."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminv_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fminp))]
+pub unsafe fn vminv_f32(a: float32x2_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminv.f32.v2f32"
+        )]
+        fn _vminv_f32(a: float32x2_t) -> f32;
+    }
+    _vminv_f32(a)
+}
+#[doc = "Horizontal vector min."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminvq_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fminv))]
+pub unsafe fn vminvq_f32(a: float32x4_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminv.f32.v4f32"
+        )]
+        fn _vminvq_f32(a: float32x4_t) -> f32;
+    }
+    _vminvq_f32(a)
+}
+#[doc = "Horizontal vector min."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminvq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fminp))]
+pub unsafe fn vminvq_f64(a: float64x2_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminv.f64.v2f64"
+        )]
+        fn _vminvq_f64(a: float64x2_t) -> f64;
+    }
+    _vminvq_f64(a)
+}
+#[doc = "Horizontal vector min."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminv_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(sminv))]
+pub unsafe fn vminv_s8(a: int8x8_t) -> i8 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminv.i8.v8i8"
+        )]
+        fn _vminv_s8(a: int8x8_t) -> i8;
+    }
+    _vminv_s8(a)
+}
+#[doc = "Horizontal vector min."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminvq_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(sminv))]
+pub unsafe fn vminvq_s8(a: int8x16_t) -> i8 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminv.i8.v16i8"
+        )]
+        fn _vminvq_s8(a: int8x16_t) -> i8;
+    }
+    _vminvq_s8(a)
+}
+#[doc = "Horizontal vector min."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminv_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(sminv))]
+pub unsafe fn vminv_s16(a: int16x4_t) -> i16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminv.i16.v4i16"
+        )]
+        fn _vminv_s16(a: int16x4_t) -> i16;
+    }
+    _vminv_s16(a)
+}
+#[doc = "Horizontal vector min."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminvq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(sminv))]
+pub unsafe fn vminvq_s16(a: int16x8_t) -> i16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminv.i16.v8i16"
+        )]
+        fn _vminvq_s16(a: int16x8_t) -> i16;
+    }
+    _vminvq_s16(a)
+}
+#[doc = "Horizontal vector min."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminv_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(sminp))]
+pub unsafe fn vminv_s32(a: int32x2_t) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminv.i32.v2i32"
+        )]
+        fn _vminv_s32(a: int32x2_t) -> i32;
+    }
+    _vminv_s32(a)
+}
+#[doc = "Horizontal vector min."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminvq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(sminv))]
+pub unsafe fn vminvq_s32(a: int32x4_t) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminv.i32.v4i32"
+        )]
+        fn _vminvq_s32(a: int32x4_t) -> i32;
+    }
+    _vminvq_s32(a)
+}
+#[doc = "Horizontal vector min."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminv_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(uminv))]
+pub unsafe fn vminv_u8(a: uint8x8_t) -> u8 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminv.i8.v8i8"
+        )]
+        fn _vminv_u8(a: int8x8_t) -> i8;
+    }
+    _vminv_u8(a.as_signed()).as_unsigned()
+}
+#[doc = "Horizontal vector min."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminvq_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(uminv))]
+pub unsafe fn vminvq_u8(a: uint8x16_t) -> u8 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminv.i8.v16i8"
+        )]
+        fn _vminvq_u8(a: int8x16_t) -> i8;
+    }
+    _vminvq_u8(a.as_signed()).as_unsigned()
+}
+#[doc = "Horizontal vector min."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminv_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(uminv))]
+pub unsafe fn vminv_u16(a: uint16x4_t) -> u16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminv.i16.v4i16"
+        )]
+        fn _vminv_u16(a: int16x4_t) -> i16;
+    }
+    _vminv_u16(a.as_signed()).as_unsigned()
+}
+#[doc = "Horizontal vector min."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminvq_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(uminv))]
+pub unsafe fn vminvq_u16(a: uint16x8_t) -> u16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminv.i16.v8i16"
+        )]
+        fn _vminvq_u16(a: int16x8_t) -> i16;
+    }
+    _vminvq_u16(a.as_signed()).as_unsigned()
+}
+#[doc = "Horizontal vector min."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminv_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(uminp))]
+pub unsafe fn vminv_u32(a: uint32x2_t) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminv.i32.v2i32"
+        )]
+        fn _vminv_u32(a: int32x2_t) -> i32;
+    }
+    _vminv_u32(a.as_signed()).as_unsigned()
+}
+#[doc = "Horizontal vector min."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminvq_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(uminv))]
+pub unsafe fn vminvq_u32(a: uint32x4_t) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminv.i32.v4i32"
+        )]
+        fn _vminvq_u32(a: int32x4_t) -> i32;
+    }
+    _vminvq_u32(a.as_signed()).as_unsigned()
+}
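+// Editorial note: the unsigned reductions above have no separate LLVM builtin
+// signature, so they reinterpret the lanes as signed via as_signed(), call the
+// llvm.aarch64.neon.u{max,min}v.* symbol, and cast the scalar result back via
+// as_unsigned(); only the nominal type changes, never the lane bits.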
+#[doc = "Floating-point multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmul))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmla_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
+    simd_add(a, simd_mul(b, c))
+}
+#[doc = "Floating-point multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fmul))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+    simd_add(a, simd_mul(b, c))
+}
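+// Editorial note: despite the "multiply-add" name, vmla_f64/vmlaq_f64 are the
+// unfused form (hence assert_instr(fmul) above): a + b * c with an intermediate
+// rounding step. A hedged sketch of the contrast:
+//
+//     let unfused = vmla_f64(a, b, c); // fmul then fadd, rounds twice
+//     let fused = vfma_f64(a, b, c);   // single fused fmla, rounds once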
+#[doc = "Multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_lane_s16<const LANE: i32>(
+    a: int32x4_t,
+    b: int16x8_t,
+    c: int16x4_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlal_high_s16(
+        a,
+        b,
+        simd_shuffle!(
+            c,
+            c,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
             ]
         ),
-        13 => simd_shuffle!(
-            a,
-            b,
+    )
+}
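+// Editorial sketch: the lane variants splat lane LANE of c across the vector
+// and defer to the plain high intrinsic, so for each output lane i in 0..4:
+//
+//     out[i] = a[i] + (b[4 + i] as i32) * (c[LANE] as i32)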
+#[doc = "Multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_laneq_s16<const LANE: i32>(
+    a: int32x4_t,
+    b: int16x8_t,
+    c: int16x8_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlal_high_s16(
+        a,
+        b,
+        simd_shuffle!(
+            c,
+            c,
             [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                16 + LANE2 as u32,
-                14,
-                15
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
             ]
         ),
-        14 => simd_shuffle!(
-            a,
-            b,
+    )
+}
+#[doc = "Multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_lane_s32<const LANE: i32>(
+    a: int64x2_t,
+    b: int32x4_t,
+    c: int32x2_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlal_high_s32(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
+}
+#[doc = "Multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_laneq_s32<const LANE: i32>(
+    a: int64x2_t,
+    b: int32x4_t,
+    c: int32x4_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlal_high_s32(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
+}
+#[doc = "Multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_lane_u16<const LANE: i32>(
+    a: uint32x4_t,
+    b: uint16x8_t,
+    c: uint16x4_t,
+) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlal_high_u16(
+        a,
+        b,
+        simd_shuffle!(
+            c,
+            c,
             [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                16 + LANE2 as u32,
-                15
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
             ]
         ),
-        15 => simd_shuffle!(
-            a,
-            b,
+    )
+}
+#[doc = "Multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_laneq_u16<const LANE: i32>(
+    a: uint32x4_t,
+    b: uint16x8_t,
+    c: uint16x8_t,
+) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlal_high_u16(
+        a,
+        b,
+        simd_shuffle!(
+            c,
+            c,
             [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                6,
-                7,
-                8,
-                9,
-                10,
-                11,
-                12,
-                13,
-                14,
-                16 + LANE2 as u32
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
             ]
         ),
-        _ => unreachable_unchecked(),
-    }
+    )
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_p16)"]
+#[doc = "Multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
+#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopy_laneq_p16<const LANE1: i32, const LANE2: i32>(
-    a: poly16x4_t,
-    b: poly16x8_t,
-) -> poly16x4_t {
-    static_assert_uimm_bits!(LANE1, 2);
-    static_assert_uimm_bits!(LANE2, 3);
-    let a: poly16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
-    match LANE1 & 0b11 {
-        0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]),
-        1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]),
-        2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn vmlal_high_lane_u32<const LANE: i32>(
+    a: uint64x2_t,
+    b: uint32x4_t,
+    c: uint32x2_t,
+) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlal_high_u32(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
+}
+#[doc = "Multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_laneq_u32<const LANE: i32>(
+    a: uint64x2_t,
+    b: uint32x4_t,
+    c: uint32x4_t,
+) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlal_high_u32(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
+}
+#[doc = "Multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
+    vmlal_high_s16(a, b, vdupq_n_s16(c))
+}
+#[doc = "Multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
+    vmlal_high_s32(a, b, vdupq_n_s32(c))
+}
+#[doc = "Multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t {
+    vmlal_high_u16(a, b, vdupq_n_u16(c))
+}
+#[doc = "Multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t {
+    vmlal_high_u32(a, b, vdupq_n_u32(c))
+}
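+// Editorial note: the _n_ forms above are thin wrappers that broadcast the
+// scalar operand before accumulating, e.g. (sketch):
+//
+//     vmlal_high_n_s16(a, b, 3) == vmlal_high_s16(a, b, vdupq_n_s16(3))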
+#[doc = "Signed multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
+    let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+    let c: int8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
+    vmlal_s8(a, b, c)
+}
+#[doc = "Signed multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+    let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
+    let c: int16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
+    vmlal_s16(a, b, c)
+}
+#[doc = "Signed multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+    let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
+    let c: int32x2_t = simd_shuffle!(c, c, [2, 3]);
+    vmlal_s32(a, b, c)
+}
+#[doc = "Unsigned multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t {
+    let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+    let c: uint8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
+    vmlal_u8(a, b, c)
+}
+#[doc = "Unsigned multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
+    let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
+    let c: uint16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
+    vmlal_u16(a, b, c)
+}
+#[doc = "Unsigned multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
+    let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
+    let c: uint32x2_t = simd_shuffle!(c, c, [2, 3]);
+    vmlal_u32(a, b, c)
 }
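+// Editorial sketch: each vmlal_high_* above takes the upper half of its
+// 128-bit inputs via simd_shuffle! and reuses the 64-bit vmlal_*; spelled
+// with public intrinsics this would be, hedged:
+//
+//     vmlal_high_u32(a, b, c) == vmlal_u32(a, vget_high_u32(b), vget_high_u32(c))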
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p16)"]
+#[doc = "Floating-point multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
+#[cfg_attr(test, assert_instr(fmul))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_laneq_p16<const LANE1: i32, const LANE2: i32>(
-    a: poly16x8_t,
-    b: poly16x8_t,
-) -> poly16x8_t {
-    static_assert_uimm_bits!(LANE1, 3);
-    static_assert_uimm_bits!(LANE2, 3);
-    match LANE1 & 0b111 {
-        0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-        1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-        2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-        4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-        5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-        6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-        7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn vmls_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
+    simd_sub(a, simd_mul(b, c))
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p64)"]
+#[doc = "Floating-point multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
+#[cfg_attr(test, assert_instr(fmul))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_laneq_p64<const LANE1: i32, const LANE2: i32>(
-    a: poly64x2_t,
-    b: poly64x2_t,
-) -> poly64x2_t {
-    static_assert_uimm_bits!(LANE1, 1);
-    static_assert_uimm_bits!(LANE2, 1);
-    match LANE1 & 0b1 {
-        0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-        1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn vmlsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+    simd_sub(a, simd_mul(b, c))
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_f32)"]
+#[doc = "Multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
-#[rustc_legacy_const_generics(1, 3)]
+#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_lane_f32<const LANE1: i32, const LANE2: i32>(
-    a: float32x4_t,
-    b: float32x2_t,
-) -> float32x4_t {
-    static_assert_uimm_bits!(LANE1, 2);
-    static_assert_uimm_bits!(LANE2, 1);
-    let b: float32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]);
-    match LANE1 & 0b11 {
-        0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-        1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-        2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-        3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn vmlsl_high_lane_s16<const LANE: i32>(
+    a: int32x4_t,
+    b: int16x8_t,
+    c: int16x4_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsl_high_s16(
+        a,
+        b,
+        simd_shuffle!(
+            c,
+            c,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
+            ]
+        ),
+    )
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_f64)"]
+#[doc = "Multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
-#[rustc_legacy_const_generics(1, 3)]
+#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_lane_f64<const LANE1: i32, const LANE2: i32>(
-    a: float64x2_t,
-    b: float64x1_t,
-) -> float64x2_t {
-    static_assert_uimm_bits!(LANE1, 1);
-    static_assert!(LANE2 == 0);
-    let b: float64x2_t = simd_shuffle!(b, b, [0, 1]);
-    match LANE1 & 0b1 {
-        0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-        1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn vmlsl_high_laneq_s16<const LANE: i32>(
+    a: int32x4_t,
+    b: int16x8_t,
+    c: int16x8_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlsl_high_s16(
+        a,
+        b,
+        simd_shuffle!(
+            c,
+            c,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
+            ]
+        ),
+    )
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s64)"]
+#[doc = "Multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
-#[rustc_legacy_const_generics(1, 3)]
+#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_lane_s64<const LANE1: i32, const LANE2: i32>(
+pub unsafe fn vmlsl_high_lane_s32<const LANE: i32>(
     a: int64x2_t,
-    b: int64x1_t,
+    b: int32x4_t,
+    c: int32x2_t,
 ) -> int64x2_t {
-    static_assert_uimm_bits!(LANE1, 1);
-    static_assert!(LANE2 == 0);
-    let b: int64x2_t = simd_shuffle!(b, b, [0, 1]);
-    match LANE1 & 0b1 {
-        0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-        1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
+    static_assert_uimm_bits!(LANE, 1);
+    vmlsl_high_s32(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u64)"]
+#[doc = "Multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
-#[rustc_legacy_const_generics(1, 3)]
+#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_lane_u64<const LANE1: i32, const LANE2: i32>(
-    a: uint64x2_t,
-    b: uint64x1_t,
-) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE1, 1);
-    static_assert!(LANE2 == 0);
-    let b: uint64x2_t = simd_shuffle!(b, b, [0, 1]);
-    match LANE1 & 0b1 {
-        0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-        1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn vmlsl_high_laneq_s32<const LANE: i32>(
+    a: int64x2_t,
+    b: int32x4_t,
+    c: int32x4_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsl_high_s32(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p64)"]
+#[doc = "Multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
-#[rustc_legacy_const_generics(1, 3)]
+#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcopyq_lane_p64<const LANE1: i32, const LANE2: i32>(
-    a: poly64x2_t,
-    b: poly64x1_t,
-) -> poly64x2_t {
-    static_assert_uimm_bits!(LANE1, 1);
-    static_assert!(LANE2 == 0);
-    let b: poly64x2_t = simd_shuffle!(b, b, [0, 1]);
-    match LANE1 & 0b1 {
-        0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-        1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn vmlsl_high_lane_u16<const LANE: i32>(
+    a: uint32x4_t,
+    b: uint16x8_t,
+    c: uint16x4_t,
+) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsl_high_u16(
+        a,
+        b,
+        simd_shuffle!(
+            c,
+            c,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
+            ]
+        ),
+    )
+}
+#[doc = "Multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vmlsl_high_laneq_u16<const LANE: i32>(
+    a: uint32x4_t,
+    b: uint16x8_t,
+    c: uint16x8_t,
+) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlsl_high_u16(
+        a,
+        b,
+        simd_shuffle!(
+            c,
+            c,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
+            ]
+        ),
+    )
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f64)"]
+#[doc = "Multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcreate_f64(a: u64) -> float64x1_t {
-    transmute(a)
+pub unsafe fn vmlsl_high_lane_u32<const LANE: i32>(
+    a: uint64x2_t,
+    b: uint32x4_t,
+    c: uint32x2_t,
+) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlsl_high_u32(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Floating-point convert to lower precision narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_f64)"]
+#[doc = "Multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtn))]
+#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvt_f32_f64(a: float64x2_t) -> float32x2_t {
-    simd_cast(a)
+pub unsafe fn vmlsl_high_laneq_u32<const LANE: i32>(
+    a: uint64x2_t,
+    b: uint32x4_t,
+    c: uint32x4_t,
+) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsl_high_u32(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Floating-point convert to higher precision long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f64_f32)"]
+#[doc = "Multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtl))]
+#[cfg_attr(test, assert_instr(smlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvt_f64_f32(a: float32x2_t) -> float64x2_t {
-    simd_cast(a)
+pub unsafe fn vmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
+    vmlsl_high_s16(a, b, vdupq_n_s16(c))
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f64_s64)"]
+#[doc = "Multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(scvtf))]
+#[cfg_attr(test, assert_instr(smlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvt_f64_s64(a: int64x1_t) -> float64x1_t {
-    simd_cast(a)
+pub unsafe fn vmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
+    vmlsl_high_s32(a, b, vdupq_n_s32(c))
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f64_s64)"]
+#[doc = "Multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(scvtf))]
+#[cfg_attr(test, assert_instr(umlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtq_f64_s64(a: int64x2_t) -> float64x2_t {
-    simd_cast(a)
+pub unsafe fn vmlsl_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t {
+    vmlsl_high_u16(a, b, vdupq_n_u16(c))
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f64_u64)"]
+#[doc = "Multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ucvtf))]
+#[cfg_attr(test, assert_instr(umlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvt_f64_u64(a: uint64x1_t) -> float64x1_t {
-    simd_cast(a)
+pub unsafe fn vmlsl_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t {
+    vmlsl_high_u32(a, b, vdupq_n_u32(c))
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f64_u64)"]
+#[doc = "Signed multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ucvtf))]
+#[cfg_attr(test, assert_instr(smlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtq_f64_u64(a: uint64x2_t) -> float64x2_t {
-    simd_cast(a)
+pub unsafe fn vmlsl_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
+    let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+    let c: int8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
+    vmlsl_s8(a, b, c)
 }
-#[doc = "Floating-point convert to lower precision narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f32_f64)"]
+#[doc = "Signed multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtn))]
+#[cfg_attr(test, assert_instr(smlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvt_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t {
-    simd_shuffle!(a, simd_cast(b), [0, 1, 2, 3])
+pub unsafe fn vmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+    let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
+    let c: int16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
+    vmlsl_s16(a, b, c)
 }
-#[doc = "Floating-point convert to higher precision long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f64_f32)"]
+#[doc = "Signed multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtl))]
+#[cfg_attr(test, assert_instr(smlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvt_high_f64_f32(a: float32x4_t) -> float64x2_t {
-    let b: float32x2_t = simd_shuffle!(a, a, [2, 3]);
-    simd_cast(b)
+pub unsafe fn vmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+    let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
+    let c: int32x2_t = simd_shuffle!(c, c, [2, 3]);
+    vmlsl_s32(a, b, c)
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f64_s64)"]
+#[doc = "Unsigned multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(scvtf, N = 2))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(umlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvt_n_f64_s64<const N: i32>(a: int64x1_t) -> float64x1_t {
-    static_assert!(N >= 1 && N <= 64);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64"
-        )]
-        fn _vcvt_n_f64_s64(a: int64x1_t, n: i32) -> float64x1_t;
-    }
-    _vcvt_n_f64_s64(a, N)
+pub unsafe fn vmlsl_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t {
+    let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+    let c: uint8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
+    vmlsl_u8(a, b, c)
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f64_s64)"]
+#[doc = "Unsigned multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(scvtf, N = 2))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(umlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtq_n_f64_s64<const N: i32>(a: int64x2_t) -> float64x2_t {
-    static_assert!(N >= 1 && N <= 64);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64"
-        )]
-        fn _vcvtq_n_f64_s64(a: int64x2_t, n: i32) -> float64x2_t;
-    }
-    _vcvtq_n_f64_s64(a, N)
+pub unsafe fn vmlsl_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
+    let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
+    let c: uint16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
+    vmlsl_u16(a, b, c)
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f64_u64)"]
+#[doc = "Unsigned multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ucvtf, N = 2))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(umlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvt_n_f64_u64<const N: i32>(a: uint64x1_t) -> float64x1_t {
-    static_assert!(N >= 1 && N <= 64);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64"
-        )]
-        fn _vcvt_n_f64_u64(a: int64x1_t, n: i32) -> float64x1_t;
-    }
-    _vcvt_n_f64_u64(a.as_signed(), N)
+pub unsafe fn vmlsl_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
+    let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
+    let c: uint32x2_t = simd_shuffle!(c, c, [2, 3]);
+    vmlsl_u32(a, b, c)
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f64_u64)"]
+#[doc = "Vector move"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ucvtf, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtq_n_f64_u64<const N: i32>(a: uint64x2_t) -> float64x2_t {
-    static_assert!(N >= 1 && N <= 64);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64"
-        )]
-        fn _vcvtq_n_f64_u64(a: int64x2_t, n: i32) -> float64x2_t;
-    }
-    _vcvtq_n_f64_u64(a.as_signed(), N)
+#[cfg_attr(test, assert_instr(sxtl2))]
+pub unsafe fn vmovl_high_s8(a: int8x16_t) -> int16x8_t {
+    let a: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+    vmovl_s8(a)
 }
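+// Editorial sketch: vmovl_high_s8 sign-extends the top eight lanes; in terms
+// of public intrinsics (hedged): vmovl_high_s8(a) == vmovl_s8(vget_high_s8(a)).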
-#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_s64_f64)"]
+#[doc = "Vector move"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzs, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvt_n_s64_f64<const N: i32>(a: float64x1_t) -> int64x1_t {
-    static_assert!(N >= 1 && N <= 64);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64"
-        )]
-        fn _vcvt_n_s64_f64(a: float64x1_t, n: i32) -> int64x1_t;
-    }
-    _vcvt_n_s64_f64(a, N)
+#[cfg_attr(test, assert_instr(sxtl2))]
+pub unsafe fn vmovl_high_s16(a: int16x8_t) -> int32x4_t {
+    let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+    vmovl_s16(a)
 }
-#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_s64_f64)"]
+#[doc = "Vector move"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzs, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtq_n_s64_f64<const N: i32>(a: float64x2_t) -> int64x2_t {
-    static_assert!(N >= 1 && N <= 64);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64"
-        )]
-        fn _vcvtq_n_s64_f64(a: float64x2_t, n: i32) -> int64x2_t;
-    }
-    _vcvtq_n_s64_f64(a, N)
+#[cfg_attr(test, assert_instr(sxtl2))]
+pub unsafe fn vmovl_high_s32(a: int32x4_t) -> int64x2_t {
+    let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
+    vmovl_s32(a)
 }
-#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_u64_f64)"]
+#[doc = "Vector move"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzu, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvt_n_u64_f64<const N: i32>(a: float64x1_t) -> uint64x1_t {
-    static_assert!(N >= 1 && N <= 64);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64"
-        )]
-        fn _vcvt_n_u64_f64(a: float64x1_t, n: i32) -> int64x1_t;
-    }
-    _vcvt_n_u64_f64(a, N).as_unsigned()
+#[cfg_attr(test, assert_instr(uxtl2))]
+pub unsafe fn vmovl_high_u8(a: uint8x16_t) -> uint16x8_t {
+    let a: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+    vmovl_u8(a)
 }
-#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_u64_f64)"]
+#[doc = "Vector move"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzu, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtq_n_u64_f64<const N: i32>(a: float64x2_t) -> uint64x2_t {
-    static_assert!(N >= 1 && N <= 64);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64"
-        )]
-        fn _vcvtq_n_u64_f64(a: float64x2_t, n: i32) -> int64x2_t;
-    }
-    _vcvtq_n_u64_f64(a, N).as_unsigned()
+#[cfg_attr(test, assert_instr(uxtl2))]
+pub unsafe fn vmovl_high_u16(a: uint16x8_t) -> uint32x4_t {
+    let a: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+    vmovl_u16(a)
 }
-#[doc = "Floating-point convert to signed fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_s64_f64)"]
+#[doc = "Vector move"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzs))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvt_s64_f64(a: float64x1_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.fptosi.sat.v1i64.v1f64"
-        )]
-        fn _vcvt_s64_f64(a: float64x1_t) -> int64x1_t;
-    }
-    _vcvt_s64_f64(a)
+#[cfg_attr(test, assert_instr(uxtl2))]
+pub unsafe fn vmovl_high_u32(a: uint32x4_t) -> uint64x2_t {
+    let a: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
+    vmovl_u32(a)
 }
-#[doc = "Floating-point convert to signed fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_s64_f64)"]
+#[doc = "Extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzs))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtq_s64_f64(a: float64x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.fptosi.sat.v2i64.v2f64"
-        )]
-        fn _vcvtq_s64_f64(a: float64x2_t) -> int64x2_t;
-    }
-    _vcvtq_s64_f64(a)
+#[cfg_attr(test, assert_instr(xtn2))]
+pub unsafe fn vmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
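+    // Truncate each 16-bit lane of `b` to 8 bits, then concatenate the result
+    // onto `a`; `xtn2` writes the narrowed values into the upper half.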
+    let c: int8x8_t = simd_cast(b);
+    simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }
-#[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_u64_f64)"]
+#[doc = "Extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzu))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvt_u64_f64(a: float64x1_t) -> uint64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.fptoui.sat.v1i64.v1f64"
-        )]
-        fn _vcvt_u64_f64(a: float64x1_t) -> int64x1_t;
-    }
-    _vcvt_u64_f64(a).as_unsigned()
+#[cfg_attr(test, assert_instr(xtn2))]
+pub unsafe fn vmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
+    let c: int16x4_t = simd_cast(b);
+    simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7])
 }
-#[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_u64_f64)"]
+#[doc = "Extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzu))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtq_u64_f64(a: float64x2_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.fptoui.sat.v2i64.v2f64"
-        )]
-        fn _vcvtq_u64_f64(a: float64x2_t) -> int64x2_t;
-    }
-    _vcvtq_u64_f64(a).as_unsigned()
+#[cfg_attr(test, assert_instr(xtn2))]
+pub unsafe fn vmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
+    let c: int32x2_t = simd_cast(b);
+    simd_shuffle!(a, c, [0, 1, 2, 3])
 }
-#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_s32_f32)"]
+#[doc = "Extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtas))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvta_s32_f32(a: float32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtas.v2i32.v2f32"
-        )]
-        fn _vcvta_s32_f32(a: float32x2_t) -> int32x2_t;
-    }
-    _vcvta_s32_f32(a)
+#[cfg_attr(test, assert_instr(xtn2))]
+pub unsafe fn vmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+    let c: uint8x8_t = simd_cast(b);
+    simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }
-#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtaq_s32_f32)"]
+#[doc = "Extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtas))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtaq_s32_f32(a: float32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtas.v4i32.v4f32"
-        )]
-        fn _vcvtaq_s32_f32(a: float32x4_t) -> int32x4_t;
-    }
-    _vcvtaq_s32_f32(a)
+#[cfg_attr(test, assert_instr(xtn2))]
+pub unsafe fn vmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+    let c: uint16x4_t = simd_cast(b);
+    simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7])
 }
-#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_s64_f64)"]
+#[doc = "Extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtas))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvta_s64_f64(a: float64x1_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtas.v1i64.v1f64"
-        )]
-        fn _vcvta_s64_f64(a: float64x1_t) -> int64x1_t;
-    }
-    _vcvta_s64_f64(a)
+#[cfg_attr(test, assert_instr(xtn2))]
+pub unsafe fn vmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+    let c: uint32x2_t = simd_cast(b);
+    simd_shuffle!(a, c, [0, 1, 2, 3])
 }
-#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtaq_s64_f64)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtas))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtaq_s64_f64(a: float64x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtas.v2i64.v2f64"
-        )]
-        fn _vcvtaq_s64_f64(a: float64x2_t) -> int64x2_t;
-    }
-    _vcvtaq_s64_f64(a)
+#[cfg_attr(test, assert_instr(fmul))]
+pub unsafe fn vmul_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+    simd_mul(a, b)
 }
-#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_u32_f32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtau))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvta_u32_f32(a: float32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtau.v2i32.v2f32"
-        )]
-        fn _vcvta_u32_f32(a: float32x2_t) -> int32x2_t;
-    }
-    _vcvta_u32_f32(a).as_unsigned()
+#[cfg_attr(test, assert_instr(fmul))]
+pub unsafe fn vmulq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    simd_mul(a, b)
 }
-#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtaq_u32_f32)"]
+#[doc = "Floating-point multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtau))]
+#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtaq_u32_f32(a: float32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtau.v4i32.v4f32"
-        )]
-        fn _vcvtaq_u32_f32(a: float32x4_t) -> int32x4_t;
-    }
-    _vcvtaq_u32_f32(a).as_unsigned()
+pub unsafe fn vmul_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+    static_assert!(LANE == 0);
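+    // `float64x1_t` has a single lane, so only LANE == 0 is valid; the lane is
+    // extracted as an `f64` and rebuilt into a one-lane vector for `simd_mul`.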
+    simd_mul(a, transmute::<f64, _>(simd_extract!(b, LANE as u32)))
 }
-#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvta_u64_f64)"]
+#[doc = "Floating-point multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtau))]
+#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvta_u64_f64(a: float64x1_t) -> uint64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtau.v1i64.v1f64"
-        )]
-        fn _vcvta_u64_f64(a: float64x1_t) -> int64x1_t;
-    }
-    _vcvta_u64_f64(a).as_unsigned()
+pub unsafe fn vmul_laneq_f64<const LANE: i32>(a: float64x1_t, b: float64x2_t) -> float64x1_t {
+    static_assert_uimm_bits!(LANE, 1);
+    simd_mul(a, transmute::<f64, _>(simd_extract!(b, LANE as u32)))
 }
-#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtaq_u64_f64)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtau))]
+#[cfg_attr(test, assert_instr(fmul))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtaq_u64_f64(a: float64x2_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtau.v2i64.v2f64"
-        )]
-        fn _vcvtaq_u64_f64(a: float64x2_t) -> int64x2_t;
-    }
-    _vcvtaq_u64_f64(a).as_unsigned()
+pub unsafe fn vmul_n_f64(a: float64x1_t, b: f64) -> float64x1_t {
+    simd_mul(a, vdup_n_f64(b))
 }
-#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtas_s32_f32)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtas))]
+#[cfg_attr(test, assert_instr(fmul))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtas_s32_f32(a: f32) -> i32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtas.i32.f32"
-        )]
-        fn _vcvtas_s32_f32(a: f32) -> i32;
-    }
-    _vcvtas_s32_f32(a)
+pub unsafe fn vmulq_n_f64(a: float64x2_t, b: f64) -> float64x2_t {
+    simd_mul(a, vdupq_n_f64(b))
 }
-#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtad_s64_f64)"]
+#[doc = "Floating-point multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuld_lane_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtas))]
+#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtad_s64_f64(a: f64) -> i64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtas.i64.f64"
-        )]
-        fn _vcvtad_s64_f64(a: f64) -> i64;
-    }
-    _vcvtad_s64_f64(a)
+pub unsafe fn vmuld_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> f64 {
+    static_assert!(LANE == 0);
+    let b: f64 = simd_extract!(b, LANE as u32);
+    a * b
 }
-#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtas_u32_f32)"]
+#[doc = "Multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtau))]
+#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtas_u32_f32(a: f32) -> u32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtau.i32.f32"
-        )]
-        fn _vcvtas_u32_f32(a: f32) -> i32;
-    }
-    _vcvtas_u32_f32(a).as_unsigned()
+pub unsafe fn vmull_high_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
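+    // Broadcast lane LANE of `b` across all eight lanes so the high-half
+    // widening multiply sees the selected value in every position.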
+    vmull_high_s16(
+        a,
+        simd_shuffle!(
+            b,
+            b,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
+            ]
+        ),
+    )
 }
-#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtad_u64_f64)"]
+#[doc = "Multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtau))]
+#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtad_u64_f64(a: f64) -> u64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtau.i64.f64"
-        )]
-        fn _vcvtad_u64_f64(a: f64) -> i64;
-    }
-    _vcvtad_u64_f64(a).as_unsigned()
+pub unsafe fn vmull_high_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmull_high_s16(
+        a,
+        simd_shuffle!(
+            b,
+            b,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
+            ]
+        ),
+    )
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_f64_s64)"]
+#[doc = "Multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(scvtf))]
+#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtd_f64_s64(a: i64) -> f64 {
-    a as f64
+pub unsafe fn vmull_high_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmull_high_s32(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_f32_s32)"]
+#[doc = "Multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(scvtf))]
+#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvts_f32_s32(a: i32) -> f32 {
-    a as f32
+pub unsafe fn vmull_high_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmull_high_s32(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_s32_f32)"]
+#[doc = "Multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtms))]
+#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtm_s32_f32(a: float32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtms.v2i32.v2f32"
-        )]
-        fn _vcvtm_s32_f32(a: float32x2_t) -> int32x2_t;
-    }
-    _vcvtm_s32_f32(a)
+pub unsafe fn vmull_high_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmull_high_u16(
+        a,
+        simd_shuffle!(
+            b,
+            b,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
+            ]
+        ),
+    )
 }
-#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_s32_f32)"]
+#[doc = "Multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtms))]
+#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtmq_s32_f32(a: float32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtms.v4i32.v4f32"
-        )]
-        fn _vcvtmq_s32_f32(a: float32x4_t) -> int32x4_t;
-    }
-    _vcvtmq_s32_f32(a)
+pub unsafe fn vmull_high_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmull_high_u16(
+        a,
+        simd_shuffle!(
+            b,
+            b,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
+            ]
+        ),
+    )
 }
-#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_s64_f64)"]
+#[doc = "Multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtms))]
+#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtm_s64_f64(a: float64x1_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtms.v1i64.v1f64"
-        )]
-        fn _vcvtm_s64_f64(a: float64x1_t) -> int64x1_t;
-    }
-    _vcvtm_s64_f64(a)
+pub unsafe fn vmull_high_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmull_high_u32(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_s64_f64)"]
+#[doc = "Multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtms))]
+#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtmq_s64_f64(a: float64x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtms.v2i64.v2f64"
-        )]
-        fn _vcvtmq_s64_f64(a: float64x2_t) -> int64x2_t;
-    }
-    _vcvtmq_s64_f64(a)
+pub unsafe fn vmull_high_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmull_high_u32(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_u32_f32)"]
+#[doc = "Multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtmu))]
+#[cfg_attr(test, assert_instr(smull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtm_u32_f32(a: float32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtmu.v2i32.v2f32"
-        )]
-        fn _vcvtm_u32_f32(a: float32x2_t) -> int32x2_t;
-    }
-    _vcvtm_u32_f32(a).as_unsigned()
+pub unsafe fn vmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
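+    // Splat the scalar across a full vector, then multiply it against the
+    // high half of `a`.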
+    vmull_high_s16(a, vdupq_n_s16(b))
 }
-#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_u32_f32)"]
+#[doc = "Multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtmu))]
+#[cfg_attr(test, assert_instr(smull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtmq_u32_f32(a: float32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtmu.v4i32.v4f32"
-        )]
-        fn _vcvtmq_u32_f32(a: float32x4_t) -> int32x4_t;
-    }
-    _vcvtmq_u32_f32(a).as_unsigned()
+pub unsafe fn vmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
+    vmull_high_s32(a, vdupq_n_s32(b))
 }
-#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtm_u64_f64)"]
+#[doc = "Multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtmu))]
+#[cfg_attr(test, assert_instr(umull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtm_u64_f64(a: float64x1_t) -> uint64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtmu.v1i64.v1f64"
-        )]
-        fn _vcvtm_u64_f64(a: float64x1_t) -> int64x1_t;
-    }
-    _vcvtm_u64_f64(a).as_unsigned()
+pub unsafe fn vmull_high_n_u16(a: uint16x8_t, b: u16) -> uint32x4_t {
+    vmull_high_u16(a, vdupq_n_u16(b))
 }
-#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmq_u64_f64)"]
+#[doc = "Multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtmu))]
+#[cfg_attr(test, assert_instr(umull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtmq_u64_f64(a: float64x2_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtmu.v2i64.v2f64"
-        )]
-        fn _vcvtmq_u64_f64(a: float64x2_t) -> int64x2_t;
-    }
-    _vcvtmq_u64_f64(a).as_unsigned()
+pub unsafe fn vmull_high_n_u32(a: uint32x4_t, b: u32) -> uint64x2_t {
+    vmull_high_u32(a, vdupq_n_u32(b))
 }
-#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtms_s32_f32)"]
+#[doc = "Polynomial multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtms))]
+#[target_feature(enable = "neon,aes")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtms_s32_f32(a: f32) -> i32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtms.i32.f32"
-        )]
-        fn _vcvtms_s32_f32(a: f32) -> i32;
-    }
-    _vcvtms_s32_f32(a)
+#[cfg_attr(test, assert_instr(pmull))]
+pub unsafe fn vmull_high_p64(a: poly64x2_t, b: poly64x2_t) -> p128 {
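+    // Carry-less multiply of the upper 64-bit lanes of `a` and `b`, producing
+    // a 128-bit result; note the additional `aes` target feature above.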
+    vmull_p64(simd_extract!(a, 1), simd_extract!(b, 1))
 }
-#[doc = "Floating-point convert to signed integer, rounding toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmd_s64_f64)"]
+#[doc = "Polynomial multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtms))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtmd_s64_f64(a: f64) -> i64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtms.i64.f64"
-        )]
-        fn _vcvtmd_s64_f64(a: f64) -> i64;
-    }
-    _vcvtmd_s64_f64(a)
+#[cfg_attr(test, assert_instr(pmull))]
+pub unsafe fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t {
+    let a: poly8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+    let b: poly8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+    vmull_p8(a, b)
 }
-#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtms_u32_f32)"]
+#[doc = "Signed multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtmu))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtms_u32_f32(a: f32) -> u32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtmu.i32.f32"
-        )]
-        fn _vcvtms_u32_f32(a: f32) -> i32;
-    }
-    _vcvtms_u32_f32(a).as_unsigned()
+#[cfg_attr(test, assert_instr(smull2))]
+pub unsafe fn vmull_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
+    let a: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+    let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+    vmull_s8(a, b)
 }
-#[doc = "Floating-point convert to unsigned integer, rounding toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmd_u64_f64)"]
+#[doc = "Signed multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtmu))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtmd_u64_f64(a: f64) -> u64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtmu.i64.f64"
-        )]
-        fn _vcvtmd_u64_f64(a: f64) -> i64;
-    }
-    _vcvtmd_u64_f64(a).as_unsigned()
+#[cfg_attr(test, assert_instr(smull2))]
+pub unsafe fn vmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+    let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+    let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
+    vmull_s16(a, b)
 }
-#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_s32_f32)"]
+#[doc = "Signed multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtns))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtn_s32_f32(a: float32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtns.v2i32.v2f32"
-        )]
-        fn _vcvtn_s32_f32(a: float32x2_t) -> int32x2_t;
-    }
-    _vcvtn_s32_f32(a)
+#[cfg_attr(test, assert_instr(smull2))]
+pub unsafe fn vmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+    let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
+    let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
+    vmull_s32(a, b)
 }
-#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnq_s32_f32)"]
+#[doc = "Unsigned multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtns))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtnq_s32_f32(a: float32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtns.v4i32.v4f32"
-        )]
-        fn _vcvtnq_s32_f32(a: float32x4_t) -> int32x4_t;
-    }
-    _vcvtnq_s32_f32(a)
+#[cfg_attr(test, assert_instr(umull2))]
+pub unsafe fn vmull_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
+    let a: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+    let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+    vmull_u8(a, b)
 }
-#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_s64_f64)"]
+#[doc = "Unsigned multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtns))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtn_s64_f64(a: float64x1_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtns.v1i64.v1f64"
-        )]
-        fn _vcvtn_s64_f64(a: float64x1_t) -> int64x1_t;
-    }
-    _vcvtn_s64_f64(a)
+#[cfg_attr(test, assert_instr(umull2))]
+pub unsafe fn vmull_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
+    let a: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+    let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
+    vmull_u16(a, b)
 }
-#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnq_s64_f64)"]
+#[doc = "Unsigned multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtns))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtnq_s64_f64(a: float64x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtns.v2i64.v2f64"
-        )]
-        fn _vcvtnq_s64_f64(a: float64x2_t) -> int64x2_t;
-    }
-    _vcvtnq_s64_f64(a)
+#[cfg_attr(test, assert_instr(umull2))]
+pub unsafe fn vmull_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
+    let a: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
+    let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
+    vmull_u32(a, b)
 }
-#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_u32_f32)"]
+#[doc = "Polynomial multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtnu))]
+#[target_feature(enable = "neon,aes")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtn_u32_f32(a: float32x2_t) -> uint32x2_t {
+#[cfg_attr(test, assert_instr(pmull))]
+pub unsafe fn vmull_p64(a: p64, b: p64) -> p128 {
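+    // The LLVM `pmull64` intrinsic returns its 128-bit product as a byte
+    // vector, so the result is transmuted to `p128` below.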
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtnu.v2i32.v2f32"
+            link_name = "llvm.aarch64.neon.pmull64"
         )]
-        fn _vcvtn_u32_f32(a: float32x2_t) -> int32x2_t;
+        fn _vmull_p64(a: p64, b: p64) -> int8x16_t;
     }
-    _vcvtn_u32_f32(a).as_unsigned()
+    transmute(_vmull_p64(a, b))
 }
-#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnq_u32_f32)"]
+#[doc = "Floating-point multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtnu))]
+#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtnq_u32_f32(a: float32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtnu.v4i32.v4f32"
-        )]
-        fn _vcvtnq_u32_f32(a: float32x4_t) -> int32x4_t;
-    }
-    _vcvtnq_u32_f32(a).as_unsigned()
+pub unsafe fn vmulq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
+    static_assert!(LANE == 0);
+    simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }
-#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtn_u64_f64)"]
+#[doc = "Floating-point multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtnu))]
+#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtn_u64_f64(a: float64x1_t) -> uint64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtnu.v1i64.v1f64"
-        )]
-        fn _vcvtn_u64_f64(a: float64x1_t) -> int64x1_t;
-    }
-    _vcvtn_u64_f64(a).as_unsigned()
+pub unsafe fn vmulq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }
-#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnq_u64_f64)"]
+#[doc = "Floating-point multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuls_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtnu))]
+#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtnq_u64_f64(a: float64x2_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtnu.v2i64.v2f64"
-        )]
-        fn _vcvtnq_u64_f64(a: float64x2_t) -> int64x2_t;
-    }
-    _vcvtnq_u64_f64(a).as_unsigned()
+pub unsafe fn vmuls_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> f32 {
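+    // Scalar-by-lane form: extract the requested lane of `b` and perform a
+    // plain `f32` multiply (asserted above to compile to `fmul`).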
+    static_assert_uimm_bits!(LANE, 1);
+    let b: f32 = simd_extract!(b, LANE as u32);
+    a * b
 }
-#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtns_s32_f32)"]
+#[doc = "Floating-point multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuls_laneq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtns))]
+#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtns_s32_f32(a: f32) -> i32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtns.i32.f32"
-        )]
-        fn _vcvtns_s32_f32(a: f32) -> i32;
-    }
-    _vcvtns_s32_f32(a)
+pub unsafe fn vmuls_laneq_f32<const LANE: i32>(a: f32, b: float32x4_t) -> f32 {
+    static_assert_uimm_bits!(LANE, 2);
+    let b: f32 = simd_extract!(b, LANE as u32);
+    a * b
 }
-#[doc = "Floating-point convert to signed integer, rounding to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnd_s64_f64)"]
+#[doc = "Floating-point multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuld_laneq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtns))]
+#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtnd_s64_f64(a: f64) -> i64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtns.i64.f64"
-        )]
-        fn _vcvtnd_s64_f64(a: f64) -> i64;
-    }
-    _vcvtnd_s64_f64(a)
+pub unsafe fn vmuld_laneq_f64<const LANE: i32>(a: f64, b: float64x2_t) -> f64 {
+    static_assert_uimm_bits!(LANE, 1);
+    let b: f64 = simd_extract!(b, LANE as u32);
+    a * b
 }
-#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtns_u32_f32)"]
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtnu))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtns_u32_f32(a: f32) -> u32 {
+#[cfg_attr(test, assert_instr(fmulx))]
+pub unsafe fn vmulx_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
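+    // FMULX matches an ordinary floating-point multiply except that
+    // multiplying ±0.0 by ±infinity returns ±2.0 instead of NaN.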
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtnu.i32.f32"
+            link_name = "llvm.aarch64.neon.fmulx.v2f32"
         )]
-        fn _vcvtns_u32_f32(a: f32) -> i32;
+        fn _vmulx_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
     }
-    _vcvtns_u32_f32(a).as_unsigned()
+    _vmulx_f32(a, b)
 }
-#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnd_u64_f64)"]
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtnu))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtnd_u64_f64(a: f64) -> u64 {
+#[cfg_attr(test, assert_instr(fmulx))]
+pub unsafe fn vmulxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtnu.i64.f64"
+            link_name = "llvm.aarch64.neon.fmulx.v4f32"
         )]
-        fn _vcvtnd_u64_f64(a: f64) -> i64;
+        fn _vmulxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
     }
-    _vcvtnd_u64_f64(a).as_unsigned()
+    _vmulxq_f32(a, b)
 }
-#[doc = "Floating-point convert to signed integer, rounding toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_s32_f32)"]
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtps))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtp_s32_f32(a: float32x2_t) -> int32x2_t {
+#[cfg_attr(test, assert_instr(fmulx))]
+pub unsafe fn vmulx_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtps.v2i32.v2f32"
+            link_name = "llvm.aarch64.neon.fmulx.v1f64"
         )]
-        fn _vcvtp_s32_f32(a: float32x2_t) -> int32x2_t;
+        fn _vmulx_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
     }
-    _vcvtp_s32_f32(a)
+    _vmulx_f64(a, b)
 }
-#[doc = "Floating-point convert to signed integer, rounding toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpq_s32_f32)"]
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtps))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtpq_s32_f32(a: float32x4_t) -> int32x4_t {
+#[cfg_attr(test, assert_instr(fmulx))]
+pub unsafe fn vmulxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtps.v4i32.v4f32"
+            link_name = "llvm.aarch64.neon.fmulx.v2f64"
         )]
-        fn _vcvtpq_s32_f32(a: float32x4_t) -> int32x4_t;
+        fn _vmulxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
     }
-    _vcvtpq_s32_f32(a)
+    _vmulxq_f64(a, b)
 }
-#[doc = "Floating-point convert to signed integer, rounding toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_s64_f64)"]
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtps))]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtp_s64_f64(a: float64x1_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtps.v1i64.v1f64"
-        )]
-        fn _vcvtp_s64_f64(a: float64x1_t) -> int64x1_t;
-    }
-    _vcvtp_s64_f64(a)
+pub unsafe fn vmulx_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }
-#[doc = "Floating-point convert to signed integer, rounding toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpq_s64_f64)"]
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtps))]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtpq_s64_f64(a: float64x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtps.v2i64.v2f64"
-        )]
-        fn _vcvtpq_s64_f64(a: float64x2_t) -> int64x2_t;
-    }
-    _vcvtpq_s64_f64(a)
+pub unsafe fn vmulx_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }
-#[doc = "Floating-point convert to unsigned integer, rounding toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_u32_f32)"]
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtpu))]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtp_u32_f32(a: float32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtpu.v2i32.v2f32"
-        )]
-        fn _vcvtp_u32_f32(a: float32x2_t) -> int32x2_t;
-    }
-    _vcvtp_u32_f32(a).as_unsigned()
+pub unsafe fn vmulxq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmulxq_f32(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Floating-point convert to unsigned integer, rounding toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpq_u32_f32)"]
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtpu))]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtpq_u32_f32(a: float32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtpu.v4i32.v4f32"
-        )]
-        fn _vcvtpq_u32_f32(a: float32x4_t) -> int32x4_t;
-    }
-    _vcvtpq_u32_f32(a).as_unsigned()
+pub unsafe fn vmulxq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmulxq_f32(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Floating-point convert to unsigned integer, rounding toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtp_u64_f64)"]
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtpu))]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtp_u64_f64(a: float64x1_t) -> uint64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtpu.v1i64.v1f64"
-        )]
-        fn _vcvtp_u64_f64(a: float64x1_t) -> int64x1_t;
-    }
-    _vcvtp_u64_f64(a).as_unsigned()
+pub unsafe fn vmulxq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }
-#[doc = "Floating-point convert to unsigned integer, rounding toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpq_u64_f64)"]
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtpu))]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtpu.v2i64.v2f64"
-        )]
-        fn _vcvtpq_u64_f64(a: float64x2_t) -> int64x2_t;
-    }
-    _vcvtpq_u64_f64(a).as_unsigned()
+pub unsafe fn vmulx_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+    static_assert!(LANE == 0);
+    vmulx_f64(a, transmute::<f64, _>(simd_extract!(b, LANE as u32)))
 }
-#[doc = "Floating-point convert to signed integer, rounding toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtps_s32_f32)"]
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtps))]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtps_s32_f32(a: f32) -> i32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtps.i32.f32"
-        )]
-        fn _vcvtps_s32_f32(a: f32) -> i32;
-    }
-    _vcvtps_s32_f32(a)
+pub unsafe fn vmulx_laneq_f64<const LANE: i32>(a: float64x1_t, b: float64x2_t) -> float64x1_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmulx_f64(a, transmute::<f64, _>(simd_extract!(b, LANE as u32)))
 }
-#[doc = "Floating-point convert to signed integer, rounding toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpd_s64_f64)"]
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxd_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtps))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtpd_s64_f64(a: f64) -> i64 {
+#[cfg_attr(test, assert_instr(fmulx))]
+pub unsafe fn vmulxd_f64(a: f64, b: f64) -> f64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtps.i64.f64"
+            link_name = "llvm.aarch64.neon.fmulx.f64"
         )]
-        fn _vcvtpd_s64_f64(a: f64) -> i64;
+        fn _vmulxd_f64(a: f64, b: f64) -> f64;
     }
-    _vcvtpd_s64_f64(a)
+    _vmulxd_f64(a, b)
 }
-#[doc = "Floating-point convert to unsigned integer, rounding toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtps_u32_f32)"]
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxs_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtpu))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtps_u32_f32(a: f32) -> u32 {
+#[cfg_attr(test, assert_instr(fmulx))]
+pub unsafe fn vmulxs_f32(a: f32, b: f32) -> f32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtpu.i32.f32"
+            link_name = "llvm.aarch64.neon.fmulx.f32"
         )]
-        fn _vcvtps_u32_f32(a: f32) -> i32;
+        fn _vmulxs_f32(a: f32, b: f32) -> f32;
     }
-    _vcvtps_u32_f32(a).as_unsigned()
+    _vmulxs_f32(a, b)
 }
-#[doc = "Floating-point convert to unsigned integer, rounding toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtpd_u64_f64)"]
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxd_lane_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtpu))]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtpd_u64_f64(a: f64) -> u64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtpu.i64.f64"
-        )]
-        fn _vcvtpd_u64_f64(a: f64) -> i64;
-    }
-    _vcvtpd_u64_f64(a).as_unsigned()
+pub unsafe fn vmulxd_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> f64 {
+    static_assert!(LANE == 0);
+    vmulxd_f64(a, simd_extract!(b, LANE as u32))
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_f32_u32)"]
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxd_laneq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ucvtf))]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvts_f32_u32(a: u32) -> f32 {
-    a as f32
+pub unsafe fn vmulxd_laneq_f64<const LANE: i32>(a: f64, b: float64x2_t) -> f64 {
+    static_assert_uimm_bits!(LANE, 1);
+    vmulxd_f64(a, simd_extract!(b, LANE as u32))
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_f64_u64)"]
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxs_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ucvtf))]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtd_f64_u64(a: u64) -> f64 {
-    a as f64
+pub unsafe fn vmulxs_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> f32 {
+    static_assert_uimm_bits!(LANE, 1);
+    vmulxs_f32(a, simd_extract!(b, LANE as u32))
 }
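The scalar-by-lane form multiplies a plain `f32` by one selected lane of the vector. A sketch under the same assumptions as the earlier examples:

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn vmulxs_lane_f32_demo() {
    use core::arch::aarch64::*;
    let ys = [4.0_f32, 8.0];
    // 0.5 multiplied by ys[1] = 8.0 gives 4.0.
    assert_eq!(vmulxs_lane_f32::<1>(0.5, vld1_f32(ys.as_ptr())), 4.0);
}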
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_n_f32_s32)"]
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxs_laneq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(scvtf, N = 2))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvts_n_f32_s32<const N: i32>(a: i32) -> f32 {
-    static_assert!(N >= 1 && N <= 64);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfxs2fp.f32.i32"
-        )]
-        fn _vcvts_n_f32_s32(a: i32, n: i32) -> f32;
-    }
-    _vcvts_n_f32_s32(a, N)
+pub unsafe fn vmulxs_laneq_f32<const LANE: i32>(a: f32, b: float32x4_t) -> f32 {
+    static_assert_uimm_bits!(LANE, 2);
+    vmulxs_f32(a, simd_extract!(b, LANE as u32))
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_n_f64_s64)"]
+#[doc = "Floating-point multiply extended"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(scvtf, N = 2))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtd_n_f64_s64<const N: i32>(a: i64) -> f64 {
-    static_assert!(N >= 1 && N <= 64);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfxs2fp.f64.i64"
-        )]
-        fn _vcvtd_n_f64_s64(a: i64, n: i32) -> f64;
-    }
-    _vcvtd_n_f64_s64(a, N)
+pub unsafe fn vmulxq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
+    static_assert!(LANE == 0);
+    vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_n_f32_u32)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ucvtf, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvts_n_f32_u32<const N: i32>(a: u32) -> f32 {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfxu2fp.f32.i32"
-        )]
-        fn _vcvts_n_f32_u32(a: i32, n: i32) -> f32;
-    }
-    _vcvts_n_f32_u32(a.as_signed(), N)
+#[cfg_attr(test, assert_instr(fneg))]
+pub unsafe fn vneg_f64(a: float64x1_t) -> float64x1_t {
+    simd_neg(a)
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_n_f64_u64)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ucvtf, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtd_n_f64_u64<const N: i32>(a: u64) -> f64 {
-    static_assert!(N >= 1 && N <= 64);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfxu2fp.f64.i64"
-        )]
-        fn _vcvtd_n_f64_u64(a: i64, n: i32) -> f64;
-    }
-    _vcvtd_n_f64_u64(a.as_signed(), N)
+#[cfg_attr(test, assert_instr(fneg))]
+pub unsafe fn vnegq_f64(a: float64x2_t) -> float64x2_t {
+    simd_neg(a)
 }
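A quick editorial sketch of the vector negate (FNEG flips the sign bit of each lane; demo name illustrative, aarch64 with NEON assumed):

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn vnegq_f64_demo() {
    use core::arch::aarch64::*;
    let xs = [1.0_f64, -2.0];
    let r = vnegq_f64(vld1q_f64(xs.as_ptr()));
    // Each lane is negated: [1.0, -2.0] becomes [-1.0, 2.0].
    assert_eq!(vgetq_lane_f64::<0>(r), -1.0);
    assert_eq!(vgetq_lane_f64::<1>(r), 2.0);
}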
-#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_n_s32_f32)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzs, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvts_n_s32_f32<const N: i32>(a: f32) -> i32 {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfp2fxs.i32.f32"
-        )]
-        fn _vcvts_n_s32_f32(a: f32, n: i32) -> i32;
-    }
-    _vcvts_n_s32_f32(a, N)
+#[cfg_attr(test, assert_instr(neg))]
+pub unsafe fn vneg_s64(a: int64x1_t) -> int64x1_t {
+    simd_neg(a)
 }
-#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_n_s64_f64)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzs, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtd_n_s64_f64<const N: i32>(a: f64) -> i64 {
-    static_assert!(N >= 1 && N <= 64);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfp2fxs.i64.f64"
-        )]
-        fn _vcvtd_n_s64_f64(a: f64, n: i32) -> i64;
-    }
-    _vcvtd_n_s64_f64(a, N)
+#[cfg_attr(test, assert_instr(neg))]
+pub unsafe fn vnegq_s64(a: int64x2_t) -> int64x2_t {
+    simd_neg(a)
 }
-#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_n_u32_f32)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegd_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzu, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvts_n_u32_f32<const N: i32>(a: f32) -> u32 {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfp2fxu.i32.f32"
-        )]
-        fn _vcvts_n_u32_f32(a: f32, n: i32) -> i32;
-    }
-    _vcvts_n_u32_f32(a, N).as_unsigned()
+#[cfg_attr(test, assert_instr(neg))]
+pub unsafe fn vnegd_s64(a: i64) -> i64 {
+    a.wrapping_neg()
 }
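The `wrapping_neg` fallback above matches the NEG instruction's two's-complement behaviour, including the i64::MIN edge case. A sketch (illustrative name, aarch64 assumed):

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn vnegd_s64_demo() {
    use core::arch::aarch64::*;
    assert_eq!(vnegd_s64(5), -5);
    // Negation wraps rather than trapping: -i64::MIN overflows back to i64::MIN.
    assert_eq!(vnegd_s64(i64::MIN), i64::MIN);
}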
-#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_n_u64_f64)"]
+#[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzu, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtd_n_u64_f64<const N: i32>(a: f64) -> u64 {
-    static_assert!(N >= 1 && N <= 64);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfp2fxu.i64.f64"
-        )]
-        fn _vcvtd_n_u64_f64(a: f64, n: i32) -> i64;
-    }
-    _vcvtd_n_u64_f64(a, N).as_unsigned()
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vpaddd_f64(a: float64x2_t) -> f64 {
+    let a1: f64 = simd_extract!(a, 0);
+    let a2: f64 = simd_extract!(a, 1);
+    a1 + a2
 }
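Pairwise add on a two-lane vector is just a horizontal sum, which is why the body is two extracts and an add. An editorial sketch (aarch64 with NEON assumed):

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn vpaddd_f64_demo() {
    use core::arch::aarch64::*;
    let xs = [1.5_f64, 2.5];
    // The two lanes collapse to the scalar 1.5 + 2.5 = 4.0.
    assert_eq!(vpaddd_f64(vld1q_f64(xs.as_ptr())), 4.0);
}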
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_s32_f32)"]
+#[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadds_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzs))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvts_s32_f32(a: f32) -> i32 {
-    a as i32
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vpadds_f32(a: float32x2_t) -> f32 {
+    let a1: f32 = simd_extract!(a, 0);
+    let a2: f32 = simd_extract!(a, 1);
+    a1 + a2
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_s64_f64)"]
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzs))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtd_s64_f64(a: f64) -> i64 {
-    a as i64
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddd_s64(a: int64x2_t) -> i64 {
+    transmute(vaddvq_u64(transmute(a)))
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvts_u32_f32)"]
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzu))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvts_u32_f32(a: f32) -> u32 {
-    a as u32
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddd_s64(a: int64x2_t) -> i64 {
+    let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+    transmute(vaddvq_u64(transmute(a)))
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtd_u64_f64)"]
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzu))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtd_u64_f64(a: f64) -> u64 {
-    a as u64
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddd_u64(a: uint64x2_t) -> u64 {
+    vaddvq_u64(a)
 }
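The unsigned scalar variant routes straight through `vaddvq_u64`. A sketch under the same assumptions:

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn vpaddd_u64_demo() {
    use core::arch::aarch64::*;
    let xs = [40_u64, 2];
    // Horizontal sum of the two lanes: 40 + 2 = 42.
    assert_eq!(vpaddd_u64(vld1q_u64(xs.as_ptr())), 42);
}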
-#[doc = "Floating-point convert to lower precision narrow, rounding to odd"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtx_f32_f64)"]
+#[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtxn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtx_f32_f64(a: float64x2_t) -> float32x2_t {
+#[cfg_attr(test, assert_instr(faddp))]
+pub unsafe fn vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fcvtxn.v2f32.v2f64"
+            link_name = "llvm.aarch64.neon.faddp.v4f32"
         )]
-        fn _vcvtx_f32_f64(a: float64x2_t) -> float32x2_t;
+        fn _vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
     }
-    _vcvtx_f32_f64(a)
+    _vpaddq_f32(a, b)
 }
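FADDP concatenates the pairwise sums of `a` (low half of the result) and `b` (high half). A hedged sketch (illustrative name, aarch64 with NEON assumed):

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn vpaddq_f32_demo() {
    use core::arch::aarch64::*;
    let xs = [1.0_f32, 2.0, 3.0, 4.0];
    let ys = [10.0_f32, 20.0, 30.0, 40.0];
    // Result lanes: [x0+x1, x2+x3, y0+y1, y2+y3] = [3.0, 7.0, 30.0, 70.0].
    let r = vpaddq_f32(vld1q_f32(xs.as_ptr()), vld1q_f32(ys.as_ptr()));
    assert_eq!(vgetq_lane_f32::<1>(r), 7.0);
    assert_eq!(vgetq_lane_f32::<2>(r), 30.0);
}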
-#[doc = "Floating-point convert to lower precision narrow, rounding to odd"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtx_high_f32_f64)"]
+#[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtxn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t {
-    simd_shuffle!(a, vcvtx_f32_f64(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(faddp))]
+pub unsafe fn vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.faddp.v2f64"
+        )]
+        fn _vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    _vpaddq_f64(a, b)
 }
-#[doc = "Floating-point convert to lower precision narrow, rounding to odd"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtxd_f32_f64)"]
+#[doc = "Add Pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtxn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtxd_f32_f64(a: f64) -> f32 {
-    simd_extract!(vcvtx_f32_f64(vdupq_n_f64(a)), 0)
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.addp.v16i8"
+        )]
+        fn _vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vpaddq_s8(a, b)
 }
-#[doc = "Divide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdiv_f32)"]
+#[doc = "Add Pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fdiv))]
-pub unsafe fn vdiv_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    simd_div(a, b)
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.addp.v8i16"
+        )]
+        fn _vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vpaddq_s16(a, b)
 }
-#[doc = "Divide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdivq_f32)"]
+#[doc = "Add Pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fdiv))]
-pub unsafe fn vdivq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    simd_div(a, b)
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.addp.v4i32"
+        )]
+        fn _vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vpaddq_s32(a, b)
 }
-#[doc = "Divide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdiv_f64)"]
+#[doc = "Add Pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fdiv))]
-pub unsafe fn vdiv_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
-    simd_div(a, b)
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.addp.v2i64"
+        )]
+        fn _vpaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+    _vpaddq_s64(a, b)
 }
-#[doc = "Divide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdivq_f64)"]
+#[doc = "Add Pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fdiv))]
-pub unsafe fn vdivq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    simd_div(a, b)
-}
-#[doc = "Dot product arithmetic (indexed)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(test, assert_instr(sdot, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
-pub unsafe fn vdot_laneq_s32<const LANE: i32>(
-    a: int32x2_t,
-    b: int8x8_t,
-    c: int8x16_t,
-) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let c: int32x4_t = transmute(c);
-    let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-    vdot_s32(a, b, transmute(c))
-}
-#[doc = "Dot product arithmetic (indexed)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(test, assert_instr(sdot, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
-pub unsafe fn vdotq_laneq_s32<const LANE: i32>(
-    a: int32x4_t,
-    b: int8x16_t,
-    c: int8x16_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let c: int32x4_t = transmute(c);
-    let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vdotq_s32(a, b, transmute(c))
-}
-#[doc = "Dot product arithmetic (indexed)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(test, assert_instr(udot, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
-pub unsafe fn vdot_laneq_u32<const LANE: i32>(
-    a: uint32x2_t,
-    b: uint8x8_t,
-    c: uint8x16_t,
-) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let c: uint32x4_t = transmute(c);
-    let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-    vdot_u32(a, b, transmute(c))
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    transmute(vpaddq_s8(transmute(a), transmute(b)))
 }
-#[doc = "Dot product arithmetic (indexed)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_u32)"]
+#[doc = "Add Pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(test, assert_instr(udot, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
-pub unsafe fn vdotq_laneq_u32<const LANE: i32>(
-    a: uint32x4_t,
-    b: uint8x16_t,
-    c: uint8x16_t,
-) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let c: uint32x4_t = transmute(c);
-    let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vdotq_u32(a, b, transmute(c))
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let b: uint8x16_t = simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x16_t = transmute(vpaddq_s8(transmute(a), transmute(b)));
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
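An editorial note on the generated big-endian body above: the lane reversals around the `transmute`s keep architectural lane numbering consistent on big-endian targets, and both the little- and big-endian paths observe the same result. A sketch of that observable behaviour (illustrative name, NEON assumed):

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn vpaddq_u8_demo() {
    use core::arch::aarch64::*;
    let xs = [1_u8; 16];
    let ys = [2_u8; 16];
    let r = vpaddq_u8(vld1q_u8(xs.as_ptr()), vld1q_u8(ys.as_ptr()));
    // Pairs from `a` fill the low eight lanes (1+1), pairs from `b` the high eight (2+2).
    assert_eq!(vgetq_lane_u8::<0>(r), 2);
    assert_eq!(vgetq_lane_u8::<15>(r), 4);
}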
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f64)"]
+#[doc = "Add Pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 0))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdup_lane_f64<const N: i32>(a: float64x1_t) -> float64x1_t {
-    static_assert!(N == 0);
-    a
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    transmute(vpaddq_s16(transmute(a), transmute(b)))
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p64)"]
+#[doc = "Add Pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 0))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdup_lane_p64<const N: i32>(a: poly64x1_t) -> poly64x1_t {
-    static_assert!(N == 0);
-    a
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint16x8_t = transmute(vpaddq_s16(transmute(a), transmute(b)));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f64)"]
+#[doc = "Add Pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 1))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdup_laneq_f64<const N: i32>(a: float64x2_t) -> float64x1_t {
-    static_assert_uimm_bits!(N, 1);
-    transmute::<f64, _>(simd_extract!(a, N as u32))
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    transmute(vpaddq_s32(transmute(a), transmute(b)))
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p64)"]
+#[doc = "Add Pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 1))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdup_laneq_p64<const N: i32>(a: poly64x2_t) -> poly64x1_t {
-    static_assert_uimm_bits!(N, 1);
-    transmute::<u64, _>(simd_extract!(a, N as u32))
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+    let ret_val: uint32x4_t = transmute(vpaddq_s32(transmute(a), transmute(b)));
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_lane_s8)"]
+#[doc = "Add Pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 4))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdupb_lane_s8<const N: i32>(a: int8x8_t) -> i8 {
-    static_assert_uimm_bits!(N, 3);
-    simd_extract!(a, N as u32)
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    transmute(vpaddq_s64(transmute(a), transmute(b)))
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_s16)"]
+#[doc = "Add Pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 4))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vduph_laneq_s16<const N: i32>(a: int16x8_t) -> i16 {
-    static_assert_uimm_bits!(N, 3);
-    simd_extract!(a, N as u32)
+#[cfg_attr(test, assert_instr(addp))]
+pub unsafe fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]);
+    let ret_val: uint64x2_t = transmute(vpaddq_s64(transmute(a), transmute(b)));
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_lane_u8)"]
+#[doc = "Floating-point Maximum Number Pairwise (vector)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnm_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 4))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdupb_lane_u8<const N: i32>(a: uint8x8_t) -> u8 {
-    static_assert_uimm_bits!(N, 3);
-    simd_extract!(a, N as u32)
+pub unsafe fn vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmp.v2f32"
+        )]
+        fn _vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    _vpmaxnm_f32(a, b)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_u16)"]
+#[doc = "Floating-point Maximum Number Pairwise (vector)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 4))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vduph_laneq_u16<const N: i32>(a: uint16x8_t) -> u16 {
-    static_assert_uimm_bits!(N, 3);
-    simd_extract!(a, N as u32)
+pub unsafe fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmp.v4f32"
+        )]
+        fn _vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    _vpmaxnmq_f32(a, b)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_lane_p8)"]
+#[doc = "Floating-point Maximum Number Pairwise (vector)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 4))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdupb_lane_p8<const N: i32>(a: poly8x8_t) -> p8 {
-    static_assert_uimm_bits!(N, 3);
-    simd_extract!(a, N as u32)
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmp.v2f64"
+        )]
+        fn _vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    _vpmaxnmq_f64(a, b)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_p16)"]
+#[doc = "Floating-point maximum number pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmqd_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 4))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vduph_laneq_p16<const N: i32>(a: poly16x8_t) -> p16 {
-    static_assert_uimm_bits!(N, 3);
-    simd_extract!(a, N as u32)
+pub unsafe fn vpmaxnmqd_f64(a: float64x2_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmv.f64.v2f64"
+        )]
+        fn _vpmaxnmqd_f64(a: float64x2_t) -> f64;
+    }
+    _vpmaxnmqd_f64(a)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_s8)"]
+#[doc = "Floating-point maximum number pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnms_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 8))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdupb_laneq_s8<const N: i32>(a: int8x16_t) -> i8 {
-    static_assert_uimm_bits!(N, 4);
-    simd_extract!(a, N as u32)
+pub unsafe fn vpmaxnms_f32(a: float32x2_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmv.f32.v2f32"
+        )]
+        fn _vpmaxnms_f32(a: float32x2_t) -> f32;
+    }
+    _vpmaxnms_f32(a)
 }
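The "Number" pairwise forms follow IEEE maxNum semantics: a quiet NaN loses to a real number. An editorial sketch (illustrative name, aarch64 with NEON assumed):

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn vpmaxnms_f32_demo() {
    use core::arch::aarch64::*;
    let xs = [f32::NAN, 3.0];
    // FMAXNMP returns the numeric operand when the other is a quiet NaN.
    assert_eq!(vpmaxnms_f32(vld1_f32(xs.as_ptr())), 3.0);
}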
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_u8)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 8))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdupb_laneq_u8<const N: i32>(a: uint8x16_t) -> u8 {
-    static_assert_uimm_bits!(N, 4);
-    simd_extract!(a, N as u32)
+#[cfg_attr(test, assert_instr(fmaxp))]
+pub unsafe fn vpmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxp.v4f32"
+        )]
+        fn _vpmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    _vpmaxq_f32(a, b)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_p8)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 8))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdupb_laneq_p8<const N: i32>(a: poly8x16_t) -> p8 {
-    static_assert_uimm_bits!(N, 4);
-    simd_extract!(a, N as u32)
+#[cfg_attr(test, assert_instr(fmaxp))]
+pub unsafe fn vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxp.v2f64"
+        )]
+        fn _vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    _vpmaxq_f64(a, b)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_lane_f64)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 0))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdupd_lane_f64<const N: i32>(a: float64x1_t) -> f64 {
-    static_assert!(N == 0);
-    simd_extract!(a, N as u32)
+#[cfg_attr(test, assert_instr(smaxp))]
+pub unsafe fn vpmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smaxp.v16i8"
+        )]
+        fn _vpmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vpmaxq_s8(a, b)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_lane_s64)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 0))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdupd_lane_s64<const N: i32>(a: int64x1_t) -> i64 {
-    static_assert!(N == 0);
-    simd_extract!(a, N as u32)
+#[cfg_attr(test, assert_instr(smaxp))]
+pub unsafe fn vpmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smaxp.v8i16"
+        )]
+        fn _vpmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vpmaxq_s16(a, b)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_lane_u64)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 0))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdupd_lane_u64<const N: i32>(a: uint64x1_t) -> u64 {
-    static_assert!(N == 0);
-    simd_extract!(a, N as u32)
+#[cfg_attr(test, assert_instr(smaxp))]
+pub unsafe fn vpmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smaxp.v4i32"
+        )]
+        fn _vpmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vpmaxq_s32(a, b)
 }
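SMAXP takes the signed maximum of each adjacent pair, with `a`'s pairs landing in the low half of the result and `b`'s in the high half. A sketch under the same assumptions:

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn vpmaxq_s32_demo() {
    use core::arch::aarch64::*;
    let xs = [1_i32, 9, -3, -7];
    let ys = [5_i32, 6, 7, 8];
    // Result lanes: [max(1,9), max(-3,-7), max(5,6), max(7,8)] = [9, -3, 6, 8].
    let r = vpmaxq_s32(vld1q_s32(xs.as_ptr()), vld1q_s32(ys.as_ptr()));
    assert_eq!(vgetq_lane_s32::<0>(r), 9);
    assert_eq!(vgetq_lane_s32::<1>(r), -3);
}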
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f64)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(dup, N = 0))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdupq_lane_f64<const N: i32>(a: float64x1_t) -> float64x2_t {
-    static_assert!(N == 0);
-    simd_shuffle!(a, a, [N as u32, N as u32])
+#[cfg_attr(test, assert_instr(umaxp))]
+pub unsafe fn vpmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umaxp.v16i8"
+        )]
+        fn _vpmaxq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vpmaxq_u8(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p64)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(dup, N = 0))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdupq_lane_p64<const N: i32>(a: poly64x1_t) -> poly64x2_t {
-    static_assert!(N == 0);
-    simd_shuffle!(a, a, [N as u32, N as u32])
+#[cfg_attr(test, assert_instr(umaxp))]
+pub unsafe fn vpmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umaxp.v8i16"
+        )]
+        fn _vpmaxq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vpmaxq_u16(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f64)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(dup, N = 1))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdupq_laneq_f64<const N: i32>(a: float64x2_t) -> float64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    simd_shuffle!(a, a, [N as u32, N as u32])
+#[cfg_attr(test, assert_instr(umaxp))]
+pub unsafe fn vpmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umaxp.v4i32"
+        )]
+        fn _vpmaxq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vpmaxq_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p64)"]
+#[doc = "Floating-point maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxqd_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(dup, N = 1))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdupq_laneq_p64<const N: i32>(a: poly64x2_t) -> poly64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    simd_shuffle!(a, a, [N as u32, N as u32])
+#[cfg_attr(test, assert_instr(fmaxp))]
+pub unsafe fn vpmaxqd_f64(a: float64x2_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxv.f64.v2f64"
+        )]
+        fn _vpmaxqd_f64(a: float64x2_t) -> f64;
+    }
+    _vpmaxqd_f64(a)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_f32)"]
+#[doc = "Floating-point maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxs_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 1))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdups_lane_f32<const N: i32>(a: float32x2_t) -> f32 {
-    static_assert_uimm_bits!(N, 1);
-    simd_extract!(a, N as u32)
+#[cfg_attr(test, assert_instr(fmaxp))]
+pub unsafe fn vpmaxs_f32(a: float32x2_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxv.f32.v2f32"
+        )]
+        fn _vpmaxs_f32(a: float32x2_t) -> f32;
+    }
+    _vpmaxs_f32(a)
 }
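Review note: unlike the `q` variants, `vpmaxqd_f64` and `vpmaxs_f32` reduce the two lanes of a single vector to a scalar (one-register `fmaxp`). A sketch under the same assumptions as above:

```rust
// Sketch: pairwise-max reduction of one two-lane vector to a scalar.
#[cfg(target_arch = "aarch64")]
fn main() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vld1_f32([1.5f32, 3.25].as_ptr());
        assert_eq!(vpmaxs_f32(a), 3.25);
        let b = vld1q_f64([2.0f64, -1.0].as_ptr());
        assert_eq!(vpmaxqd_f64(b), 2.0);
    }
}
#[cfg(not(target_arch = "aarch64"))]
fn main() {}
```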
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_f64)"]
+#[doc = "Floating-point Minimum Number Pairwise (vector)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 1))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(fminnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdupd_laneq_f64<const N: i32>(a: float64x2_t) -> f64 {
-    static_assert_uimm_bits!(N, 1);
-    simd_extract!(a, N as u32)
+pub unsafe fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmp.v2f32"
+        )]
+        fn _vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    _vpminnm_f32(a, b)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_s32)"]
+#[doc = "Floating-point Minimum Number Pairwise (vector)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 1))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(fminnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdups_lane_s32<const N: i32>(a: int32x2_t) -> i32 {
-    static_assert_uimm_bits!(N, 1);
-    simd_extract!(a, N as u32)
+pub unsafe fn vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmp.v4f32"
+        )]
+        fn _vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    _vpminnmq_f32(a, b)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_s64)"]
+#[doc = "Floating-point Minimum Number Pairwise (vector)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 1))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(fminnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdupd_laneq_s64<const N: i32>(a: int64x2_t) -> i64 {
-    static_assert_uimm_bits!(N, 1);
-    simd_extract!(a, N as u32)
+pub unsafe fn vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmp.v2f64"
+        )]
+        fn _vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    _vpminnmq_f64(a, b)
 }
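Review note: the `vpminnm*` family lowers to `fminnmp`, the IEEE 754 minNum operation, so a quiet NaN in one lane of a pair loses to the other (numeric) lane instead of propagating. A behaviour sketch (illustrative values):

```rust
// Sketch: fminnmp semantics — NaN loses to a number within a pair.
#[cfg(target_arch = "aarch64")]
fn main() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vld1_f32([f32::NAN, 2.0].as_ptr());
        let b = vld1_f32([1.0f32, 5.0].as_ptr());
        let mut out = [0.0f32; 2];
        vst1_f32(out.as_mut_ptr(), vpminnm_f32(a, b));
        assert_eq!(out, [2.0, 1.0]); // minnm(NaN, 2.0) = 2.0
    }
}
#[cfg(not(target_arch = "aarch64"))]
fn main() {}
```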
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_u32)"]
+#[doc = "Floating-point minimum number pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmqd_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 1))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(fminnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdups_lane_u32<const N: i32>(a: uint32x2_t) -> u32 {
-    static_assert_uimm_bits!(N, 1);
-    simd_extract!(a, N as u32)
+pub unsafe fn vpminnmqd_f64(a: float64x2_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmv.f64.v2f64"
+        )]
+        fn _vpminnmqd_f64(a: float64x2_t) -> f64;
+    }
+    _vpminnmqd_f64(a)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_u64)"]
+#[doc = "Floating-point minimum number pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnms_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 1))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(fminnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdupd_laneq_u64<const N: i32>(a: uint64x2_t) -> u64 {
-    static_assert_uimm_bits!(N, 1);
-    simd_extract!(a, N as u32)
+pub unsafe fn vpminnms_f32(a: float32x2_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmv.f32.v2f32"
+        )]
+        fn _vpminnms_f32(a: float32x2_t) -> f32;
+    }
+    _vpminnms_f32(a)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_laneq_f32)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdups_laneq_f32<const N: i32>(a: float32x4_t) -> f32 {
-    static_assert_uimm_bits!(N, 2);
-    simd_extract!(a, N as u32)
+#[cfg_attr(test, assert_instr(fminp))]
+pub unsafe fn vpminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminp.v4f32"
+        )]
+        fn _vpminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    _vpminq_f32(a, b)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_s16)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vduph_lane_s16<const N: i32>(a: int16x4_t) -> i16 {
-    static_assert_uimm_bits!(N, 2);
-    simd_extract!(a, N as u32)
+#[cfg_attr(test, assert_instr(fminp))]
+pub unsafe fn vpminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminp.v2f64"
+        )]
+        fn _vpminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    _vpminq_f64(a, b)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_laneq_s32)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdups_laneq_s32<const N: i32>(a: int32x4_t) -> i32 {
-    static_assert_uimm_bits!(N, 2);
-    simd_extract!(a, N as u32)
+#[cfg_attr(test, assert_instr(sminp))]
+pub unsafe fn vpminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminp.v16i8"
+        )]
+        fn _vpminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vpminq_s8(a, b)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_u16)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vduph_lane_u16<const N: i32>(a: uint16x4_t) -> u16 {
-    static_assert_uimm_bits!(N, 2);
-    simd_extract!(a, N as u32)
+#[cfg_attr(test, assert_instr(sminp))]
+pub unsafe fn vpminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminp.v8i16"
+        )]
+        fn _vpminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vpminq_s16(a, b)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_laneq_u32)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdups_laneq_u32<const N: i32>(a: uint32x4_t) -> u32 {
-    static_assert_uimm_bits!(N, 2);
-    simd_extract!(a, N as u32)
+#[cfg_attr(test, assert_instr(sminp))]
+pub unsafe fn vpminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminp.v4i32"
+        )]
+        fn _vpminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vpminq_s32(a, b)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_p16)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vduph_lane_p16<const N: i32>(a: poly16x4_t) -> p16 {
-    static_assert_uimm_bits!(N, 2);
-    simd_extract!(a, N as u32)
+#[cfg_attr(test, assert_instr(uminp))]
+pub unsafe fn vpminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminp.v16i8"
+        )]
+        fn _vpminq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vpminq_u8(a.as_signed(), b.as_signed()).as_unsigned()
 }
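Review note: the unsigned `vpminq_*`/`vpmaxq_*` wrappers above declare the LLVM intrinsic on signed vector types and bitcast at the boundary (`as_signed`/`as_unsigned` are internal no-op reinterprets), so callers see plain unsigned semantics. A sketch:

```rust
// Sketch: the reinterpret plumbing is invisible — this is an ordinary
// unsigned pairwise minimum.
#[cfg(target_arch = "aarch64")]
fn main() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vld1q_u16([1u16, 9, 3, 7, 0, 6, 5, 2].as_ptr());
        let b = vld1q_u16([8u16, 4, 10, 11, 12, 13, 14, 15].as_ptr());
        let mut out = [0u16; 8];
        vst1q_u16(out.as_mut_ptr(), vpminq_u16(a, b));
        assert_eq!(out, [1, 3, 0, 2, 4, 10, 12, 14]);
    }
}
#[cfg(not(target_arch = "aarch64"))]
fn main() {}
```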
-#[doc = "Three-way exclusive OR"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_s8)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-#[cfg_attr(test, assert_instr(eor3))]
-pub unsafe fn veor3q_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(uminp))]
+pub unsafe fn vpminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.eor3s.v16i8"
+            link_name = "llvm.aarch64.neon.uminp.v8i16"
         )]
-        fn _veor3q_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
+        fn _vpminq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
     }
-    _veor3q_s8(a, b, c)
+    _vpminq_u16(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Three-way exclusive OR"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_s16)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-#[cfg_attr(test, assert_instr(eor3))]
-pub unsafe fn veor3q_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(uminp))]
+pub unsafe fn vpminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.eor3s.v8i16"
+            link_name = "llvm.aarch64.neon.uminp.v4i32"
         )]
-        fn _veor3q_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
+        fn _vpminq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     }
-    _veor3q_s16(a, b, c)
+    _vpminq_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Three-way exclusive OR"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_s32)"]
+#[doc = "Floating-point minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminqd_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-#[cfg_attr(test, assert_instr(eor3))]
-pub unsafe fn veor3q_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fminp))]
+pub unsafe fn vpminqd_f64(a: float64x2_t) -> f64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.eor3s.v4i32"
+            link_name = "llvm.aarch64.neon.fminv.f64.v2f64"
         )]
-        fn _veor3q_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+        fn _vpminqd_f64(a: float64x2_t) -> f64;
     }
-    _veor3q_s32(a, b, c)
+    _vpminqd_f64(a)
 }
-#[doc = "Three-way exclusive OR"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_s64)"]
+#[doc = "Floating-point minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmins_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-#[cfg_attr(test, assert_instr(eor3))]
-pub unsafe fn veor3q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fminp))]
+pub unsafe fn vpmins_f32(a: float32x2_t) -> f32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.eor3s.v2i64"
+            link_name = "llvm.aarch64.neon.fminv.f32.v2f32"
         )]
-        fn _veor3q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+        fn _vpmins_f32(a: float32x2_t) -> f32;
     }
-    _veor3q_s64(a, b, c)
+    _vpmins_f32(a)
 }
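Review note: `vpminqd_f64`/`vpmins_f32` use the plain `fminp`, which propagates NaN, in contrast to the `minnm` reductions above. A sketch of the difference:

```rust
// Sketch: fminp propagates NaN; fminnmp returns the numeric lane.
#[cfg(target_arch = "aarch64")]
fn main() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vld1_f32([f32::NAN, 2.0].as_ptr());
        assert!(vpmins_f32(a).is_nan());
        assert_eq!(vpminnms_f32(a), 2.0);
    }
}
#[cfg(not(target_arch = "aarch64"))]
fn main() {}
```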
-#[doc = "Three-way exclusive OR"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_u8)"]
+#[doc = "Signed saturating Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-#[cfg_attr(test, assert_instr(eor3))]
-pub unsafe fn veor3q_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))]
+pub unsafe fn vqabs_s64(a: int64x1_t) -> int64x1_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.eor3u.v16i8"
+            link_name = "llvm.aarch64.neon.sqabs.v1i64"
         )]
-        fn _veor3q_u8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
+        fn _vqabs_s64(a: int64x1_t) -> int64x1_t;
     }
-    _veor3q_u8(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+    _vqabs_s64(a)
 }
-#[doc = "Three-way exclusive OR"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_u16)"]
+#[doc = "Signed saturating Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-#[cfg_attr(test, assert_instr(eor3))]
-pub unsafe fn veor3q_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))]
+pub unsafe fn vqabsq_s64(a: int64x2_t) -> int64x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.eor3u.v8i16"
+            link_name = "llvm.aarch64.neon.sqabs.v2i64"
         )]
-        fn _veor3q_u16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
+        fn _vqabsq_s64(a: int64x2_t) -> int64x2_t;
     }
-    _veor3q_u16(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+    _vqabsq_s64(a)
 }
-#[doc = "Three-way exclusive OR"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_u32)"]
+#[doc = "Signed saturating absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsb_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-#[cfg_attr(test, assert_instr(eor3))]
-pub unsafe fn veor3q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))]
+pub unsafe fn vqabsb_s8(a: i8) -> i8 {
+    simd_extract!(vqabs_s8(vdup_n_s8(a)), 0)
+}
+#[doc = "Signed saturating absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsh_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))]
+pub unsafe fn vqabsh_s16(a: i16) -> i16 {
+    simd_extract!(vqabs_s16(vdup_n_s16(a)), 0)
+}
+#[doc = "Signed saturating absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabss_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))]
+pub unsafe fn vqabss_s32(a: i32) -> i32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.eor3u.v4i32"
+            link_name = "llvm.aarch64.neon.sqabs.i32"
         )]
-        fn _veor3q_u32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+        fn _vqabss_s32(a: i32) -> i32;
     }
-    _veor3q_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+    _vqabss_s32(a)
 }
-#[doc = "Three-way exclusive OR"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_u64)"]
+#[doc = "Signed saturating absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsd_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-#[cfg_attr(test, assert_instr(eor3))]
-pub unsafe fn veor3q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))]
+pub unsafe fn vqabsd_s64(a: i64) -> i64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.eor3u.v2i64"
+            link_name = "llvm.aarch64.neon.sqabs.i64"
         )]
-        fn _veor3q_u64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+        fn _vqabsd_s64(a: i64) -> i64;
     }
-    _veor3q_u64(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+    _vqabsd_s64(a)
 }
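Review note: the point of the saturating-abs scalar forms is the `i{8,16,32,64}::MIN` edge case, where a plain `abs` would overflow. A sketch:

```rust
// Sketch: sqabs clamps the one unrepresentable input to MAX instead of
// wrapping (or panicking, as i8::MIN.abs() would in a debug build).
#[cfg(target_arch = "aarch64")]
fn main() {
    use core::arch::aarch64::*;
    unsafe {
        assert_eq!(vqabsb_s8(-5), 5);
        assert_eq!(vqabsb_s8(i8::MIN), i8::MAX);
        assert_eq!(vqabsd_s64(i64::MIN), i64::MAX);
    }
}
#[cfg(not(target_arch = "aarch64"))]
fn main() {}
```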
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f64)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddb_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ext, N = 1))]
-#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vextq_f64<const N: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    match N & 0b1 {
-        0 => simd_shuffle!(a, b, [0, 1]),
-        1 => simd_shuffle!(a, b, [1, 2]),
-        _ => unreachable_unchecked(),
-    }
+#[cfg_attr(test, assert_instr(sqadd))]
+pub unsafe fn vqaddb_s8(a: i8, b: i8) -> i8 {
+    let a: int8x8_t = vdup_n_s8(a);
+    let b: int8x8_t = vdup_n_s8(b);
+    simd_extract!(vqadd_s8(a, b), 0)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p64)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddh_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ext, N = 1))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vextq_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    match N & 0b1 {
-        0 => simd_shuffle!(a, b, [0, 1]),
-        1 => simd_shuffle!(a, b, [1, 2]),
-        _ => unreachable_unchecked(),
-    }
+#[cfg_attr(test, assert_instr(sqadd))]
+pub unsafe fn vqaddh_s16(a: i16, b: i16) -> i16 {
+    let a: int16x4_t = vdup_n_s16(a);
+    let b: int16x4_t = vdup_n_s16(b);
+    simd_extract!(vqadd_s16(a, b), 0)
 }
-#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f64)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddb_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmadd))]
-pub unsafe fn vfma_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.fma.v1f64"
-        )]
-        fn _vfma_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t;
-    }
-    _vfma_f64(b, c, a)
+#[cfg_attr(test, assert_instr(uqadd))]
+pub unsafe fn vqaddb_u8(a: u8, b: u8) -> u8 {
+    let a: uint8x8_t = vdup_n_u8(a);
+    let b: uint8x8_t = vdup_n_u8(b);
+    simd_extract!(vqadd_u8(a, b), 0)
 }
-#[doc = "Floating-point fused multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_lane_f32)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddh_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfma_lane_f32<const LANE: i32>(
-    a: float32x2_t,
-    b: float32x2_t,
-    c: float32x2_t,
-) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vfma_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32)))
+#[cfg_attr(test, assert_instr(uqadd))]
+pub unsafe fn vqaddh_u16(a: u16, b: u16) -> u16 {
+    let a: uint16x4_t = vdup_n_u16(a);
+    let b: uint16x4_t = vdup_n_u16(b);
+    simd_extract!(vqadd_u16(a, b), 0)
 }
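Review note: the 8/16-bit scalar saturating adds are implemented by broadcasting into a vector, adding, and extracting lane 0; the observable behaviour is clamping at the type bounds. A sketch:

```rust
// Sketch: scalar saturating add clamps instead of wrapping.
#[cfg(target_arch = "aarch64")]
fn main() {
    use core::arch::aarch64::*;
    unsafe {
        assert_eq!(vqaddb_s8(100, 100), i8::MAX);
        assert_eq!(vqaddb_s8(-100, -100), i8::MIN);
        assert_eq!(vqaddh_u16(65_000, 1_000), u16::MAX);
    }
}
#[cfg(not(target_arch = "aarch64"))]
fn main() {}
```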
-#[doc = "Floating-point fused multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_laneq_f32)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadds_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfma_laneq_f32<const LANE: i32>(
-    a: float32x2_t,
-    b: float32x2_t,
-    c: float32x4_t,
-) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vfma_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32)))
+#[cfg_attr(test, assert_instr(sqadd))]
+pub unsafe fn vqadds_s32(a: i32, b: i32) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqadd.i32"
+        )]
+        fn _vqadds_s32(a: i32, b: i32) -> i32;
+    }
+    _vqadds_s32(a, b)
 }
-#[doc = "Floating-point fused multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_lane_f32)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddd_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmaq_lane_f32<const LANE: i32>(
-    a: float32x4_t,
-    b: float32x4_t,
-    c: float32x2_t,
-) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vfmaq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32)))
+#[cfg_attr(test, assert_instr(sqadd))]
+pub unsafe fn vqaddd_s64(a: i64, b: i64) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqadd.i64"
+        )]
+        fn _vqaddd_s64(a: i64, b: i64) -> i64;
+    }
+    _vqaddd_s64(a, b)
 }
-#[doc = "Floating-point fused multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_laneq_f32)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadds_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmaq_laneq_f32<const LANE: i32>(
-    a: float32x4_t,
-    b: float32x4_t,
-    c: float32x4_t,
-) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vfmaq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32)))
+#[cfg_attr(test, assert_instr(uqadd))]
+pub unsafe fn vqadds_u32(a: u32, b: u32) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqadd.i32"
+        )]
+        fn _vqadds_u32(a: i32, b: i32) -> i32;
+    }
+    _vqadds_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Floating-point fused multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_laneq_f64)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddd_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmaq_laneq_f64<const LANE: i32>(
-    a: float64x2_t,
-    b: float64x2_t,
-    c: float64x2_t,
-) -> float64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vfmaq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32)))
+#[cfg_attr(test, assert_instr(uqadd))]
+pub unsafe fn vqaddd_u64(a: u64, b: u64) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqadd.i64"
+        )]
+        fn _vqaddd_u64(a: i64, b: i64) -> i64;
+    }
+    _vqaddd_u64(a.as_signed(), b.as_signed()).as_unsigned()
 }
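Review note: `vqadds_u32`/`vqaddd_u64` call straight into `llvm.aarch64.neon.uqadd.*` (again via the signed-typed shims), so the 32- and 64-bit scalars avoid the dup/extract round trip. Behaviour sketch:

```rust
// Sketch: 32/64-bit unsigned saturating add, clamped at MAX.
#[cfg(target_arch = "aarch64")]
fn main() {
    use core::arch::aarch64::*;
    unsafe {
        assert_eq!(vqadds_u32(u32::MAX - 1, 5), u32::MAX);
        assert_eq!(vqaddd_u64(u64::MAX, 1), u64::MAX);
        assert_eq!(vqaddd_u64(1, 2), 3);
    }
}
#[cfg(not(target_arch = "aarch64"))]
fn main() {}
```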
-#[doc = "Floating-point fused multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_lane_f64)"]
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfma_lane_f64<const LANE: i32>(
-    a: float64x1_t,
-    b: float64x1_t,
-    c: float64x1_t,
-) -> float64x1_t {
-    static_assert!(LANE == 0);
-    vfma_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32)))
+pub unsafe fn vqdmlal_high_lane_s16<const N: i32>(
+    a: int32x4_t,
+    b: int16x8_t,
+    c: int16x4_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    vqaddq_s32(a, vqdmull_high_lane_s16::<N>(b, c))
 }
-#[doc = "Floating-point fused multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_laneq_f64)"]
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfma_laneq_f64<const LANE: i32>(
-    a: float64x1_t,
-    b: float64x1_t,
-    c: float64x2_t,
-) -> float64x1_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vfma_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32)))
+pub unsafe fn vqdmlal_high_laneq_s16<const N: i32>(
+    a: int32x4_t,
+    b: int16x8_t,
+    c: int16x8_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(N, 3);
+    vqaddq_s32(a, vqdmull_high_laneq_s16::<N>(b, c))
 }
-#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f64)"]
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
+#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmadd))]
-pub unsafe fn vfma_n_f64(a: float64x1_t, b: float64x1_t, c: f64) -> float64x1_t {
-    vfma_f64(a, b, vdup_n_f64(c))
+pub unsafe fn vqdmlal_high_lane_s32<const N: i32>(
+    a: int64x2_t,
+    b: int32x4_t,
+    c: int32x2_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    vqaddq_s64(a, vqdmull_high_lane_s32::<N>(b, c))
 }
-#[doc = "Floating-point fused multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmad_lane_f64)"]
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmad_lane_f64<const LANE: i32>(a: f64, b: f64, c: float64x1_t) -> f64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.fma.f64"
-        )]
-        fn _vfmad_lane_f64(a: f64, b: f64, c: f64) -> f64;
-    }
-    static_assert!(LANE == 0);
-    let c: f64 = simd_extract!(c, LANE as u32);
-    _vfmad_lane_f64(b, c, a)
+pub unsafe fn vqdmlal_high_laneq_s32<const N: i32>(
+    a: int64x2_t,
+    b: int32x4_t,
+    c: int32x4_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(N, 2);
+    vqaddq_s64(a, vqdmull_high_laneq_s32::<N>(b, c))
 }
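Review note: the `_high_lane` forms compute `a + 2 * b_high * c[N]` with widening and saturation, where `b_high` is the upper half of `b`. A worked sketch (values chosen so nothing saturates):

```rust
// Sketch: vqdmlal_high_lane_s16 = a + 2 * (upper half of b) * c[N],
// widened to i32 with saturation.
#[cfg(target_arch = "aarch64")]
fn main() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdupq_n_s32(10);
        let b = vld1q_s16([0i16, 0, 0, 0, 1, 2, 3, 4].as_ptr()); // high = [1,2,3,4]
        let c = vdup_n_s16(3);
        let mut out = [0i32; 4];
        vst1q_s32(out.as_mut_ptr(), vqdmlal_high_lane_s16::<0>(a, b, c));
        assert_eq!(out, [16, 22, 28, 34]); // 10 + 2*[1,2,3,4]*3
    }
}
#[cfg(not(target_arch = "aarch64"))]
fn main() {}
```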
-#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f64)"]
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmla))]
-pub unsafe fn vfmaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.fma.v2f64"
-        )]
-        fn _vfmaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t;
-    }
-    _vfmaq_f64(b, c, a)
+pub unsafe fn vqdmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
+    vqaddq_s32(a, vqdmull_high_n_s16(b, c))
 }
-#[doc = "Floating-point fused multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_lane_f64)"]
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmla, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqdmlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmaq_lane_f64<const LANE: i32>(
-    a: float64x2_t,
-    b: float64x2_t,
-    c: float64x1_t,
-) -> float64x2_t {
-    static_assert!(LANE == 0);
-    vfmaq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32)))
+pub unsafe fn vqdmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+    vqaddq_s32(a, vqdmull_high_s16(b, c))
 }
-#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_n_f64)"]
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmla))]
-pub unsafe fn vfmaq_n_f64(a: float64x2_t, b: float64x2_t, c: f64) -> float64x2_t {
-    vfmaq_f64(a, b, vdupq_n_f64(c))
+pub unsafe fn vqdmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
+    vqaddq_s64(a, vqdmull_high_n_s32(b, c))
 }
-#[doc = "Floating-point fused multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmas_lane_f32)"]
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqdmlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmas_lane_f32<const LANE: i32>(a: f32, b: f32, c: float32x2_t) -> f32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.fma.f32"
-        )]
-        fn _vfmas_lane_f32(a: f32, b: f32, c: f32) -> f32;
-    }
-    static_assert_uimm_bits!(LANE, 1);
-    let c: f32 = simd_extract!(c, LANE as u32);
-    _vfmas_lane_f32(b, c, a)
+pub unsafe fn vqdmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+    vqaddq_s64(a, vqdmull_high_s32(b, c))
 }
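Review note: the `_n` and plain `_high` forms are the same operation with the multiplier passed directly (or as a full vector whose upper half is used); saturation applies to both the doubling multiply and the accumulate. A sketch of the clamping:

```rust
// Sketch: accumulating past i32::MAX saturates rather than wrapping.
#[cfg(target_arch = "aarch64")]
fn main() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdupq_n_s32(i32::MAX);
        let b = vdupq_n_s16(1_000);
        let mut out = [0i32; 4];
        vst1q_s32(out.as_mut_ptr(), vqdmlal_high_n_s16(a, b, 1_000));
        assert_eq!(out, [i32::MAX; 4]);
    }
}
#[cfg(not(target_arch = "aarch64"))]
fn main() {}
```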
-#[doc = "Floating-point fused multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmas_laneq_f32)"]
+#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmlal, N = 2))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmas_laneq_f32<const LANE: i32>(a: f32, b: f32, c: float32x4_t) -> f32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.fma.f32"
-        )]
-        fn _vfmas_laneq_f32(a: f32, b: f32, c: f32) -> f32;
-    }
-    static_assert_uimm_bits!(LANE, 2);
-    let c: f32 = simd_extract!(c, LANE as u32);
-    _vfmas_laneq_f32(b, c, a)
+pub unsafe fn vqdmlal_laneq_s16<const N: i32>(
+    a: int32x4_t,
+    b: int16x4_t,
+    c: int16x8_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(N, 3);
+    vqaddq_s32(a, vqdmull_laneq_s16::<N>(b, c))
 }
-#[doc = "Floating-point fused multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmad_laneq_f64)"]
+#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmlal, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmad_laneq_f64<const LANE: i32>(a: f64, b: f64, c: float64x2_t) -> f64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.fma.f64"
-        )]
-        fn _vfmad_laneq_f64(a: f64, b: f64, c: f64) -> f64;
-    }
-    static_assert_uimm_bits!(LANE, 1);
-    let c: f64 = simd_extract!(c, LANE as u32);
-    _vfmad_laneq_f64(b, c, a)
+pub unsafe fn vqdmlal_laneq_s32<const N: i32>(
+    a: int64x2_t,
+    b: int32x2_t,
+    c: int32x4_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(N, 2);
+    vqaddq_s64(a, vqdmull_laneq_s32::<N>(b, c))
 }
-#[doc = "Floating-point fused multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f64)"]
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub))]
+#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfms_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
-    let b: float64x1_t = simd_neg(b);
-    vfma_f64(a, b, c)
+pub unsafe fn vqdmlalh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
+    static_assert_uimm_bits!(LANE, 2);
+    vqdmlalh_s16(a, b, simd_extract!(c, LANE as u32))
 }
-#[doc = "Floating-point fused multiply-subtract to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_lane_f32)"]
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfms_lane_f32<const LANE: i32>(
-    a: float32x2_t,
-    b: float32x2_t,
-    c: float32x2_t,
-) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vfms_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32)))
+pub unsafe fn vqdmlalh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32 {
+    static_assert_uimm_bits!(LANE, 3);
+    vqdmlalh_s16(a, b, simd_extract!(c, LANE as u32))
 }
-#[doc = "Floating-point fused multiply-subtract to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_laneq_f32)"]
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfms_laneq_f32<const LANE: i32>(
-    a: float32x2_t,
-    b: float32x2_t,
-    c: float32x4_t,
-) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vfms_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32)))
+pub unsafe fn vqdmlals_lane_s32<const LANE: i32>(a: i64, b: i32, c: int32x2_t) -> i64 {
+    static_assert_uimm_bits!(LANE, 1);
+    vqdmlals_s32(a, b, simd_extract!(c, LANE as u32))
 }
-#[doc = "Floating-point fused multiply-subtract to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_lane_f32)"]
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmsq_lane_f32<const LANE: i32>(
-    a: float32x4_t,
-    b: float32x4_t,
-    c: float32x2_t,
-) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vfmsq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32)))
+pub unsafe fn vqdmlals_laneq_s32<const LANE: i32>(a: i64, b: i32, c: int32x4_t) -> i64 {
+    static_assert_uimm_bits!(LANE, 2);
+    vqdmlals_s32(a, b, simd_extract!(c, LANE as u32))
 }
-#[doc = "Floating-point fused multiply-subtract to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_laneq_f32)"]
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqdmlal))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmsq_laneq_f32<const LANE: i32>(
-    a: float32x4_t,
-    b: float32x4_t,
-    c: float32x4_t,
-) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vfmsq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32)))
+pub unsafe fn vqdmlalh_s16(a: i32, b: i16, c: i16) -> i32 {
+    let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c));
+    vqadds_s32(a, simd_extract!(x, 0))
 }
-#[doc = "Floating-point fused multiply-subtract to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_laneq_f64)"]
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqdmlal))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmsq_laneq_f64<const LANE: i32>(
-    a: float64x2_t,
-    b: float64x2_t,
-    c: float64x2_t,
-) -> float64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vfmsq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32)))
+pub unsafe fn vqdmlals_s32(a: i64, b: i32, c: i32) -> i64 {
+    vqaddd_s64(a, vqdmulls_s32(b, c))
 }
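Review note: the scalar forms `vqdmlalh_s16`/`vqdmlals_s32` compute `a + sat(2 * b * c)` with a saturating accumulate. A quick numeric sketch:

```rust
// Sketch: scalar saturating doubling multiply-add.
#[cfg(target_arch = "aarch64")]
fn main() {
    use core::arch::aarch64::*;
    unsafe {
        assert_eq!(vqdmlalh_s16(1, 2, 3), 13);               // 1 + 2*2*3
        assert_eq!(vqdmlals_s32(5, 4, -2), -11);             // 5 + 2*4*(-2)
        assert_eq!(vqdmlalh_s16(i32::MAX, 1, 1), i32::MAX);  // accumulate clamps
    }
}
#[cfg(not(target_arch = "aarch64"))]
fn main() {}
```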
-#[doc = "Floating-point fused multiply-subtract to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_lane_f64)"]
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfms_lane_f64<const LANE: i32>(
-    a: float64x1_t,
-    b: float64x1_t,
-    c: float64x1_t,
-) -> float64x1_t {
-    static_assert!(LANE == 0);
-    vfms_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32)))
+pub unsafe fn vqdmlsl_high_lane_s16<const N: i32>(
+    a: int32x4_t,
+    b: int16x8_t,
+    c: int16x4_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    vqsubq_s32(a, vqdmull_high_lane_s16::<N>(b, c))
 }
-#[doc = "Floating-point fused multiply-subtract to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_laneq_f64)"]
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfms_laneq_f64<const LANE: i32>(
-    a: float64x1_t,
-    b: float64x1_t,
-    c: float64x2_t,
-) -> float64x1_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vfms_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32)))
+pub unsafe fn vqdmlsl_high_laneq_s16<const N: i32>(
+    a: int32x4_t,
+    b: int16x8_t,
+    c: int16x8_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(N, 3);
+    vqsubq_s32(a, vqdmull_high_laneq_s16::<N>(b, c))
 }
-#[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f64)"]
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub))]
+#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
+#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfms_n_f64(a: float64x1_t, b: float64x1_t, c: f64) -> float64x1_t {
-    vfms_f64(a, b, vdup_n_f64(c))
+pub unsafe fn vqdmlsl_high_lane_s32<const N: i32>(
+    a: int64x2_t,
+    b: int32x4_t,
+    c: int32x2_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    vqsubq_s64(a, vqdmull_high_lane_s32::<N>(b, c))
 }
-#[doc = "Floating-point fused multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f64)"]
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmls))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
-    let b: float64x2_t = simd_neg(b);
-    vfmaq_f64(a, b, c)
+#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmlsl_high_laneq_s32<const N: i32>(
+    a: int64x2_t,
+    b: int32x4_t,
+    c: int32x4_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(N, 2);
+    vqsubq_s64(a, vqdmull_high_laneq_s32::<N>(b, c))
 }
-#[doc = "Floating-point fused multiply-subtract to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_lane_f64)"]
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmls, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqdmlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmsq_lane_f64<const LANE: i32>(
-    a: float64x2_t,
-    b: float64x2_t,
-    c: float64x1_t,
-) -> float64x2_t {
-    static_assert!(LANE == 0);
-    vfmsq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32)))
+pub unsafe fn vqdmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
+    vqsubq_s32(a, vqdmull_high_n_s16(b, c))
 }
-#[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_n_f64)"]
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmls))]
+#[cfg_attr(test, assert_instr(sqdmlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmsq_n_f64(a: float64x2_t, b: float64x2_t, c: f64) -> float64x2_t {
-    vfmsq_f64(a, b, vdupq_n_f64(c))
+pub unsafe fn vqdmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+    vqsubq_s32(a, vqdmull_high_s16(b, c))
 }
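Review note: the `vqdmlsl*` family mirrors `vqdmlal*` with a saturating subtract, i.e. `a - sat(2 * b_high * c_high)`. A sketch:

```rust
// Sketch: vqdmlsl_high_s16 = a - 2 * (upper half of b) * (upper half of c).
#[cfg(target_arch = "aarch64")]
fn main() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdupq_n_s32(100);
        let b = vld1q_s16([0i16, 0, 0, 0, 5, 6, 7, 8].as_ptr());
        let c = vdupq_n_s16(2);
        let mut out = [0i32; 4];
        vst1q_s32(out.as_mut_ptr(), vqdmlsl_high_s16(a, b, c));
        assert_eq!(out, [80, 76, 72, 68]); // 100 - 2*[5,6,7,8]*2
    }
}
#[cfg(not(target_arch = "aarch64"))]
fn main() {}
```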
-#[doc = "Floating-point fused multiply-subtract to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmss_lane_f32)"]
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqdmlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmss_lane_f32<const LANE: i32>(a: f32, b: f32, c: float32x2_t) -> f32 {
-    vfmas_lane_f32::<LANE>(a, -b, c)
+pub unsafe fn vqdmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
+    vqsubq_s64(a, vqdmull_high_n_s32(b, c))
 }
-#[doc = "Floating-point fused multiply-subtract to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmss_laneq_f32)"]
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqdmlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmss_laneq_f32<const LANE: i32>(a: f32, b: f32, c: float32x4_t) -> f32 {
-    vfmas_laneq_f32::<LANE>(a, -b, c)
+pub unsafe fn vqdmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+    vqsubq_s64(a, vqdmull_high_s32(b, c))
 }
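(Reviewer annotation, not part of the patch.) The `vqdmlsl_high_*` family subtracts the doubled widening product of the two *high* halves from the accumulator, saturating on overflow — exactly the `vqsubq`/`vqdmull_high` composition above. A minimal caller-side sketch, assuming an aarch64 target where these `core::arch::aarch64` intrinsics are available (the `_demo` name is illustrative):

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn vqdmlsl_high_demo() {
    use core::arch::aarch64::*;
    let acc = vdupq_n_s32(100_000);
    let b = vdupq_n_s16(300); // only the high half (lanes 4..8) is used
    let c = vdupq_n_s16(200);
    // Each lane: 100_000 - 2 * 300 * 200 = -20_000 (no saturation here).
    let r = vqdmlsl_high_s16(acc, b, c);
    assert_eq!(vgetq_lane_s32::<0>(r), -20_000);
}
```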
-#[doc = "Floating-point fused multiply-subtract to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsd_lane_f64)"]
+#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmlsl, N = 2))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmsd_lane_f64<const LANE: i32>(a: f64, b: f64, c: float64x1_t) -> f64 {
-    vfmad_lane_f64::<LANE>(a, -b, c)
+pub unsafe fn vqdmlsl_laneq_s16<const N: i32>(
+    a: int32x4_t,
+    b: int16x4_t,
+    c: int16x8_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(N, 3);
+    vqsubq_s32(a, vqdmull_laneq_s16::<N>(b, c))
 }
-#[doc = "Floating-point fused multiply-subtract to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsd_laneq_f64)"]
+#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmlsl, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vfmsd_laneq_f64<const LANE: i32>(a: f64, b: f64, c: float64x2_t) -> f64 {
-    vfmad_laneq_f64::<LANE>(a, -b, c)
+pub unsafe fn vqdmlsl_laneq_s32<const N: i32>(
+    a: int64x2_t,
+    b: int32x2_t,
+    c: int32x4_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(N, 2);
+    vqsubq_s64(a, vqdmull_laneq_s32::<N>(b, c))
 }
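(Annotation, not part of the patch.) The `_lane`/`_laneq` forms broadcast one lane of `c` before the doubled multiply-subtract; `N` must satisfy the `static_assert_uimm_bits!` bound. A hedged sketch using `vqdmlsl_laneq_s16` as defined by this patch (`_demo` name illustrative):

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn vqdmlsl_laneq_demo() {
    use core::arch::aarch64::*;
    let acc = vdupq_n_s32(1_000_000);
    let b = vdup_n_s16(100);
    let c = vdupq_n_s16(40); // N = 7 selects lane 7 of the 8-lane `c`
    // Each lane: 1_000_000 - 2 * 100 * 40 = 992_000.
    let r = vqdmlsl_laneq_s16::<7>(acc, b, c);
    assert_eq!(vgetq_lane_s32::<3>(r), 992_000);
}
```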
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f64_x2)"]
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld1))]
-pub unsafe fn vld1_f64_x2(a: *const f64) -> float64x1x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v1f64.p0f64"
-        )]
-        fn _vld1_f64_x2(a: *const f64) -> float64x1x2_t;
-    }
-    _vld1_f64_x2(a)
+pub unsafe fn vqdmlslh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
+    static_assert_uimm_bits!(LANE, 2);
+    vqdmlslh_s16(a, b, simd_extract!(c, LANE as u32))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f64_x3)"]
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld1))]
-pub unsafe fn vld1_f64_x3(a: *const f64) -> float64x1x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v1f64.p0f64"
-        )]
-        fn _vld1_f64_x3(a: *const f64) -> float64x1x3_t;
-    }
-    _vld1_f64_x3(a)
+pub unsafe fn vqdmlslh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32 {
+    static_assert_uimm_bits!(LANE, 3);
+    vqdmlslh_s16(a, b, simd_extract!(c, LANE as u32))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f64_x4)"]
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld1))]
-pub unsafe fn vld1_f64_x4(a: *const f64) -> float64x1x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v1f64.p0f64"
-        )]
-        fn _vld1_f64_x4(a: *const f64) -> float64x1x4_t;
-    }
-    _vld1_f64_x4(a)
+pub unsafe fn vqdmlsls_lane_s32<const LANE: i32>(a: i64, b: i32, c: int32x2_t) -> i64 {
+    static_assert_uimm_bits!(LANE, 1);
+    vqdmlsls_s32(a, b, simd_extract!(c, LANE as u32))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f64_x2)"]
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))]
+#[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld1))]
-pub unsafe fn vld1q_f64_x2(a: *const f64) -> float64x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v2f64.p0f64"
-        )]
-        fn _vld1q_f64_x2(a: *const f64) -> float64x2x2_t;
-    }
-    _vld1q_f64_x2(a)
+pub unsafe fn vqdmlsls_laneq_s32<const LANE: i32>(a: i64, b: i32, c: int32x4_t) -> i64 {
+    static_assert_uimm_bits!(LANE, 2);
+    vqdmlsls_s32(a, b, simd_extract!(c, LANE as u32))
 }
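(Annotation, not part of the patch.) The scalar `vqdmlslh_*`/`vqdmlsls_*` lane forms extract one element and fall through to the plain scalar intrinsic. A small sketch, assuming NEON is available (illustrative `_demo` name):

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn vqdmlslh_lane_demo() {
    use core::arch::aarch64::*;
    let c = vdup_n_s16(50); // all four lanes hold 50
    // LANE = 3 picks c[3]; result: 1_000 - 2 * 10 * 50 = 0.
    assert_eq!(vqdmlslh_lane_s16::<3>(1_000, 10, c), 0);
}
```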
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f64_x3)"]
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlsl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld1))]
-pub unsafe fn vld1q_f64_x3(a: *const f64) -> float64x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v2f64.p0f64"
-        )]
-        fn _vld1q_f64_x3(a: *const f64) -> float64x2x3_t;
-    }
-    _vld1q_f64_x3(a)
+pub unsafe fn vqdmlslh_s16(a: i32, b: i16, c: i16) -> i32 {
+    let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c));
+    vqsubs_s32(a, simd_extract!(x, 0))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f64_x4)"]
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmlsl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld1))]
-pub unsafe fn vld1q_f64_x4(a: *const f64) -> float64x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v2f64.p0f64"
-        )]
-        fn _vld1q_f64_x4(a: *const f64) -> float64x2x4_t;
-    }
-    _vld1q_f64_x4(a)
+pub unsafe fn vqdmlsls_s32(a: i64, b: i32, c: i32) -> i64 {
+    vqsubd_s64(a, vqdmulls_s32(b, c))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f64)"]
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2_dup_f64(a: *const f64) -> float64x1x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v1f64.p0f64"
-        )]
-        fn _vld2_dup_f64(ptr: *const f64) -> float64x1x2_t;
-    }
-    _vld2_dup_f64(a as _)
+pub unsafe fn vqdmulh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vqdmulh_s16(a, vdup_n_s16(simd_extract!(b, LANE as u32)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f64)"]
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2q_dup_f64(a: *const f64) -> float64x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v2f64.p0f64"
-        )]
-        fn _vld2q_dup_f64(ptr: *const f64) -> float64x2x2_t;
-    }
-    _vld2q_dup_f64(a as _)
+pub unsafe fn vqdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vqdmulhq_s16(a, vdupq_n_s16(simd_extract!(b, LANE as u32)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s64)"]
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2q_dup_s64(a: *const i64) -> int64x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v2i64.p0i64"
-        )]
-        fn _vld2q_dup_s64(ptr: *const i64) -> int64x2x2_t;
-    }
-    _vld2q_dup_s64(a as _)
+pub unsafe fn vqdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vqdmulh_s32(a, vdup_n_s32(simd_extract!(b, LANE as u32)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f64)"]
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld2_f64(a: *const f64) -> float64x1x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v1f64.p0v1f64"
-        )]
-        fn _vld2_f64(ptr: *const float64x1_t) -> float64x1x2_t;
-    }
-    _vld2_f64(a as _)
+pub unsafe fn vqdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vqdmulhq_s32(a, vdupq_n_s32(simd_extract!(b, LANE as u32)))
 }
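(Annotation, not part of the patch.) `sqdmulh` returns the high half of the doubled product, i.e. `(2 * a * b[LANE]) >> 16` for 16-bit lanes, saturating only for `MIN * MIN`. A sketch under those semantics:

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn vqdmulh_lane_demo() {
    use core::arch::aarch64::*;
    let a = vdup_n_s16(0x4000);
    let b = vdup_n_s16(0x2000); // uniform lanes, so any LANE gives the same result
    // Each lane: (2 * 0x4000 * 0x2000) >> 16 = 0x1000.
    let r = vqdmulh_lane_s16::<0>(a, b);
    assert_eq!(vget_lane_s16::<0>(r), 0x1000);
}
```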
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f64)"]
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhh_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmulh, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2_lane_f64<const LANE: i32>(a: *const f64, b: float64x1x2_t) -> float64x1x2_t {
-    static_assert!(LANE == 0);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v1f64.p0i8"
-        )]
-        fn _vld2_lane_f64(a: float64x1_t, b: float64x1_t, n: i64, ptr: *const i8) -> float64x1x2_t;
-    }
-    _vld2_lane_f64(b.0, b.1, LANE as i64, a as _)
+pub unsafe fn vqdmulhh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i16 {
+    static_assert_uimm_bits!(N, 2);
+    let b: i16 = simd_extract!(b, N as u32);
+    vqdmulhh_s16(a, b)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s64)"]
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhh_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmulh, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2_lane_s64<const LANE: i32>(a: *const i64, b: int64x1x2_t) -> int64x1x2_t {
-    static_assert!(LANE == 0);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v1i64.p0i8"
-        )]
-        fn _vld2_lane_s64(a: int64x1_t, b: int64x1_t, n: i64, ptr: *const i8) -> int64x1x2_t;
-    }
-    _vld2_lane_s64(b.0, b.1, LANE as i64, a as _)
+pub unsafe fn vqdmulhh_laneq_s16<const N: i32>(a: i16, b: int16x8_t) -> i16 {
+    static_assert_uimm_bits!(N, 3);
+    let b: i16 = simd_extract!(b, N as u32);
+    vqdmulhh_s16(a, b)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p64)"]
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhh_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmulh))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2_lane_p64<const LANE: i32>(a: *const p64, b: poly64x1x2_t) -> poly64x1x2_t {
-    static_assert!(LANE == 0);
-    transmute(vld2_lane_s64::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vqdmulhh_s16(a: i16, b: i16) -> i16 {
+    let a: int16x4_t = vdup_n_s16(a);
+    let b: int16x4_t = vdup_n_s16(b);
+    simd_extract!(vqdmulh_s16(a, b), 0)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u64)"]
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhs_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(sqdmulh))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2_lane_u64<const LANE: i32>(a: *const u64, b: uint64x1x2_t) -> uint64x1x2_t {
-    static_assert!(LANE == 0);
-    transmute(vld2_lane_s64::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vqdmulhs_s32(a: i32, b: i32) -> i32 {
+    let a: int32x2_t = vdup_n_s32(a);
+    let b: int32x2_t = vdup_n_s32(b);
+    simd_extract!(vqdmulh_s32(a, b), 0)
 }
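(Annotation, not part of the patch.) The scalar forms make the saturation case easy to see: doubling `i16::MIN * i16::MIN` would produce 2^31, which is unrepresentable, so the result clamps to `i16::MAX`:

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn vqdmulh_scalar_demo() {
    use core::arch::aarch64::*;
    // Ordinary case: (2 * 0x4000 * 0x4000) >> 16 = 0x2000.
    assert_eq!(vqdmulhh_s16(0x4000, 0x4000), 0x2000);
    // Saturating case: 2 * (-2^15) * (-2^15) = 2^31 clamps to i16::MAX.
    assert_eq!(vqdmulhh_s16(i16::MIN, i16::MIN), i16::MAX);
}
```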
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p64)"]
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhs_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmulh, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2q_dup_p64(a: *const p64) -> poly64x2x2_t {
-    transmute(vld2q_dup_s64(transmute(a)))
+pub unsafe fn vqdmulhs_lane_s32<const N: i32>(a: i32, b: int32x2_t) -> i32 {
+    static_assert_uimm_bits!(N, 1);
+    let b: i32 = simd_extract!(b, N as u32);
+    vqdmulhs_s32(a, b)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u64)"]
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhs_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmulh, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2q_dup_u64(a: *const u64) -> uint64x2x2_t {
-    transmute(vld2q_dup_s64(transmute(a)))
+pub unsafe fn vqdmulhs_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i32 {
+    static_assert_uimm_bits!(N, 2);
+    let b: i32 = simd_extract!(b, N as u32);
+    vqdmulhs_s32(a, b)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f64)"]
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2q_f64(a: *const f64) -> float64x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v2f64.p0v2f64"
-        )]
-        fn _vld2q_f64(ptr: *const float64x2_t) -> float64x2x2_t;
-    }
-    _vld2q_f64(a as _)
+pub unsafe fn vqdmull_high_lane_s16<const N: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+    let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
+    vqdmull_s16(a, b)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s64)"]
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2q_s64(a: *const i64) -> int64x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v2i64.p0v2i64"
-        )]
-        fn _vld2q_s64(ptr: *const int64x2_t) -> int64x2x2_t;
-    }
-    _vld2q_s64(a as _)
+pub unsafe fn vqdmull_high_laneq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 2);
+    let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
+    let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
+    vqdmull_s32(a, b)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f64)"]
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmull2, N = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2q_lane_f64<const LANE: i32>(a: *const f64, b: float64x2x2_t) -> float64x2x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v2f64.p0i8"
-        )]
-        fn _vld2q_lane_f64(a: float64x2_t, b: float64x2_t, n: i64, ptr: *const i8)
-            -> float64x2x2_t;
-    }
-    _vld2q_lane_f64(b.0, b.1, LANE as i64, a as _)
+pub unsafe fn vqdmull_high_lane_s32<const N: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
+    let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
+    vqdmull_s32(a, b)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s8)"]
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmull2, N = 4))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x2_t) -> int8x16x2_t {
-    static_assert_uimm_bits!(LANE, 4);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v16i8.p0i8"
-        )]
-        fn _vld2q_lane_s8(a: int8x16_t, b: int8x16_t, n: i64, ptr: *const i8) -> int8x16x2_t;
-    }
-    _vld2q_lane_s8(b.0, b.1, LANE as i64, a as _)
+pub unsafe fn vqdmull_high_laneq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 3);
+    let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+    let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
+    vqdmull_s16(a, b)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s64)"]
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(sqdmull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x2_t) -> int64x2x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v2i64.p0i8"
-        )]
-        fn _vld2q_lane_s64(a: int64x2_t, b: int64x2_t, n: i64, ptr: *const i8) -> int64x2x2_t;
-    }
-    _vld2q_lane_s64(b.0, b.1, LANE as i64, a as _)
+pub unsafe fn vqdmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
+    let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+    let b: int16x4_t = vdup_n_s16(b);
+    vqdmull_s16(a, b)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p64)"]
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2q_lane_p64<const LANE: i32>(a: *const p64, b: poly64x2x2_t) -> poly64x2x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    transmute(vld2q_lane_s64::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vqdmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
+    let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
+    let b: int32x2_t = vdup_n_s32(b);
+    vqdmull_s32(a, b)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u8)"]
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqdmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+    let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+    let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
+    vqdmull_s16(a, b)
+}
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(sqdmull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2q_lane_u8<const LANE: i32>(a: *const u8, b: uint8x16x2_t) -> uint8x16x2_t {
-    static_assert_uimm_bits!(LANE, 4);
-    transmute(vld2q_lane_s8::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vqdmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+    let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
+    let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
+    vqdmull_s32(a, b)
 }
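(Annotation, not part of the patch.) `vqdmull_high_*` widens as it doubles: 16-bit high-half lanes produce 32-bit results, so overflow is only possible for `MIN * MIN`. A hedged sketch:

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn vqdmull_high_demo() {
    use core::arch::aarch64::*;
    let a = vdupq_n_s16(1_000);
    let b = vdupq_n_s16(2_000);
    // High-half lanes widen to 2 * 1_000 * 2_000 = 4_000_000.
    let r = vqdmull_high_s16(a, b);
    assert_eq!(vgetq_lane_s32::<0>(r), 4_000_000);
}
```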
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u64)"]
+#[doc = "Vector saturating doubling long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmull, N = 4))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2q_lane_u64<const LANE: i32>(a: *const u64, b: uint64x2x2_t) -> uint64x2x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    transmute(vld2q_lane_s64::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vqdmull_laneq_s16<const N: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 3);
+    let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
+    vqdmull_s16(a, b)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p8)"]
+#[doc = "Vector saturating doubling long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[cfg_attr(test, assert_instr(sqdmull, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2q_lane_p8<const LANE: i32>(a: *const p8, b: poly8x16x2_t) -> poly8x16x2_t {
-    static_assert_uimm_bits!(LANE, 4);
-    transmute(vld2q_lane_s8::<LANE>(transmute(a), transmute(b)))
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2q_p64(a: *const p64) -> poly64x2x2_t {
-    transmute(vld2q_s64(transmute(a)))
+pub unsafe fn vqdmull_laneq_s32<const N: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 2);
+    let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
+    vqdmull_s32(a, b)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u64)"]
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2q_u64(a: *const u64) -> uint64x2x2_t {
-    transmute(vld2q_s64(transmute(a)))
+pub unsafe fn vqdmullh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i32 {
+    static_assert_uimm_bits!(N, 2);
+    let b: i16 = simd_extract!(b, N as u32);
+    vqdmullh_s16(a, b)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f64)"]
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulls_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3_dup_f64(a: *const f64) -> float64x1x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v1f64.p0f64"
-        )]
-        fn _vld3_dup_f64(ptr: *const f64) -> float64x1x3_t;
-    }
-    _vld3_dup_f64(a as _)
+pub unsafe fn vqdmulls_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i64 {
+    static_assert_uimm_bits!(N, 2);
+    let b: i32 = simd_extract!(b, N as u32);
+    vqdmulls_s32(a, b)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f64)"]
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull, N = 4))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3q_dup_f64(a: *const f64) -> float64x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v2f64.p0f64"
-        )]
-        fn _vld3q_dup_f64(ptr: *const f64) -> float64x2x3_t;
-    }
-    _vld3q_dup_f64(a as _)
+pub unsafe fn vqdmullh_laneq_s16<const N: i32>(a: i16, b: int16x8_t) -> i32 {
+    static_assert_uimm_bits!(N, 3);
+    let b: i16 = simd_extract!(b, N as u32);
+    vqdmullh_s16(a, b)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s64)"]
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3q_dup_s64(a: *const i64) -> int64x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v2i64.p0i64"
-        )]
-        fn _vld3q_dup_s64(ptr: *const i64) -> int64x2x3_t;
-    }
-    _vld3q_dup_s64(a as _)
+pub unsafe fn vqdmullh_s16(a: i16, b: i16) -> i32 {
+    let a: int16x4_t = vdup_n_s16(a);
+    let b: int16x4_t = vdup_n_s16(b);
+    simd_extract!(vqdmull_s16(a, b), 0)
 }
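(Annotation, not part of the patch.) Scalar check of the same widening rule; only `i16::MIN * i16::MIN` saturates because `2 * (-2^15)^2 = 2^31` exceeds `i32::MAX`:

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn vqdmullh_demo() {
    use core::arch::aarch64::*;
    assert_eq!(vqdmullh_s16(123, 456), 112_176); // 2 * 123 * 456
    assert_eq!(vqdmullh_s16(i16::MIN, i16::MIN), i32::MAX); // saturates
}
```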
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f64)"]
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulls_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqdmull, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld3_f64(a: *const f64) -> float64x1x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v1f64.p0v1f64"
-        )]
-        fn _vld3_f64(ptr: *const float64x1_t) -> float64x1x3_t;
-    }
-    _vld3_f64(a as _)
+pub unsafe fn vqdmulls_lane_s32<const N: i32>(a: i32, b: int32x2_t) -> i64 {
+    static_assert_uimm_bits!(N, 1);
+    let b: i32 = simd_extract!(b, N as u32);
+    vqdmulls_s32(a, b)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f64)"]
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulls_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(sqdmull))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3_lane_f64<const LANE: i32>(a: *const f64, b: float64x1x3_t) -> float64x1x3_t {
-    static_assert!(LANE == 0);
+pub unsafe fn vqdmulls_s32(a: i32, b: i32) -> i64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v1f64.p0i8"
+            link_name = "llvm.aarch64.neon.sqdmulls.scalar"
         )]
-        fn _vld3_lane_f64(
-            a: float64x1_t,
-            b: float64x1_t,
-            c: float64x1_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> float64x1x3_t;
+        fn _vqdmulls_s32(a: i32, b: i32) -> i64;
     }
-    _vld3_lane_f64(b.0, b.1, b.2, LANE as i64, a as _)
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3_lane_p64<const LANE: i32>(a: *const p64, b: poly64x1x3_t) -> poly64x1x3_t {
-    static_assert!(LANE == 0);
-    transmute(vld3_lane_s64::<LANE>(transmute(a), transmute(b)))
+    _vqdmulls_s32(a, b)
 }
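(Annotation, not part of the patch.) The `i32 -> i64` scalar goes through the dedicated `llvm.aarch64.neon.sqdmulls.scalar` intrinsic above; its lone saturating input pair is again `MIN * MIN`:

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn vqdmulls_demo() {
    use core::arch::aarch64::*;
    assert_eq!(vqdmulls_s32(1 << 30, 1 << 30), 1 << 61); // 2 * 2^30 * 2^30
    assert_eq!(vqdmulls_s32(i32::MIN, i32::MIN), i64::MAX); // 2^63 saturates
}
```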
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s64)"]
+#[doc = "Signed saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(sqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3_lane_s64<const LANE: i32>(a: *const i64, b: int64x1x3_t) -> int64x1x3_t {
-    static_assert!(LANE == 0);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v1i64.p0i8"
-        )]
-        fn _vld3_lane_s64(
-            a: int64x1_t,
-            b: int64x1_t,
-            c: int64x1_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int64x1x3_t;
-    }
-    _vld3_lane_s64(b.0, b.1, b.2, LANE as i64, a as _)
+pub unsafe fn vqmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
+    simd_shuffle!(
+        a,
+        vqmovn_s16(b),
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+    )
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u64)"]
+#[doc = "Signed saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(sqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3_lane_u64<const LANE: i32>(a: *const u64, b: uint64x1x3_t) -> uint64x1x3_t {
-    static_assert!(LANE == 0);
-    transmute(vld3_lane_s64::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vqmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
+    simd_shuffle!(a, vqmovn_s32(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p64)"]
+#[doc = "Signed saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3q_dup_p64(a: *const p64) -> poly64x2x3_t {
-    transmute(vld3q_dup_s64(transmute(a)))
+pub unsafe fn vqmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
+    simd_shuffle!(a, vqmovn_s64(b), [0, 1, 2, 3])
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u64)"]
+#[doc = "Signed saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3q_dup_u64(a: *const u64) -> uint64x2x3_t {
-    transmute(vld3q_dup_s64(transmute(a)))
+pub unsafe fn vqmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+    simd_shuffle!(
+        a,
+        vqmovn_u16(b),
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+    )
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f64)"]
+#[doc = "Signed saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3q_f64(a: *const f64) -> float64x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v2f64.p0v2f64"
-        )]
-        fn _vld3q_f64(ptr: *const float64x2_t) -> float64x2x3_t;
-    }
-    _vld3q_f64(a as _)
+#[cfg_attr(test, assert_instr(uqxtn2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+    simd_shuffle!(a, vqmovn_u32(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s64)"]
+#[doc = "Signed saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3q_s64(a: *const i64) -> int64x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v2i64.p0v2i64"
-        )]
-        fn _vld3q_s64(ptr: *const int64x2_t) -> int64x2x3_t;
-    }
-    _vld3q_s64(a as _)
+pub unsafe fn vqmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+    simd_shuffle!(a, vqmovn_u64(b), [0, 1, 2, 3])
 }
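(Annotation, not part of the patch.) The `_high` narrows keep `a` as the low half and append the saturating-narrowed `b` as the high half, which is what the `simd_shuffle!` concatenation expresses. Sketch:

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn vqmovn_high_demo() {
    use core::arch::aarch64::*;
    let lo = vdup_n_s8(7);
    let wide = vdupq_n_s16(1_000); // out of i8 range, narrows to 127
    let r = vqmovn_high_s16(lo, wide);
    assert_eq!(vgetq_lane_s8::<0>(r), 7); // lanes 0..8 come from `lo`
    assert_eq!(vgetq_lane_s8::<15>(r), 127); // lanes 8..16 are narrowed `wide`
}
```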
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f64)"]
+#[doc = "Saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnd_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(sqxtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3q_lane_f64<const LANE: i32>(a: *const f64, b: float64x2x3_t) -> float64x2x3_t {
-    static_assert_uimm_bits!(LANE, 1);
+pub unsafe fn vqmovnd_s64(a: i64) -> i32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v2f64.p0i8"
+            link_name = "llvm.aarch64.neon.scalar.sqxtn.i32.i64"
         )]
-        fn _vld3q_lane_f64(
-            a: float64x2_t,
-            b: float64x2_t,
-            c: float64x2_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> float64x2x3_t;
+        fn _vqmovnd_s64(a: i64) -> i32;
     }
-    _vld3q_lane_f64(b.0, b.1, b.2, LANE as i64, a as _)
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3q_lane_p64<const LANE: i32>(a: *const p64, b: poly64x2x3_t) -> poly64x2x3_t {
-    static_assert_uimm_bits!(LANE, 1);
-    transmute(vld3q_lane_s64::<LANE>(transmute(a), transmute(b)))
+    _vqmovnd_s64(a)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s8)"]
+#[doc = "Saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnd_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(uqxtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x3_t) -> int8x16x3_t {
-    static_assert_uimm_bits!(LANE, 3);
+pub unsafe fn vqmovnd_u64(a: u64) -> u32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v16i8.p0i8"
+            link_name = "llvm.aarch64.neon.scalar.uqxtn.i32.i64"
         )]
-        fn _vld3q_lane_s8(
-            a: int8x16_t,
-            b: int8x16_t,
-            c: int8x16_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int8x16x3_t;
+        fn _vqmovnd_u64(a: i64) -> i32;
     }
-    _vld3q_lane_s8(b.0, b.1, b.2, LANE as i64, a as _)
+    _vqmovnd_u64(a.as_signed()).as_unsigned()
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s64)"]
+#[doc = "Saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnh_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(sqxtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x3_t) -> int64x2x3_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v2i64.p0i8"
-        )]
-        fn _vld3q_lane_s64(
-            a: int64x2_t,
-            b: int64x2_t,
-            c: int64x2_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int64x2x3_t;
-    }
-    _vld3q_lane_s64(b.0, b.1, b.2, LANE as i64, a as _)
+pub unsafe fn vqmovnh_s16(a: i16) -> i8 {
+    simd_extract!(vqmovn_s16(vdupq_n_s16(a)), 0)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u8)"]
+#[doc = "Saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovns_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(sqxtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3q_lane_u8<const LANE: i32>(a: *const u8, b: uint8x16x3_t) -> uint8x16x3_t {
-    static_assert_uimm_bits!(LANE, 4);
-    transmute(vld3q_lane_s8::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vqmovns_s32(a: i32) -> i16 {
+    simd_extract!(vqmovn_s32(vdupq_n_s32(a)), 0)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u64)"]
+#[doc = "Saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnh_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(uqxtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3q_lane_u64<const LANE: i32>(a: *const u64, b: uint64x2x3_t) -> uint64x2x3_t {
-    static_assert_uimm_bits!(LANE, 1);
-    transmute(vld3q_lane_s64::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vqmovnh_u16(a: u16) -> u8 {
+    simd_extract!(vqmovn_u16(vdupq_n_u16(a)), 0)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p8)"]
+#[doc = "Saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovns_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(uqxtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3q_lane_p8<const LANE: i32>(a: *const p8, b: poly8x16x3_t) -> poly8x16x3_t {
-    static_assert_uimm_bits!(LANE, 4);
-    transmute(vld3q_lane_s8::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vqmovns_u32(a: u32) -> u16 {
+    simd_extract!(vqmovn_u32(vdupq_n_u32(a)), 0)
 }
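(Annotation, not part of the patch.) The scalar narrows clamp to the destination range; in-range values pass through unchanged:

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn vqmovn_scalar_demo() {
    use core::arch::aarch64::*;
    assert_eq!(vqmovns_s32(40_000), i16::MAX); // 40_000 > 32_767
    assert_eq!(vqmovns_s32(-5), -5);
    assert_eq!(vqmovnh_u16(300), u8::MAX); // unsigned variant clamps at 255
}
```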
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p64)"]
+#[doc = "Signed saturating extract unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqxtun2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3q_p64(a: *const p64) -> poly64x2x3_t {
-    transmute(vld3q_s64(transmute(a)))
+pub unsafe fn vqmovun_high_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
+    simd_shuffle!(
+        a,
+        vqmovun_s16(b),
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+    )
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u64)"]
+#[doc = "Signed saturating extract unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqxtun2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3q_u64(a: *const u64) -> uint64x2x3_t {
-    transmute(vld3q_s64(transmute(a)))
+pub unsafe fn vqmovun_high_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
+    simd_shuffle!(a, vqmovun_s32(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f64)"]
+#[doc = "Signed saturating extract unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld4r))]
+#[cfg_attr(test, assert_instr(sqxtun2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_dup_f64(a: *const f64) -> float64x1x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v1f64.p0f64"
-        )]
-        fn _vld4_dup_f64(ptr: *const f64) -> float64x1x4_t;
-    }
-    _vld4_dup_f64(a as _)
+pub unsafe fn vqmovun_high_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
+    simd_shuffle!(a, vqmovun_s64(b), [0, 1, 2, 3])
 }
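+// Illustrative sketch of the `_high` pattern: the low lanes of the result are
+// `a` unchanged, while the upper lanes hold the unsigned-saturated narrowing
+// of `b` (negative lanes clamp to zero). The input values below are arbitrary.
+//
+// unsafe {
+//     let low: uint8x8_t = vdup_n_u8(1);
+//     let b: int16x8_t = vdupq_n_s16(-5);
+//     let r: uint8x16_t = vqmovun_high_s16(low, b);
+//     // lanes 0..=7 of r are 1; lanes 8..=15 are 0 because -5 saturates to 0
+// }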
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f64)"]
+#[doc = "Signed saturating extract unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovunh_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld4r))]
+#[cfg_attr(test, assert_instr(sqxtun))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_dup_f64(a: *const f64) -> float64x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v2f64.p0f64"
-        )]
-        fn _vld4q_dup_f64(ptr: *const f64) -> float64x2x4_t;
-    }
-    _vld4q_dup_f64(a as _)
+pub unsafe fn vqmovunh_s16(a: i16) -> u8 {
+    simd_extract!(vqmovun_s16(vdupq_n_s16(a)), 0)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s64)"]
+#[doc = "Signed saturating extract unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovuns_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld4r))]
+#[cfg_attr(test, assert_instr(sqxtun))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_dup_s64(a: *const i64) -> int64x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v2i64.p0i64"
-        )]
-        fn _vld4q_dup_s64(ptr: *const i64) -> int64x2x4_t;
-    }
-    _vld4q_dup_s64(a as _)
+pub unsafe fn vqmovuns_s32(a: i32) -> u16 {
+    simd_extract!(vqmovun_s32(vdupq_n_s32(a)), 0)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f64)"]
+#[doc = "Signed saturating extract unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovund_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqxtun))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld4_f64(a: *const f64) -> float64x1x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v1f64.p0v1f64"
-        )]
-        fn _vld4_f64(ptr: *const float64x1_t) -> float64x1x4_t;
-    }
-    _vld4_f64(a as _)
+pub unsafe fn vqmovund_s64(a: i64) -> u32 {
+    simd_extract!(vqmovun_s64(vdupq_n_s64(a)), 0)
 }
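+// Illustrative sketch of the scalar unsigned-narrow forms: negative inputs
+// clamp to 0 and inputs above the destination maximum clamp to that maximum.
+//
+// unsafe {
+//     assert_eq!(vqmovund_s64(-1), 0);
+//     assert_eq!(vqmovund_s64(1 << 40), u32::MAX);
+//     assert_eq!(vqmovund_s64(7), 7);
+// }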
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f64)"]
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_lane_f64<const LANE: i32>(a: *const f64, b: float64x1x4_t) -> float64x1x4_t {
-    static_assert!(LANE == 0);
+#[cfg_attr(test, assert_instr(sqneg))]
+pub unsafe fn vqneg_s64(a: int64x1_t) -> int64x1_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v1f64.p0i8"
+            link_name = "llvm.aarch64.neon.sqneg.v1i64"
         )]
-        fn _vld4_lane_f64(
-            a: float64x1_t,
-            b: float64x1_t,
-            c: float64x1_t,
-            d: float64x1_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> float64x1x4_t;
+        fn _vqneg_s64(a: int64x1_t) -> int64x1_t;
     }
-    _vld4_lane_f64(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    _vqneg_s64(a)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s64)"]
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_lane_s64<const LANE: i32>(a: *const i64, b: int64x1x4_t) -> int64x1x4_t {
-    static_assert!(LANE == 0);
+#[cfg_attr(test, assert_instr(sqneg))]
+pub unsafe fn vqnegq_s64(a: int64x2_t) -> int64x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v1i64.p0i8"
+            link_name = "llvm.aarch64.neon.sqneg.v2i64"
         )]
-        fn _vld4_lane_s64(
-            a: int64x1_t,
-            b: int64x1_t,
-            c: int64x1_t,
-            d: int64x1_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int64x1x4_t;
+        fn _vqnegq_s64(a: int64x2_t) -> int64x2_t;
     }
-    _vld4_lane_s64(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    _vqnegq_s64(a)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p64)"]
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegb_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_lane_p64<const LANE: i32>(a: *const p64, b: poly64x1x4_t) -> poly64x1x4_t {
-    static_assert!(LANE == 0);
-    transmute(vld4_lane_s64::<LANE>(transmute(a), transmute(b)))
+#[cfg_attr(test, assert_instr(sqneg))]
+pub unsafe fn vqnegb_s8(a: i8) -> i8 {
+    simd_extract!(vqneg_s8(vdup_n_s8(a)), 0)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u64)"]
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegh_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_lane_u64<const LANE: i32>(a: *const u64, b: uint64x1x4_t) -> uint64x1x4_t {
-    static_assert!(LANE == 0);
-    transmute(vld4_lane_s64::<LANE>(transmute(a), transmute(b)))
+#[cfg_attr(test, assert_instr(sqneg))]
+pub unsafe fn vqnegh_s16(a: i16) -> i16 {
+    simd_extract!(vqneg_s16(vdup_n_s16(a)), 0)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p64)"]
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegs_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(ld4r))]
+#[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_dup_p64(a: *const p64) -> poly64x2x4_t {
-    transmute(vld4q_dup_s64(transmute(a)))
+#[cfg_attr(test, assert_instr(sqneg))]
+pub unsafe fn vqnegs_s32(a: i32) -> i32 {
+    simd_extract!(vqneg_s32(vdup_n_s32(a)), 0)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u64)"]
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegd_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld4r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_dup_u64(a: *const u64) -> uint64x2x4_t {
-    transmute(vld4q_dup_s64(transmute(a)))
+#[cfg_attr(test, assert_instr(sqneg))]
+pub unsafe fn vqnegd_s64(a: i64) -> i64 {
+    simd_extract!(vqneg_s64(vdup_n_s64(a)), 0)
 }
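+// Illustrative sketch: unlike wrapping negation, saturating negation has a
+// defined result for the one value with no two's-complement counterpart.
+//
+// unsafe {
+//     assert_eq!(vqnegd_s64(5), -5);
+//     assert_eq!(vqnegd_s64(i64::MIN), i64::MAX); // wrapping_neg would return i64::MIN
+//     assert_eq!(vqnegb_s8(i8::MIN), i8::MAX);
+// }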
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f64)"]
+#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4q_f64(a: *const f64) -> float64x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v2f64.p0v2f64"
-        )]
-        fn _vld4q_f64(ptr: *const float64x2_t) -> float64x2x4_t;
-    }
-    _vld4q_f64(a as _)
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlah_lane_s16<const LANE: i32>(
+    a: int16x4_t,
+    b: int16x4_t,
+    c: int16x4_t,
+) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmlah_s16(a, b, c)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s64)"]
+#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4q_s64(a: *const i64) -> int64x2x4_t {
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlah_lane_s32<const LANE: i32>(
+    a: int32x2_t,
+    b: int32x2_t,
+    c: int32x2_t,
+) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
+    vqrdmlah_s32(a, b, c)
+}
+#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlah_laneq_s16<const LANE: i32>(
+    a: int16x4_t,
+    b: int16x4_t,
+    c: int16x8_t,
+) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmlah_s16(a, b, c)
+}
+#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlah_laneq_s32<const LANE: i32>(
+    a: int32x2_t,
+    b: int32x2_t,
+    c: int32x4_t,
+) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
+    vqrdmlah_s32(a, b, c)
+}
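+// Illustrative sketch of the lane forms: `_lane`/`_laneq` broadcast one
+// element of `c` before the multiply-accumulate, so with a uniform `c` the
+// lane form and the plain vector form agree lane for lane.
+//
+// unsafe {
+//     let (a, b, c) = (vdup_n_s16(0), vdup_n_s16(16384), vdup_n_s16(16384));
+//     let via_lane = vqrdmlah_lane_s16::<2>(a, b, c);
+//     let via_vec = vqrdmlah_s16(a, b, c);
+//     // via_lane == via_vec in every lane here
+// }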
+#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahq_lane_s16<const LANE: i32>(
+    a: int16x8_t,
+    b: int16x8_t,
+    c: int16x4_t,
+) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c: int16x8_t = simd_shuffle!(
+        c,
+        c,
+        [
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32
+        ]
+    );
+    vqrdmlahq_s16(a, b, c)
+}
+#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahq_lane_s32<const LANE: i32>(
+    a: int32x4_t,
+    b: int32x4_t,
+    c: int32x2_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmlahq_s32(a, b, c)
+}
+#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahq_laneq_s16<const LANE: i32>(
+    a: int16x8_t,
+    b: int16x8_t,
+    c: int16x8_t,
+) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    let c: int16x8_t = simd_shuffle!(
+        c,
+        c,
+        [
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32
+        ]
+    );
+    vqrdmlahq_s16(a, b, c)
+}
+#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahq_laneq_s32<const LANE: i32>(
+    a: int32x4_t,
+    b: int32x4_t,
+    c: int32x4_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmlahq_s32(a, b, c)
+}
+#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlah_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v2i64.p0v2i64"
+            link_name = "llvm.aarch64.neon.sqrdmlah.v4i16"
         )]
-        fn _vld4q_s64(ptr: *const int64x2_t) -> int64x2x4_t;
+        fn _vqrdmlah_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t;
     }
-    _vld4q_s64(a as _)
+    _vqrdmlah_s16(a, b, c)
 }
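+// Worked example (illustrative): viewed as Q15 fixed point, sqrdmlah computes
+// a + round((2 * b * c) >> 16) with saturation. With b = c = 16384 (0.5 in
+// Q15) the rounded doubling product is 8192 (0.25), so each result lane is
+// a + 8192, clamped to the i16 range.
+//
+// unsafe {
+//     let r = vqrdmlah_s16(vdup_n_s16(100), vdup_n_s16(16384), vdup_n_s16(16384));
+//     // every lane of r is 100 + 8192 = 8292
+// }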
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f64)"]
+#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_lane_f64<const LANE: i32>(a: *const f64, b: float64x2x4_t) -> float64x2x4_t {
-    static_assert_uimm_bits!(LANE, 1);
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v2f64.p0i8"
+            link_name = "llvm.aarch64.neon.sqrdmlah.v8i16"
         )]
-        fn _vld4q_lane_f64(
-            a: float64x2_t,
-            b: float64x2_t,
-            c: float64x2_t,
-            d: float64x2_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> float64x2x4_t;
+        fn _vqrdmlahq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
     }
-    _vld4q_lane_f64(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    _vqrdmlahq_s16(a, b, c)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s8)"]
+#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x4_t) -> int8x16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlah_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v16i8.p0i8"
+            link_name = "llvm.aarch64.neon.sqrdmlah.v2i32"
         )]
-        fn _vld4q_lane_s8(
-            a: int8x16_t,
-            b: int8x16_t,
-            c: int8x16_t,
-            d: int8x16_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int8x16x4_t;
+        fn _vqrdmlah_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t;
     }
-    _vld4q_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    _vqrdmlah_s32(a, b, c)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s64)"]
+#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x4_t) -> int64x2x4_t {
-    static_assert_uimm_bits!(LANE, 1);
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v2i64.p0i8"
+            link_name = "llvm.aarch64.neon.sqrdmlah.v4i32"
         )]
-        fn _vld4q_lane_s64(
-            a: int64x2_t,
-            b: int64x2_t,
-            c: int64x2_t,
-            d: int64x2_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int64x2x4_t;
+        fn _vqrdmlahq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
     }
-    _vld4q_lane_s64(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    _vqrdmlahq_s32(a, b, c)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p64)"]
+#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahh_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_lane_p64<const LANE: i32>(a: *const p64, b: poly64x2x4_t) -> poly64x2x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    transmute(vld4q_lane_s64::<LANE>(transmute(a), transmute(b)))
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16 {
+    static_assert_uimm_bits!(LANE, 2);
+    vqrdmlahh_s16(a, b, simd_extract!(c, LANE as u32))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u8)"]
+#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahh_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_lane_u8<const LANE: i32>(a: *const u8, b: uint8x16x4_t) -> uint8x16x4_t {
-    static_assert_uimm_bits!(LANE, 4);
-    transmute(vld4q_lane_s8::<LANE>(transmute(a), transmute(b)))
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16 {
+    static_assert_uimm_bits!(LANE, 3);
+    vqrdmlahh_s16(a, b, simd_extract!(c, LANE as u32))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u64)"]
+#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahs_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_lane_u64<const LANE: i32>(a: *const u64, b: uint64x2x4_t) -> uint64x2x4_t {
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32 {
     static_assert_uimm_bits!(LANE, 1);
-    transmute(vld4q_lane_s64::<LANE>(transmute(a), transmute(b)))
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_lane_p8<const LANE: i32>(a: *const p8, b: poly8x16x4_t) -> poly8x16x4_t {
-    static_assert_uimm_bits!(LANE, 4);
-    transmute(vld4q_lane_s8::<LANE>(transmute(a), transmute(b)))
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4q_p64(a: *const p64) -> poly64x2x4_t {
-    transmute(vld4q_s64(transmute(a)))
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4q_u64(a: *const u64) -> uint64x2x4_t {
-    transmute(vld4q_s64(transmute(a)))
+    vqrdmlahs_s32(a, b, simd_extract!(c, LANE as u32))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f64)"]
+#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahs_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmax))]
-pub unsafe fn vmax_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmax.v1f64"
-        )]
-        fn _vmax_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
-    }
-    _vmax_f64(a, b)
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahs_laneq_s32<const LANE: i32>(a: i32, b: i32, c: int32x4_t) -> i32 {
+    static_assert_uimm_bits!(LANE, 2);
+    vqrdmlahs_s32(a, b, simd_extract!(c, LANE as u32))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f64)"]
+#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahh_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmax))]
-pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmax.v2f64"
-        )]
-        fn _vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
-    }
-    _vmaxq_f64(a, b)
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahh_s16(a: i16, b: i16, c: i16) -> i16 {
+    let a: int16x4_t = vdup_n_s16(a);
+    let b: int16x4_t = vdup_n_s16(b);
+    let c: int16x4_t = vdup_n_s16(c);
+    simd_extract!(vqrdmlah_s16(a, b, c), 0)
 }
-#[doc = "Floating-point Maximum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f64)"]
+#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahs_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmaxnm))]
-pub unsafe fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmaxnm.v1f64"
-        )]
-        fn _vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
-    }
-    _vmaxnm_f64(a, b)
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlah))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 {
+    let a: int32x2_t = vdup_n_s32(a);
+    let b: int32x2_t = vdup_n_s32(b);
+    let c: int32x2_t = vdup_n_s32(c);
+    simd_extract!(vqrdmlah_s32(a, b, c), 0)
 }
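+// Illustrative sketch of scalar saturation: when the accumulate would exceed
+// the element range, the result pins at the bound instead of wrapping.
+//
+// unsafe {
+//     assert_eq!(vqrdmlahh_s16(0, 16384, 16384), 8192);
+//     assert_eq!(vqrdmlahh_s16(i16::MAX, 16384, 16384), i16::MAX); // saturates
+// }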
-#[doc = "Floating-point Maximum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f64)"]
+#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmaxnm))]
-pub unsafe fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmaxnm.v2f64"
-        )]
-        fn _vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
-    }
-    _vmaxnmq_f64(a, b)
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlsh_lane_s16<const LANE: i32>(
+    a: int16x4_t,
+    b: int16x4_t,
+    c: int16x4_t,
+) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmlsh_s16(a, b, c)
 }
-#[doc = "Floating-point maximum number across vector"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmv_f32)"]
+#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmaxnmp))]
-pub unsafe fn vmaxnmv_f32(a: float32x2_t) -> f32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmaxnmv.f32.v2f32"
-        )]
-        fn _vmaxnmv_f32(a: float32x2_t) -> f32;
-    }
-    _vmaxnmv_f32(a)
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlsh_lane_s32<const LANE: i32>(
+    a: int32x2_t,
+    b: int32x2_t,
+    c: int32x2_t,
+) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
+    vqrdmlsh_s32(a, b, c)
 }
-#[doc = "Floating-point maximum number across vector"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmvq_f64)"]
+#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmaxnmp))]
-pub unsafe fn vmaxnmvq_f64(a: float64x2_t) -> f64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmaxnmv.f64.v2f64"
-        )]
-        fn _vmaxnmvq_f64(a: float64x2_t) -> f64;
-    }
-    _vmaxnmvq_f64(a)
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlsh_laneq_s16<const LANE: i32>(
+    a: int16x4_t,
+    b: int16x4_t,
+    c: int16x8_t,
+) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmlsh_s16(a, b, c)
 }
-#[doc = "Floating-point maximum number across vector"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmvq_f32)"]
+#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmaxnmv))]
-pub unsafe fn vmaxnmvq_f32(a: float32x4_t) -> f32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmaxnmv.f32.v4f32"
-        )]
-        fn _vmaxnmvq_f32(a: float32x4_t) -> f32;
-    }
-    _vmaxnmvq_f32(a)
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlsh_laneq_s32<const LANE: i32>(
+    a: int32x2_t,
+    b: int32x2_t,
+    c: int32x4_t,
+) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
+    vqrdmlsh_s32(a, b, c)
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f64)"]
+#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmin))]
-pub unsafe fn vmin_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmin.v1f64"
-        )]
-        fn _vmin_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
-    }
-    _vmin_f64(a, b)
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshq_lane_s16<const LANE: i32>(
+    a: int16x8_t,
+    b: int16x8_t,
+    c: int16x4_t,
+) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c: int16x8_t = simd_shuffle!(
+        c,
+        c,
+        [
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32
+        ]
+    );
+    vqrdmlshq_s16(a, b, c)
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f64)"]
+#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmin))]
-pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmin.v2f64"
-        )]
-        fn _vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
-    }
-    _vminq_f64(a, b)
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshq_lane_s32<const LANE: i32>(
+    a: int32x4_t,
+    b: int32x4_t,
+    c: int32x2_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmlshq_s32(a, b, c)
 }
-#[doc = "Floating-point Minimum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f64)"]
+#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fminnm))]
-pub unsafe fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminnm.v1f64"
-        )]
-        fn _vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
-    }
-    _vminnm_f64(a, b)
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshq_laneq_s16<const LANE: i32>(
+    a: int16x8_t,
+    b: int16x8_t,
+    c: int16x8_t,
+) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    let c: int16x8_t = simd_shuffle!(
+        c,
+        c,
+        [
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32
+        ]
+    );
+    vqrdmlshq_s16(a, b, c)
 }
-#[doc = "Floating-point Minimum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f64)"]
+#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fminnm))]
-pub unsafe fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminnm.v2f64"
-        )]
-        fn _vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
-    }
-    _vminnmq_f64(a, b)
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshq_laneq_s32<const LANE: i32>(
+    a: int32x4_t,
+    b: int32x4_t,
+    c: int32x4_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmlshq_s32(a, b, c)
 }
-#[doc = "Floating-point minimum number across vector"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmv_f32)"]
+#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fminnmp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vminnmv_f32(a: float32x2_t) -> f32 {
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlsh_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminnmv.f32.v2f32"
+            link_name = "llvm.aarch64.neon.sqrdmlsh.v4i16"
         )]
-        fn _vminnmv_f32(a: float32x2_t) -> f32;
+        fn _vqrdmlsh_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t;
     }
-    _vminnmv_f32(a)
+    _vqrdmlsh_s16(a, b, c)
 }
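+// Illustrative sketch: sqrdmlsh mirrors sqrdmlah but subtracts the rounded
+// doubling product from the accumulator.
+//
+// unsafe {
+//     let r = vqrdmlsh_s16(vdup_n_s16(0), vdup_n_s16(16384), vdup_n_s16(16384));
+//     // every lane of r is 0 - 8192 = -8192
+// }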
-#[doc = "Floating-point minimum number across vector"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmvq_f64)"]
+#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fminnmp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vminnmvq_f64(a: float64x2_t) -> f64 {
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminnmv.f64.v2f64"
+            link_name = "llvm.aarch64.neon.sqrdmlsh.v8i16"
         )]
-        fn _vminnmvq_f64(a: float64x2_t) -> f64;
+        fn _vqrdmlshq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
     }
-    _vminnmvq_f64(a)
+    _vqrdmlshq_s16(a, b, c)
 }
-#[doc = "Floating-point minimum number across vector"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmvq_f32)"]
+#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fminnmv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vminnmvq_f32(a: float32x4_t) -> f32 {
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlsh_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminnmv.f32.v4f32"
+            link_name = "llvm.aarch64.neon.sqrdmlsh.v2i32"
         )]
-        fn _vminnmvq_f32(a: float32x4_t) -> f32;
+        fn _vqrdmlsh_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t;
     }
-    _vminnmvq_f32(a)
-}
-#[doc = "Floating-point multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_f64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmul))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmla_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
-    simd_add(a, simd_mul(b, c))
+    _vqrdmlsh_s32(a, b, c)
 }
-#[doc = "Floating-point multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_f64)"]
+#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmul))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
-    simd_add(a, simd_mul(b, c))
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrdmlsh.v4i32"
+        )]
+        fn _vqrdmlshq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+    }
+    _vqrdmlshq_s32(a, b, c)
 }
-#[doc = "Multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_s16)"]
+#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshh_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlal_high_lane_s16<const LANE: i32>(
-    a: int32x4_t,
-    b: int16x8_t,
-    c: int16x4_t,
-) -> int32x4_t {
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16 {
     static_assert_uimm_bits!(LANE, 2);
-    vmlal_high_s16(
-        a,
-        b,
-        simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
-    )
+    vqrdmlshh_s16(a, b, simd_extract!(c, LANE as u32))
 }
-#[doc = "Multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s16)"]
+#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshh_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlal_high_laneq_s16<const LANE: i32>(
-    a: int32x4_t,
-    b: int16x8_t,
-    c: int16x8_t,
-) -> int32x4_t {
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16 {
     static_assert_uimm_bits!(LANE, 3);
-    vmlal_high_s16(
-        a,
-        b,
-        simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
-    )
+    vqrdmlshh_s16(a, b, simd_extract!(c, LANE as u32))
 }
-#[doc = "Multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_s32)"]
+#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshs_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlal_high_lane_s32<const LANE: i32>(
-    a: int64x2_t,
-    b: int32x4_t,
-    c: int32x2_t,
-) -> int64x2_t {
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32 {
     static_assert_uimm_bits!(LANE, 1);
-    vmlal_high_s32(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
-}
-#[doc = "Multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlal_high_laneq_s32<const LANE: i32>(
-    a: int64x2_t,
-    b: int32x4_t,
-    c: int32x4_t,
-) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlal_high_s32(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
-}
-#[doc = "Multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlal_high_lane_u16<const LANE: i32>(
-    a: uint32x4_t,
-    b: uint16x8_t,
-    c: uint16x4_t,
-) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlal_high_u16(
-        a,
-        b,
-        simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
-    )
+    vqrdmlshs_s32(a, b, simd_extract!(c, LANE as u32))
 }
-#[doc = "Multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u16)"]
+#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshs_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlal_high_laneq_u16<const LANE: i32>(
-    a: uint32x4_t,
-    b: uint16x8_t,
-    c: uint16x8_t,
-) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmlal_high_u16(
-        a,
-        b,
-        simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
-    )
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshs_laneq_s32<const LANE: i32>(a: i32, b: i32, c: int32x4_t) -> i32 {
+    static_assert_uimm_bits!(LANE, 2);
+    vqrdmlshs_s32(a, b, simd_extract!(c, LANE as u32))
 }
-#[doc = "Multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u32)"]
+#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshh_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlal_high_lane_u32<const LANE: i32>(
-    a: uint64x2_t,
-    b: uint32x4_t,
-    c: uint32x2_t,
-) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlal_high_u32(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshh_s16(a: i16, b: i16, c: i16) -> i16 {
+    let a: int16x4_t = vdup_n_s16(a);
+    let b: int16x4_t = vdup_n_s16(b);
+    let c: int16x4_t = vdup_n_s16(c);
+    simd_extract!(vqrdmlsh_s16(a, b, c), 0)
 }
-#[doc = "Multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u32)"]
+#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshs_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlal_high_laneq_u32<const LANE: i32>(
-    a: uint64x2_t,
-    b: uint32x4_t,
-    c: uint32x4_t,
-) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlal_high_u32(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+#[target_feature(enable = "rdm")]
+#[cfg_attr(test, assert_instr(sqrdmlsh))]
+#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
+pub unsafe fn vqrdmlshs_s32(a: i32, b: i32, c: i32) -> i32 {
+    let a: int32x2_t = vdup_n_s32(a);
+    let b: int32x2_t = vdup_n_s32(b);
+    let c: int32x2_t = vdup_n_s32(c);
+    simd_extract!(vqrdmlsh_s32(a, b, c), 0)
 }
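// A hedged usage sketch for the new scalar SQRDMLSH intrinsics above -- it is
// illustrative, not part of the generated file. Assumes an aarch64 target with
// the `rdm` feature; the expected values follow Arm's s32 definition
// sat(((acc << 32) - 2*b*c + (1 << 31)) >> 32).
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "rdm")]
unsafe fn sqrdmlsh_scalar_sketch() {
    use core::arch::aarch64::*;
    // 2 * (1 << 16) * (1 << 16) == 1 << 33; its high half is 2, so 0 - 2 == -2.
    assert_eq!(vqrdmlshs_s32(0, 1 << 16, 1 << 16), -2);
    // The accumulate-subtract saturates instead of wrapping.
    assert_eq!(vqrdmlshs_s32(i32::MIN, 1 << 16, 1 << 16), i32::MIN);
    // The lane forms read the multiplier from a vector lane.
    let c = vdupq_n_s32(1 << 16);
    assert_eq!(vqrdmlshs_laneq_s32::<3>(0, 1 << 16, c), -2);
}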
-#[doc = "Multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_s16)"]
+#[doc = "Signed saturating rounding doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhh_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2))]
+#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
-    vmlal_high_s16(a, b, vdupq_n_s16(c))
+pub unsafe fn vqrdmulhh_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> i16 {
+    static_assert_uimm_bits!(LANE, 2);
+    vqrdmulhh_s16(a, simd_extract!(b, LANE as u32))
 }
-#[doc = "Multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_s32)"]
+#[doc = "Signed saturating rounding doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhh_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2))]
+#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
-    vmlal_high_s32(a, b, vdupq_n_s32(c))
+pub unsafe fn vqrdmulhh_laneq_s16<const LANE: i32>(a: i16, b: int16x8_t) -> i16 {
+    static_assert_uimm_bits!(LANE, 3);
+    vqrdmulhh_s16(a, simd_extract!(b, LANE as u32))
 }
-#[doc = "Multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_u16)"]
+#[doc = "Signed saturating rounding doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhs_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2))]
+#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlal_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t {
-    vmlal_high_u16(a, b, vdupq_n_u16(c))
+pub unsafe fn vqrdmulhs_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> i32 {
+    static_assert_uimm_bits!(LANE, 1);
+    vqrdmulhs_s32(a, simd_extract!(b, LANE as u32))
 }
-#[doc = "Multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_u32)"]
+#[doc = "Signed saturating rounding doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhs_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2))]
+#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlal_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t {
-    vmlal_high_u32(a, b, vdupq_n_u32(c))
+pub unsafe fn vqrdmulhs_laneq_s32<const LANE: i32>(a: i32, b: int32x4_t) -> i32 {
+    static_assert_uimm_bits!(LANE, 2);
+    vqrdmulhs_s32(a, simd_extract!(b, LANE as u32))
 }
-#[doc = "Signed multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s8)"]
+#[doc = "Signed saturating rounding doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhh_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2))]
+#[cfg_attr(test, assert_instr(sqrdmulh))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
-    let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-    let c: int8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
-    vmlal_s8(a, b, c)
+pub unsafe fn vqrdmulhh_s16(a: i16, b: i16) -> i16 {
+    simd_extract!(vqrdmulh_s16(vdup_n_s16(a), vdup_n_s16(b)), 0)
 }
-#[doc = "Signed multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s16)"]
+#[doc = "Signed saturating rounding doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhs_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2))]
+#[cfg_attr(test, assert_instr(sqrdmulh))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
-    let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-    let c: int16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
-    vmlal_s16(a, b, c)
+pub unsafe fn vqrdmulhs_s32(a: i32, b: i32) -> i32 {
+    simd_extract!(vqrdmulh_s32(vdup_n_s32(a), vdup_n_s32(b)), 0)
 }
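// A hedged sketch of the scalar SQRDMULH semantics (illustrative only): the
// s16 result is sat((2*a*b + (1 << 15)) >> 16), i.e. a rounded Q15 fixed-point
// product. Assumes an aarch64 target.
#[cfg(target_arch = "aarch64")]
unsafe fn sqrdmulh_scalar_sketch() {
    use core::arch::aarch64::*;
    // Almost-one times almost-one rounds to 0x7FFE, not 0x7FFF.
    assert_eq!(vqrdmulhh_s16(i16::MAX, i16::MAX), 0x7FFE);
    // -1.0 * -1.0 == +1.0 is not representable in Q15, so it saturates.
    assert_eq!(vqrdmulhh_s16(i16::MIN, i16::MIN), i16::MAX);
    // Q31 example: 0.5 * 0.5 == 0.25, i.e. 1 << 29.
    assert_eq!(vqrdmulhs_s32(1 << 30, 1 << 30), 1 << 29);
}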
-#[doc = "Signed multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s32)"]
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlb_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2))]
+#[cfg_attr(test, assert_instr(sqrshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
-    let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-    let c: int32x2_t = simd_shuffle!(c, c, [2, 3]);
-    vmlal_s32(a, b, c)
+pub unsafe fn vqrshlb_s8(a: i8, b: i8) -> i8 {
+    let a: int8x8_t = vdup_n_s8(a);
+    let b: int8x8_t = vdup_n_s8(b);
+    simd_extract!(vqrshl_s8(a, b), 0)
 }
-#[doc = "Unsigned multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u8)"]
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlh_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2))]
+#[cfg_attr(test, assert_instr(sqrshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t {
-    let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-    let c: uint8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
-    vmlal_u8(a, b, c)
+pub unsafe fn vqrshlh_s16(a: i16, b: i16) -> i16 {
+    let a: int16x4_t = vdup_n_s16(a);
+    let b: int16x4_t = vdup_n_s16(b);
+    simd_extract!(vqrshl_s16(a, b), 0)
 }
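// A hedged sketch of the scalar SQRSHL semantics (illustrative only): a
// positive count shifts left with saturation, a negative count shifts right
// with rounding. Assumes an aarch64 target.
#[cfg(target_arch = "aarch64")]
unsafe fn sqrshl_scalar_sketch() {
    use core::arch::aarch64::*;
    assert_eq!(vqrshlb_s8(1, 7), i8::MAX); // 1 << 7 == 128 saturates to 127
    assert_eq!(vqrshlb_s8(5, -1), 3);      // 2.5 rounds up to 3
    assert_eq!(vqrshlh_s16(-5, -1), -2);   // -2.5 rounds toward +inf to -2
}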
-#[doc = "Unsigned multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u16)"]
+#[doc = "Unsigned saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlb_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2))]
+#[cfg_attr(test, assert_instr(uqrshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
-    let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-    let c: uint16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
-    vmlal_u16(a, b, c)
+pub unsafe fn vqrshlb_u8(a: u8, b: i8) -> u8 {
+    let a: uint8x8_t = vdup_n_u8(a);
+    let b: int8x8_t = vdup_n_s8(b);
+    simd_extract!(vqrshl_u8(a, b), 0)
 }
-#[doc = "Unsigned multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u32)"]
+#[doc = "Unsigned saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlh_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2))]
+#[cfg_attr(test, assert_instr(uqrshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
-    let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
-    let c: uint32x2_t = simd_shuffle!(c, c, [2, 3]);
-    vmlal_u32(a, b, c)
+pub unsafe fn vqrshlh_u16(a: u16, b: i16) -> u16 {
+    let a: uint16x4_t = vdup_n_u16(a);
+    let b: int16x4_t = vdup_n_s16(b);
+    simd_extract!(vqrshl_u16(a, b), 0)
 }
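// The unsigned forms saturate to the unsigned range but still take a signed
// shift count. A hedged sketch (illustrative only; aarch64 assumed):
#[cfg(target_arch = "aarch64")]
unsafe fn uqrshl_scalar_sketch() {
    use core::arch::aarch64::*;
    assert_eq!(vqrshlb_u8(200, 1), u8::MAX); // 400 saturates to 255
    assert_eq!(vqrshlh_u16(5, -1), 3);       // rounding shift right
}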
-#[doc = "Floating-point multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_f64)"]
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshld_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmul))]
+#[cfg_attr(test, assert_instr(sqrshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmls_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
-    simd_sub(a, simd_mul(b, c))
+pub unsafe fn vqrshld_s64(a: i64, b: i64) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.i64"
+        )]
+        fn _vqrshld_s64(a: i64, b: i64) -> i64;
+    }
+    _vqrshld_s64(a, b)
 }
-#[doc = "Floating-point multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_f64)"]
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshls_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmul))]
+#[cfg_attr(test, assert_instr(sqrshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
-    simd_sub(a, simd_mul(b, c))
+pub unsafe fn vqrshls_s32(a: i32, b: i32) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.i32"
+        )]
+        fn _vqrshls_s32(a: i32, b: i32) -> i32;
+    }
+    _vqrshls_s32(a, b)
 }
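// The 32- and 64-bit scalar forms above lower directly to the corresponding
// LLVM intrinsics rather than round-tripping through a vector. A hedged usage
// sketch (illustrative only; aarch64 assumed):
#[cfg(target_arch = "aarch64")]
unsafe fn sqrshl_wide_scalar_sketch() {
    use core::arch::aarch64::*;
    assert_eq!(vqrshld_s64(i64::MAX, 1), i64::MAX); // saturates, no wrap
    assert_eq!(vqrshls_s32(3, -2), 1);              // 0.75 rounds to 1
}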
-#[doc = "Multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_s16)"]
+#[doc = "Unsigned saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshls_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(uqrshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlsl_high_lane_s16<const LANE: i32>(
-    a: int32x4_t,
-    b: int16x8_t,
-    c: int16x4_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlsl_high_s16(
-        a,
-        b,
-        simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
-    )
+pub unsafe fn vqrshls_u32(a: u32, b: i32) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshl.i32"
+        )]
+        fn _vqrshls_u32(a: i32, b: i32) -> i32;
+    }
+    _vqrshls_u32(a.as_signed(), b).as_unsigned()
 }
-#[doc = "Multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s16)"]
+#[doc = "Unsigned saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshld_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(uqrshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlsl_high_laneq_s16<const LANE: i32>(
-    a: int32x4_t,
-    b: int16x8_t,
-    c: int16x8_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmlsl_high_s16(
-        a,
-        b,
-        simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
-    )
+pub unsafe fn vqrshld_u64(a: u64, b: i64) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshl.i64"
+        )]
+        fn _vqrshld_u64(a: i64, b: i64) -> i64;
+    }
+    _vqrshld_u64(a.as_signed(), b).as_unsigned()
 }
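// The unsigned d/s forms share the pattern above: the LLVM intrinsic is
// declared over signed integers, so the wrappers bit-cast through
// as_signed()/as_unsigned() without changing any bits. A hedged sketch
// (illustrative only; aarch64 assumed):
#[cfg(target_arch = "aarch64")]
unsafe fn uqrshl_wide_scalar_sketch() {
    use core::arch::aarch64::*;
    assert_eq!(vqrshld_u64(u64::MAX, 1), u64::MAX); // saturates at 2^64 - 1
    assert_eq!(vqrshls_u32(6, -2), 2);              // 1.5 rounds up to 2
}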
-#[doc = "Multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_s32)"]
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlsl_high_lane_s32<const LANE: i32>(
-    a: int64x2_t,
-    b: int32x4_t,
-    c: int32x2_t,
-) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlsl_high_s32(
+pub unsafe fn vqrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    simd_shuffle!(
         a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+        vqrshrn_n_s16::<N>(b),
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
     )
 }
-#[doc = "Multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s32)"]
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlsl_high_laneq_s32<const LANE: i32>(
-    a: int64x2_t,
-    b: int32x4_t,
-    c: int32x4_t,
-) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlsl_high_s32(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vqrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    simd_shuffle!(a, vqrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }
-#[doc = "Multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u16)"]
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlsl_high_lane_u16<const LANE: i32>(
-    a: uint32x4_t,
-    b: uint16x8_t,
-    c: uint16x4_t,
-) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlsl_high_u16(
-        a,
-        b,
-        simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
-    )
+pub unsafe fn vqrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    simd_shuffle!(a, vqrshrn_n_s64::<N>(b), [0, 1, 2, 3])
 }
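// The *_high_n narrows keep an already-narrowed low half and append a second,
// rounding-narrowed vector in the upper lanes, mirroring SQRSHRN2. A hedged
// sketch (illustrative only; aarch64 assumed; lane values follow
// sat((x + (1 << (N - 1))) >> N)):
#[cfg(target_arch = "aarch64")]
unsafe fn sqrshrn_high_sketch() {
    use core::arch::aarch64::*;
    let low = vdup_n_s8(1);
    let wide = vdupq_n_s16(i16::MAX);
    // (32767 + 128) >> 8 == 128, which saturates to 127 in every upper lane.
    let r: int8x16_t = vqrshrn_high_n_s16::<8>(low, wide);
    assert_eq!(vgetq_lane_s8::<0>(r), 1);
    assert_eq!(vgetq_lane_s8::<15>(r), i8::MAX);
}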
-#[doc = "Multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u16)"]
+#[doc = "Unsigned saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlsl_high_laneq_u16<const LANE: i32>(
-    a: uint32x4_t,
-    b: uint16x8_t,
-    c: uint16x8_t,
-) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmlsl_high_u16(
+pub unsafe fn vqrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    simd_shuffle!(
         a,
-        b,
-        simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
+        vqrshrn_n_u16::<N>(b),
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
     )
 }
-#[doc = "Multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u32)"]
+#[doc = "Unsigned saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlsl_high_lane_u32<const LANE: i32>(
-    a: uint64x2_t,
-    b: uint32x4_t,
-    c: uint32x2_t,
-) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlsl_high_u32(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vqrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    simd_shuffle!(a, vqrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }
-#[doc = "Multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u32)"]
+#[doc = "Unsigned saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlsl_high_laneq_u32<const LANE: i32>(
-    a: uint64x2_t,
-    b: uint32x4_t,
-    c: uint32x4_t,
-) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlsl_high_u32(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vqrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    simd_shuffle!(a, vqrshrn_n_u64::<N>(b), [0, 1, 2, 3])
 }
-#[doc = "Multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_s16)"]
+#[doc = "Unsigned saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnd_n_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2))]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
-    vmlsl_high_s16(a, b, vdupq_n_s16(c))
+pub unsafe fn vqrshrnd_n_u64<const N: i32>(a: u64) -> u32 {
+    static_assert!(N >= 1 && N <= 32);
+    let a: uint64x2_t = vdupq_n_u64(a);
+    simd_extract!(vqrshrn_n_u64::<N>(a), 0)
 }
-#[doc = "Multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_s32)"]
+#[doc = "Unsigned saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnh_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2))]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
-    vmlsl_high_s32(a, b, vdupq_n_s32(c))
+pub unsafe fn vqrshrnh_n_u16<const N: i32>(a: u16) -> u8 {
+    static_assert!(N >= 1 && N <= 8);
+    let a: uint16x8_t = vdupq_n_u16(a);
+    simd_extract!(vqrshrn_n_u16::<N>(a), 0)
 }
-#[doc = "Multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_u16)"]
+#[doc = "Unsigned saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrns_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2))]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlsl_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t {
-    vmlsl_high_u16(a, b, vdupq_n_u16(c))
+pub unsafe fn vqrshrns_n_u32<const N: i32>(a: u32) -> u16 {
+    static_assert!(N >= 1 && N <= 16);
+    let a: uint32x4_t = vdupq_n_u32(a);
+    simd_extract!(vqrshrn_n_u32::<N>(a), 0)
 }
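// The scalar narrows duplicate the input into a vector, reuse the vector
// intrinsic, and extract lane 0. A hedged sketch of the u32 -> u16 form
// (illustrative only; aarch64 assumed):
#[cfg(target_arch = "aarch64")]
unsafe fn uqrshrn_scalar_sketch() {
    use core::arch::aarch64::*;
    assert_eq!(vqrshrns_n_u32::<8>(0x0001_0000), 0x0100);   // exact shift
    assert_eq!(vqrshrns_n_u32::<8>(0x1000_0000), u16::MAX); // saturates
}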
-#[doc = "Multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_u32)"]
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnh_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2))]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlsl_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t {
-    vmlsl_high_u32(a, b, vdupq_n_u32(c))
+pub unsafe fn vqrshrnh_n_s16<const N: i32>(a: i16) -> i8 {
+    static_assert!(N >= 1 && N <= 8);
+    let a: int16x8_t = vdupq_n_s16(a);
+    simd_extract!(vqrshrn_n_s16::<N>(a), 0)
 }
-#[doc = "Signed multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s8)"]
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrns_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2))]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlsl_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
-    let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-    let c: int8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
-    vmlsl_s8(a, b, c)
+pub unsafe fn vqrshrns_n_s32<const N: i32>(a: i32) -> i16 {
+    static_assert!(N >= 1 && N <= 16);
+    let a: int32x4_t = vdupq_n_s32(a);
+    simd_extract!(vqrshrn_n_s32::<N>(a), 0)
 }
-#[doc = "Signed multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s16)"]
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnd_n_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2))]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
-    let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-    let c: int16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
-    vmlsl_s16(a, b, c)
+pub unsafe fn vqrshrnd_n_s64<const N: i32>(a: i64) -> i32 {
+    static_assert!(N >= 1 && N <= 32);
+    let a: int64x2_t = vdupq_n_s64(a);
+    simd_extract!(vqrshrn_n_s64::<N>(a), 0)
 }
-#[doc = "Signed multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s32)"]
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2))]
+#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
-    let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-    let c: int32x2_t = simd_shuffle!(c, c, [2, 3]);
-    vmlsl_s32(a, b, c)
+pub unsafe fn vqrshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    simd_shuffle!(
+        a,
+        vqrshrun_n_s16::<N>(b),
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+    )
 }
-#[doc = "Unsigned multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u8)"]
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2))]
+#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlsl_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t {
-    let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-    let c: uint8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
-    vmlsl_u8(a, b, c)
+pub unsafe fn vqrshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    simd_shuffle!(a, vqrshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }
-#[doc = "Unsigned multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u16)"]
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2))]
+#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlsl_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
-    let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-    let c: uint16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
-    vmlsl_u16(a, b, c)
+pub unsafe fn vqrshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    simd_shuffle!(a, vqrshrun_n_s64::<N>(b), [0, 1, 2, 3])
 }
-#[doc = "Unsigned multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u32)"]
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrund_n_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2))]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmlsl_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
-    let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
-    let c: uint32x2_t = simd_shuffle!(c, c, [2, 3]);
-    vmlsl_u32(a, b, c)
+pub unsafe fn vqrshrund_n_s64<const N: i32>(a: i64) -> u32 {
+    static_assert!(N >= 1 && N <= 32);
+    let a: int64x2_t = vdupq_n_s64(a);
+    simd_extract!(vqrshrun_n_s64::<N>(a), 0)
 }
-#[doc = "Vector move"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_s8)"]
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrunh_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sxtl2))]
-pub unsafe fn vmovl_high_s8(a: int8x16_t) -> int16x8_t {
-    let a: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-    vmovl_s8(a)
+pub unsafe fn vqrshrunh_n_s16<const N: i32>(a: i16) -> u8 {
+    static_assert!(N >= 1 && N <= 8);
+    let a: int16x8_t = vdupq_n_s16(a);
+    simd_extract!(vqrshrun_n_s16::<N>(a), 0)
 }
-#[doc = "Vector move"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_s16)"]
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshruns_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sxtl2))]
-pub unsafe fn vmovl_high_s16(a: int16x8_t) -> int32x4_t {
-    let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-    vmovl_s16(a)
+pub unsafe fn vqrshruns_n_s32<const N: i32>(a: i32) -> u16 {
+    static_assert!(N >= 1 && N <= 16);
+    let a: int32x4_t = vdupq_n_s32(a);
+    simd_extract!(vqrshrun_n_s32::<N>(a), 0)
 }
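// SQRSHRUN narrows a signed input to an unsigned result: negative inputs
// clamp to zero and overflow clamps to the unsigned maximum. A hedged sketch
// (illustrative only; aarch64 assumed):
#[cfg(target_arch = "aarch64")]
unsafe fn sqrshrun_scalar_sketch() {
    use core::arch::aarch64::*;
    assert_eq!(vqrshrunh_n_s16::<4>(-100), 0);           // clamps at zero
    assert_eq!(vqrshrunh_n_s16::<4>(1000), 63);          // (1000 + 8) >> 4
    assert_eq!(vqrshrunh_n_s16::<4>(i16::MAX), u8::MAX); // saturates high
}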
-#[doc = "Vector move"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_s32)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_n_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sxtl2))]
-pub unsafe fn vmovl_high_s32(a: int32x4_t) -> int64x2_t {
-    let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-    vmovl_s32(a)
+pub unsafe fn vqshlb_n_s8<const N: i32>(a: i8) -> i8 {
+    static_assert_uimm_bits!(N, 3);
+    simd_extract!(vqshl_n_s8::<N>(vdup_n_s8(a)), 0)
 }
-#[doc = "Vector move"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_u8)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_n_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(uxtl2))]
-pub unsafe fn vmovl_high_u8(a: uint8x16_t) -> uint16x8_t {
-    let a: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-    vmovl_u8(a)
+pub unsafe fn vqshld_n_s64<const N: i32>(a: i64) -> i64 {
+    static_assert_uimm_bits!(N, 6);
+    simd_extract!(vqshl_n_s64::<N>(vdup_n_s64(a)), 0)
 }
-#[doc = "Vector move"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_u16)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(uxtl2))]
-pub unsafe fn vmovl_high_u16(a: uint16x8_t) -> uint32x4_t {
-    let a: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-    vmovl_u16(a)
+pub unsafe fn vqshlh_n_s16<const N: i32>(a: i16) -> i16 {
+    static_assert_uimm_bits!(N, 4);
+    simd_extract!(vqshl_n_s16::<N>(vdup_n_s16(a)), 0)
 }
-#[doc = "Vector move"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_u32)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(uxtl2))]
-pub unsafe fn vmovl_high_u32(a: uint32x4_t) -> uint64x2_t {
-    let a: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
-    vmovl_u32(a)
+pub unsafe fn vqshls_n_s32<const N: i32>(a: i32) -> i32 {
+    static_assert_uimm_bits!(N, 5);
+    simd_extract!(vqshl_n_s32::<N>(vdup_n_s32(a)), 0)
 }
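// The _n forms shift left by a compile-time immediate; out-of-range results
// saturate rather than wrap. A hedged sketch (illustrative only; aarch64
// assumed):
#[cfg(target_arch = "aarch64")]
unsafe fn sqshl_imm_scalar_sketch() {
    use core::arch::aarch64::*;
    assert_eq!(vqshlh_n_s16::<4>(100), 1600);   // fits, exact
    assert_eq!(vqshlb_n_s8::<2>(48), i8::MAX);  // 192 saturates to 127
    assert_eq!(vqshlb_n_s8::<2>(-48), i8::MIN); // -192 saturates to -128
}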
-#[doc = "Extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_s16)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_n_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(xtn2))]
-pub unsafe fn vmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
-    let c: int8x8_t = simd_cast(b);
-    simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+pub unsafe fn vqshlb_n_u8<const N: i32>(a: u8) -> u8 {
+    static_assert_uimm_bits!(N, 3);
+    simd_extract!(vqshl_n_u8::<N>(vdup_n_u8(a)), 0)
 }
-#[doc = "Extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_s32)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_n_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(xtn2))]
-pub unsafe fn vmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
-    let c: int16x4_t = simd_cast(b);
-    simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7])
+pub unsafe fn vqshld_n_u64<const N: i32>(a: u64) -> u64 {
+    static_assert_uimm_bits!(N, 6);
+    simd_extract!(vqshl_n_u64::<N>(vdup_n_u64(a)), 0)
 }
-#[doc = "Extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_s64)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(xtn2))]
-pub unsafe fn vmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
-    let c: int32x2_t = simd_cast(b);
-    simd_shuffle!(a, c, [0, 1, 2, 3])
+pub unsafe fn vqshlh_n_u16<const N: i32>(a: u16) -> u16 {
+    static_assert_uimm_bits!(N, 4);
+    simd_extract!(vqshl_n_u16::<N>(vdup_n_u16(a)), 0)
 }
-#[doc = "Extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_u16)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(xtn2))]
-pub unsafe fn vmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
-    let c: uint8x8_t = simd_cast(b);
-    simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+pub unsafe fn vqshls_n_u32<const N: i32>(a: u32) -> u32 {
+    static_assert_uimm_bits!(N, 5);
+    simd_extract!(vqshl_n_u32::<N>(vdup_n_u32(a)), 0)
 }
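// The unsigned immediate forms behave the same way against the unsigned
// bound. A hedged sketch (illustrative only; aarch64 assumed):
#[cfg(target_arch = "aarch64")]
unsafe fn uqshl_imm_scalar_sketch() {
    use core::arch::aarch64::*;
    assert_eq!(vqshlb_n_u8::<4>(0x0F), 0xF0);    // fits, exact
    assert_eq!(vqshlb_n_u8::<4>(0x1F), u8::MAX); // 496 saturates to 255
}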
-#[doc = "Extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_u32)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(xtn2))]
-pub unsafe fn vmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
-    let c: uint16x4_t = simd_cast(b);
-    simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7])
+pub unsafe fn vqshlb_s8(a: i8, b: i8) -> i8 {
+    let c: int8x8_t = vqshl_s8(vdup_n_s8(a), vdup_n_s8(b));
+    simd_extract!(c, 0)
 }
-#[doc = "Extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_u64)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(xtn2))]
-pub unsafe fn vmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
-    let c: uint32x2_t = simd_cast(b);
-    simd_shuffle!(a, c, [0, 1, 2, 3])
+pub unsafe fn vqshlh_s16(a: i16, b: i16) -> i16 {
+    let c: int16x4_t = vqshl_s16(vdup_n_s16(a), vdup_n_s16(b));
+    simd_extract!(c, 0)
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f64)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmul))]
-pub unsafe fn vmul_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
-    simd_mul(a, b)
+#[cfg_attr(test, assert_instr(sqshl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshls_s32(a: i32, b: i32) -> i32 {
+    let c: int32x2_t = vqshl_s32(vdup_n_s32(a), vdup_n_s32(b));
+    simd_extract!(c, 0)
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f64)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmul))]
-pub unsafe fn vmulq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    simd_mul(a, b)
+pub unsafe fn vqshlb_u8(a: u8, b: i8) -> u8 {
+    let c: uint8x8_t = vqshl_u8(vdup_n_u8(a), vdup_n_s8(b));
+    simd_extract!(c, 0)
 }
-#[doc = "Floating-point multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f64)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(uqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmul_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t) -> float64x1_t {
-    static_assert!(LANE == 0);
-    simd_mul(a, transmute::<f64, _>(simd_extract!(b, LANE as u32)))
+pub unsafe fn vqshlh_u16(a: u16, b: i16) -> u16 {
+    let c: uint16x4_t = vqshl_u16(vdup_n_u16(a), vdup_n_s16(b));
+    simd_extract!(c, 0)
 }
-#[doc = "Floating-point multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f64)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(uqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmul_laneq_f64<const LANE: i32>(a: float64x1_t, b: float64x2_t) -> float64x1_t {
-    static_assert_uimm_bits!(LANE, 1);
-    simd_mul(a, transmute::<f64, _>(simd_extract!(b, LANE as u32)))
+pub unsafe fn vqshls_u32(a: u32, b: i32) -> u32 {
+    let c: uint32x2_t = vqshl_u32(vdup_n_u32(a), vdup_n_s32(b));
+    simd_extract!(c, 0)
 }
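// Unlike the rounding SQRSHL family earlier in this section, these
// register-form shifts truncate when the count is negative. A hedged contrast
// sketch (illustrative only; aarch64 assumed):
#[cfg(target_arch = "aarch64")]
unsafe fn qshl_vs_qrshl_sketch() {
    use core::arch::aarch64::*;
    assert_eq!(vqshlb_s8(5, -1), 2);  // truncating: 5 >> 1
    assert_eq!(vqrshlb_s8(5, -1), 3); // rounding: (5 + 1) >> 1
}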
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f64)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmul))]
+#[cfg_attr(test, assert_instr(sqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmul_n_f64(a: float64x1_t, b: f64) -> float64x1_t {
-    simd_mul(a, vdup_n_f64(b))
+pub unsafe fn vqshld_s64(a: i64, b: i64) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.i64"
+        )]
+        fn _vqshld_s64(a: i64, b: i64) -> i64;
+    }
+    _vqshld_s64(a, b)
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f64)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmul))]
+#[cfg_attr(test, assert_instr(uqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmulq_n_f64(a: float64x2_t, b: f64) -> float64x2_t {
-    simd_mul(a, vdupq_n_f64(b))
+pub unsafe fn vqshld_u64(a: u64, b: i64) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.i64"
+        )]
+        fn _vqshld_u64(a: i64, b: i64) -> i64;
+    }
+    _vqshld_u64(a.as_signed(), b).as_unsigned()
 }
-#[doc = "Floating-point multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuld_lane_f64)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlub_n_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmuld_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> f64 {
-    static_assert!(LANE == 0);
-    let b: f64 = simd_extract!(b, LANE as u32);
-    a * b
+pub unsafe fn vqshlub_n_s8<const N: i32>(a: i8) -> u8 {
+    static_assert_uimm_bits!(N, 3);
+    simd_extract!(vqshlu_n_s8::<N>(vdup_n_s8(a)), 0)
 }
-#[doc = "Multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_s16)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlud_n_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmull_high_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmull_high_s16(
-        a,
-        simd_shuffle!(
-            b,
-            b,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
-    )
+pub unsafe fn vqshlud_n_s64<const N: i32>(a: i64) -> u64 {
+    static_assert_uimm_bits!(N, 6);
+    simd_extract!(vqshlu_n_s64::<N>(vdup_n_s64(a)), 0)
 }
-#[doc = "Multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s16)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluh_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmull_high_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmull_high_s16(
-        a,
-        simd_shuffle!(
-            b,
-            b,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
-    )
+pub unsafe fn vqshluh_n_s16<const N: i32>(a: i16) -> u16 {
+    static_assert_uimm_bits!(N, 4);
+    simd_extract!(vqshlu_n_s16::<N>(vdup_n_s16(a)), 0)
 }
-#[doc = "Multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_s32)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlus_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmull_high_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmull_high_s32(
-        a,
-        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vqshlus_n_s32<const N: i32>(a: i32) -> u32 {
+    static_assert_uimm_bits!(N, 5);
+    simd_extract!(vqshlu_n_s32::<N>(vdup_n_s32(a)), 0)
 }
-#[doc = "Multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s32)"]
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
+#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmull_high_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmull_high_s32(
+pub unsafe fn vqshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    simd_shuffle!(
         a,
-        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+        vqshrn_n_s16::<N>(b),
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
     )
 }
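The `_high_` narrowing forms keep `a` as the low half of the result and write the narrowed `b` into the upper lanes, which is what the 16-element `simd_shuffle!` expresses. A small sketch of that layout (hypothetical wrapper name, AArch64 assumed):

```rust
// Illustrative only: SQSHRN2 narrows `b` into the high half of the result.
#[cfg(target_arch = "aarch64")]
fn demo_sqshrn2() {
    use core::arch::aarch64::*;
    // SAFETY: NEON is mandatory on AArch64.
    unsafe {
        let low = vdup_n_s8(1);      // becomes lanes 0..8 of the result
        let wide = vdupq_n_s16(400); // each lane becomes 400 >> 4 = 25
        let r: int8x16_t = vqshrn_high_n_s16::<4>(low, wide);
        assert_eq!(vgetq_lane_s8::<0>(r), 1);  // low half is `a`, untouched
        assert_eq!(vgetq_lane_s8::<8>(r), 25); // high half holds the narrowed lanes
    }
}
```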
-#[doc = "Multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u16)"]
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
+#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmull_high_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmull_high_u16(
-        a,
-        simd_shuffle!(
-            b,
-            b,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
-    )
+pub unsafe fn vqshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    simd_shuffle!(a, vqshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }
-#[doc = "Multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u16)"]
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
+#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmull_high_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmull_high_u16(
-        a,
-        simd_shuffle!(
-            b,
-            b,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
-    )
+pub unsafe fn vqshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    simd_shuffle!(a, vqshrn_n_s64::<N>(b), [0, 1, 2, 3])
 }
-#[doc = "Multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u32)"]
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
+#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmull_high_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmull_high_u32(
+pub unsafe fn vqshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    simd_shuffle!(
         a,
-        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+        vqshrn_n_u16::<N>(b),
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
     )
-}
-#[doc = "Multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u32)"]
+}
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
+#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmull_high_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmull_high_u32(
-        a,
-        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vqshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    simd_shuffle!(a, vqshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }
-#[doc = "Multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_s16)"]
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smull2))]
+#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
-    vmull_high_s16(a, vdupq_n_s16(b))
+pub unsafe fn vqshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    simd_shuffle!(a, vqshrn_n_u64::<N>(b), [0, 1, 2, 3])
 }
-#[doc = "Multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_s32)"]
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnd_n_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smull2))]
+#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
-    vmull_high_s32(a, vdupq_n_s32(b))
+pub unsafe fn vqshrnd_n_s64<const N: i32>(a: i64) -> i32 {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshrn.i32"
+        )]
+        fn _vqshrnd_n_s64(a: i64, n: i32) -> i32;
+    }
+    _vqshrnd_n_s64(a, N)
 }
-#[doc = "Multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_u16)"]
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnd_n_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umull2))]
+#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmull_high_n_u16(a: uint16x8_t, b: u16) -> uint32x4_t {
-    vmull_high_u16(a, vdupq_n_u16(b))
+pub unsafe fn vqshrnd_n_u64<const N: i32>(a: u64) -> u32 {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshrn.i32"
+        )]
+        fn _vqshrnd_n_u64(a: i64, n: i32) -> i32;
+    }
+    _vqshrnd_n_u64(a.as_signed(), N).as_unsigned()
 }
-#[doc = "Multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_u32)"]
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnh_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umull2))]
+#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmull_high_n_u32(a: uint32x4_t, b: u32) -> uint64x2_t {
-    vmull_high_u32(a, vdupq_n_u32(b))
+pub unsafe fn vqshrnh_n_s16<const N: i32>(a: i16) -> i8 {
+    static_assert!(N >= 1 && N <= 8);
+    simd_extract!(vqshrn_n_s16::<N>(vdupq_n_s16(a)), 0)
 }
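The scalar narrowing shifts reuse the vector forms by splatting the input and extracting lane 0; saturation applies at the narrower type's bounds. A hedged sketch (hypothetical wrapper, AArch64 assumed):

```rust
// Illustrative only: scalar saturating shift right narrow (SQSHRN).
#[cfg(target_arch = "aarch64")]
fn demo_sqshrn_scalar() {
    use core::arch::aarch64::vqshrnh_n_s16;
    // SAFETY: NEON is mandatory on AArch64.
    unsafe {
        assert_eq!(vqshrnh_n_s16::<2>(300), 75);        // 300 >> 2 fits in i8
        assert_eq!(vqshrnh_n_s16::<2>(i16::MAX), 127);  // 8191 saturates to i8::MAX
        assert_eq!(vqshrnh_n_s16::<2>(i16::MIN), -128); // saturates to i8::MIN
    }
}
```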
-#[doc = "Polynomial multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_p64)"]
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrns_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(pmull))]
-pub unsafe fn vmull_high_p64(a: poly64x2_t, b: poly64x2_t) -> p128 {
-    vmull_p64(simd_extract!(a, 1), simd_extract!(b, 1))
+pub unsafe fn vqshrns_n_s32<const N: i32>(a: i32) -> i16 {
+    static_assert!(N >= 1 && N <= 16);
+    simd_extract!(vqshrn_n_s32::<N>(vdupq_n_s32(a)), 0)
 }
-#[doc = "Polynomial multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_p8)"]
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnh_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(pmull))]
-pub unsafe fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t {
-    let a: poly8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-    let b: poly8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-    vmull_p8(a, b)
+pub unsafe fn vqshrnh_n_u16<const N: i32>(a: u16) -> u8 {
+    static_assert!(N >= 1 && N <= 8);
+    simd_extract!(vqshrn_n_u16::<N>(vdupq_n_u16(a)), 0)
 }
-#[doc = "Signed multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_s8)"]
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrns_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(smull2))]
-pub unsafe fn vmull_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
-    let a: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-    let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-    vmull_s8(a, b)
+pub unsafe fn vqshrns_n_u32<const N: i32>(a: u32) -> u16 {
+    static_assert!(N >= 1 && N <= 16);
+    simd_extract!(vqshrn_n_u32::<N>(vdupq_n_u32(a)), 0)
 }
-#[doc = "Signed multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_s16)"]
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(smull2))]
-pub unsafe fn vmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
-    let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-    let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-    vmull_s16(a, b)
+pub unsafe fn vqshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    simd_shuffle!(
+        a,
+        vqshrun_n_s16::<N>(b),
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+    )
 }
-#[doc = "Signed multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_s32)"]
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(smull2))]
-pub unsafe fn vmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
-    let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-    let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-    vmull_s32(a, b)
+pub unsafe fn vqshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    simd_shuffle!(a, vqshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }
-#[doc = "Unsigned multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_u8)"]
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(umull2))]
-pub unsafe fn vmull_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
-    let a: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-    let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-    vmull_u8(a, b)
+pub unsafe fn vqshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    simd_shuffle!(a, vqshrun_n_s64::<N>(b), [0, 1, 2, 3])
 }
-#[doc = "Unsigned multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_u16)"]
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrund_n_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(umull2))]
-pub unsafe fn vmull_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
-    let a: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-    let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-    vmull_u16(a, b)
+pub unsafe fn vqshrund_n_s64<const N: i32>(a: i64) -> u32 {
+    static_assert!(N >= 1 && N <= 32);
+    simd_extract!(vqshrun_n_s64::<N>(vdupq_n_s64(a)), 0)
 }
-#[doc = "Unsigned multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_u32)"]
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrunh_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(umull2))]
-pub unsafe fn vmull_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
-    let a: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
-    let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
-    vmull_u32(a, b)
+pub unsafe fn vqshrunh_n_s16<const N: i32>(a: i16) -> u8 {
+    static_assert!(N >= 1 && N <= 8);
+    simd_extract!(vqshrun_n_s16::<N>(vdupq_n_s16(a)), 0)
 }
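SQSHRUN narrows a signed input to an unsigned result, so the saturation floor is zero and the ceiling is the unsigned maximum. A hedged sketch (hypothetical wrapper, AArch64 assumed):

```rust
// Illustrative only: signed saturating shift right unsigned narrow (SQSHRUN).
#[cfg(target_arch = "aarch64")]
fn demo_sqshrun_scalar() {
    use core::arch::aarch64::vqshrunh_n_s16;
    // SAFETY: NEON is mandatory on AArch64.
    unsafe {
        assert_eq!(vqshrunh_n_s16::<1>(300), 150);      // 300 >> 1 fits in u8
        assert_eq!(vqshrunh_n_s16::<1>(-42), 0);        // negative input clamps to 0
        assert_eq!(vqshrunh_n_s16::<1>(i16::MAX), 255); // 16383 saturates to u8::MAX
    }
}
```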
-#[doc = "Polynomial multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_p64)"]
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshruns_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(pmull))]
-pub unsafe fn vmull_p64(a: p64, b: p64) -> p128 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.pmull64"
-        )]
-        fn _vmull_p64(a: p64, b: p64) -> int8x16_t;
-    }
-    transmute(_vmull_p64(a, b))
+pub unsafe fn vqshruns_n_s32<const N: i32>(a: i32) -> u16 {
+    static_assert!(N >= 1 && N <= 16);
+    simd_extract!(vqshrun_n_s32::<N>(vdupq_n_s32(a)), 0)
 }
-#[doc = "Floating-point multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f64)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubb_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmulq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
-    static_assert!(LANE == 0);
-    simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
+#[cfg_attr(test, assert_instr(sqsub))]
+pub unsafe fn vqsubb_s8(a: i8, b: i8) -> i8 {
+    let a: int8x8_t = vdup_n_s8(a);
+    let b: int8x8_t = vdup_n_s8(b);
+    simd_extract!(vqsub_s8(a, b), 0)
 }
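The 8- and 16-bit scalar saturating subtracts splat into a vector, apply `vqsub`, and extract lane 0. A hedged sketch of the saturating semantics (hypothetical wrapper, AArch64 assumed):

```rust
// Illustrative only: scalar signed saturating subtract (SQSUB).
#[cfg(target_arch = "aarch64")]
fn demo_sqsub_scalar() {
    use core::arch::aarch64::vqsubb_s8;
    // SAFETY: NEON is mandatory on AArch64.
    unsafe {
        assert_eq!(vqsubb_s8(10, 3), 7);        // ordinary subtraction
        assert_eq!(vqsubb_s8(-100, 100), -128); // -200 saturates to i8::MIN
        assert_eq!(vqsubb_s8(100, -100), 127);  // 200 saturates to i8::MAX
    }
}
```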
-#[doc = "Floating-point multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f64)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubh_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmulq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
+#[cfg_attr(test, assert_instr(sqsub))]
+pub unsafe fn vqsubh_s16(a: i16, b: i16) -> i16 {
+    let a: int16x4_t = vdup_n_s16(a);
+    let b: int16x4_t = vdup_n_s16(b);
+    simd_extract!(vqsub_s16(a, b), 0)
 }
-#[doc = "Floating-point multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuls_lane_f32)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubb_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmuls_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> f32 {
-    static_assert_uimm_bits!(LANE, 1);
-    let b: f32 = simd_extract!(b, LANE as u32);
-    a * b
+#[cfg_attr(test, assert_instr(uqsub))]
+pub unsafe fn vqsubb_u8(a: u8, b: u8) -> u8 {
+    let a: uint8x8_t = vdup_n_u8(a);
+    let b: uint8x8_t = vdup_n_u8(b);
+    simd_extract!(vqsub_u8(a, b), 0)
 }
-#[doc = "Floating-point multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuls_laneq_f32)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubh_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmuls_laneq_f32<const LANE: i32>(a: f32, b: float32x4_t) -> f32 {
-    static_assert_uimm_bits!(LANE, 2);
-    let b: f32 = simd_extract!(b, LANE as u32);
-    a * b
+#[cfg_attr(test, assert_instr(uqsub))]
+pub unsafe fn vqsubh_u16(a: u16, b: u16) -> u16 {
+    let a: uint16x4_t = vdup_n_u16(a);
+    let b: uint16x4_t = vdup_n_u16(b);
+    simd_extract!(vqsub_u16(a, b), 0)
 }
-#[doc = "Floating-point multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuld_laneq_f64)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubs_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmuld_laneq_f64<const LANE: i32>(a: f64, b: float64x2_t) -> f64 {
-    static_assert_uimm_bits!(LANE, 1);
-    let b: f64 = simd_extract!(b, LANE as u32);
-    a * b
+#[cfg_attr(test, assert_instr(sqsub))]
+pub unsafe fn vqsubs_s32(a: i32, b: i32) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqsub.i32"
+        )]
+        fn _vqsubs_s32(a: i32, b: i32) -> i32;
+    }
+    _vqsubs_s32(a, b)
 }
-#[doc = "Floating-point multiply extended"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_f32)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubd_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmulx))]
-pub unsafe fn vmulx_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+#[cfg_attr(test, assert_instr(sqsub))]
+pub unsafe fn vqsubd_s64(a: i64, b: i64) -> i64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmulx.v2f32"
+            link_name = "llvm.aarch64.neon.sqsub.i64"
         )]
-        fn _vmulx_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+        fn _vqsubd_s64(a: i64, b: i64) -> i64;
     }
-    _vmulx_f32(a, b)
+    _vqsubd_s64(a, b)
 }
-#[doc = "Floating-point multiply extended"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_f32)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubs_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmulx))]
-pub unsafe fn vmulxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+#[cfg_attr(test, assert_instr(uqsub))]
+pub unsafe fn vqsubs_u32(a: u32, b: u32) -> u32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmulx.v4f32"
+            link_name = "llvm.aarch64.neon.uqsub.i32"
         )]
-        fn _vmulxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+        fn _vqsubs_u32(a: i32, b: i32) -> i32;
     }
-    _vmulxq_f32(a, b)
+    _vqsubs_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Floating-point multiply extended"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_f64)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubd_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmulx))]
-pub unsafe fn vmulx_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+#[cfg_attr(test, assert_instr(uqsub))]
+pub unsafe fn vqsubd_u64(a: u64, b: u64) -> u64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmulx.v1f64"
+            link_name = "llvm.aarch64.neon.uqsub.i64"
         )]
-        fn _vmulx_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+        fn _vqsubd_u64(a: i64, b: i64) -> i64;
     }
-    _vmulx_f64(a, b)
+    _vqsubd_u64(a.as_signed(), b.as_signed()).as_unsigned()
 }
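The 32- and 64-bit scalar forms call the LLVM intrinsic directly; for the unsigned variants the result floors at zero instead of wrapping. A hedged sketch (hypothetical wrapper, AArch64 assumed):

```rust
// Illustrative only: unsigned saturating subtract (UQSUB) floors at zero.
#[cfg(target_arch = "aarch64")]
fn demo_uqsub_scalar() {
    use core::arch::aarch64::vqsubd_u64;
    // SAFETY: NEON is mandatory on AArch64.
    unsafe {
        assert_eq!(vqsubd_u64(5, 2), 3);
        assert_eq!(vqsubd_u64(2, 5), 0); // would wrap with ordinary subtraction
    }
}
```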
-#[doc = "Floating-point multiply extended"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_f64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmulx))]
-pub unsafe fn vmulxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+unsafe fn vqtbl1(a: int8x16_t, b: uint8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmulx.v2f64"
+            link_name = "llvm.aarch64.neon.tbl1.v8i8"
         )]
-        fn _vmulxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+        fn _vqtbl1(a: int8x16_t, b: int8x8_t) -> int8x8_t;
     }
-    _vmulxq_f64(a, b)
+    _vqtbl1(a, b.as_signed())
 }
-#[doc = "Floating-point multiply extended"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f32)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmulx_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
+unsafe fn vqtbl1q(a: int8x16_t, b: uint8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.tbl1.v16i8"
+        )]
+        fn _vqtbl1q(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vqtbl1q(a, b.as_signed())
 }
-#[doc = "Floating-point multiply extended"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f32)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmulx_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
+pub unsafe fn vqtbl1_s8(a: int8x16_t, b: uint8x8_t) -> int8x8_t {
+    vqtbl1(a, b)
 }
-#[doc = "Floating-point multiply extended"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f32)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmulxq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmulxq_f32(
-        a,
-        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vqtbl1q_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t {
+    vqtbl1q(a, b)
 }
-#[doc = "Floating-point multiply extended"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f32)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmulxq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmulxq_f32(
-        a,
-        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vqtbl1_u8(a: uint8x16_t, b: uint8x8_t) -> uint8x8_t {
+    transmute(vqtbl1(transmute(a), b))
 }
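`vqtbl1_u8` performs a byte-wise table lookup over one 16-byte register; TBL returns zero for any index outside the table rather than faulting. A hedged sketch (hypothetical wrapper, AArch64 assumed):

```rust
// Illustrative only: single-register table lookup (TBL).
#[cfg(target_arch = "aarch64")]
fn demo_vqtbl1() {
    use core::arch::aarch64::*;
    // SAFETY: NEON is mandatory on AArch64.
    unsafe {
        let table_bytes: [u8; 16] = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25];
        let table: uint8x16_t = vld1q_u8(table_bytes.as_ptr());
        let idx_bytes: [u8; 8] = [0, 2, 4, 6, 8, 10, 12, 255];
        let idx: uint8x8_t = vld1_u8(idx_bytes.as_ptr());
        let mut out = [0u8; 8];
        vst1_u8(out.as_mut_ptr(), vqtbl1_u8(table, idx));
        // Index 255 is out of range, so the last lane is 0.
        assert_eq!(out, [10, 12, 14, 16, 18, 20, 22, 0]);
    }
}
```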
-#[doc = "Floating-point multiply extended"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmulxq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
+pub unsafe fn vqtbl1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    transmute(vqtbl1q(transmute(a), b))
 }
-#[doc = "Floating-point multiply extended"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmulx_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t) -> float64x1_t {
-    static_assert!(LANE == 0);
-    vmulx_f64(a, transmute::<f64, _>(simd_extract!(b, LANE as u32)))
+pub unsafe fn vqtbl1_p8(a: poly8x16_t, b: uint8x8_t) -> poly8x8_t {
+    transmute(vqtbl1(transmute(a), b))
 }
-#[doc = "Floating-point multiply extended"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl1q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmulx_laneq_f64<const LANE: i32>(a: float64x1_t, b: float64x2_t) -> float64x1_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmulx_f64(a, transmute::<f64, _>(simd_extract!(b, LANE as u32)))
+pub unsafe fn vqtbl1q_p8(a: poly8x16_t, b: uint8x16_t) -> poly8x16_t {
+    transmute(vqtbl1q(transmute(a), b))
 }
-#[doc = "Floating-point multiply extended"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxd_f64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmulx))]
-pub unsafe fn vmulxd_f64(a: f64, b: f64) -> f64 {
+unsafe fn vqtbl2(a: int8x16_t, b: int8x16_t, c: uint8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmulx.f64"
+            link_name = "llvm.aarch64.neon.tbl2.v8i8"
         )]
-        fn _vmulxd_f64(a: f64, b: f64) -> f64;
+        fn _vqtbl2(a: int8x16_t, b: int8x16_t, c: int8x8_t) -> int8x8_t;
     }
-    _vmulxd_f64(a, b)
+    _vqtbl2(a, b, c.as_signed())
 }
-#[doc = "Floating-point multiply extended"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxs_f32)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmulx))]
-pub unsafe fn vmulxs_f32(a: f32, b: f32) -> f32 {
+unsafe fn vqtbl2q(a: int8x16_t, b: int8x16_t, c: uint8x16_t) -> int8x16_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmulx.f32"
+            link_name = "llvm.aarch64.neon.tbl2.v16i8"
         )]
-        fn _vmulxs_f32(a: f32, b: f32) -> f32;
+        fn _vqtbl2q(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
     }
-    _vmulxs_f32(a, b)
-}
-#[doc = "Floating-point multiply extended"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxd_lane_f64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmulxd_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> f64 {
-    static_assert!(LANE == 0);
-    vmulxd_f64(a, simd_extract!(b, LANE as u32))
-}
-#[doc = "Floating-point multiply extended"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxd_laneq_f64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmulxd_laneq_f64<const LANE: i32>(a: f64, b: float64x2_t) -> f64 {
-    static_assert_uimm_bits!(LANE, 1);
-    vmulxd_f64(a, simd_extract!(b, LANE as u32))
+    _vqtbl2q(a, b, c.as_signed())
 }
-#[doc = "Floating-point multiply extended"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxs_lane_f32)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmulxs_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> f32 {
-    static_assert_uimm_bits!(LANE, 1);
-    vmulxs_f32(a, simd_extract!(b, LANE as u32))
+pub unsafe fn vqtbl2_s8(a: int8x16x2_t, b: uint8x8_t) -> int8x8_t {
+    vqtbl2(a.0, a.1, b)
 }
-#[doc = "Floating-point multiply extended"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxs_laneq_f32)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmulxs_laneq_f32<const LANE: i32>(a: f32, b: float32x4_t) -> f32 {
-    static_assert_uimm_bits!(LANE, 2);
-    vmulxs_f32(a, simd_extract!(b, LANE as u32))
+pub unsafe fn vqtbl2q_s8(a: int8x16x2_t, b: uint8x16_t) -> int8x16_t {
+    vqtbl2q(a.0, a.1, b)
 }
-#[doc = "Floating-point multiply extended"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmulxq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
-    static_assert!(LANE == 0);
-    vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
+pub unsafe fn vqtbl2_u8(a: uint8x16x2_t, b: uint8x8_t) -> uint8x8_t {
+    transmute(vqtbl2(transmute(a.0), transmute(a.1), b))
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fneg))]
-pub unsafe fn vneg_f64(a: float64x1_t) -> float64x1_t {
-    simd_neg(a)
+pub unsafe fn vqtbl2_u8(a: uint8x16x2_t, b: uint8x8_t) -> uint8x8_t {
+    let mut a: uint8x16x2_t = a;
+    a.0 = simd_shuffle!(
+        a.0,
+        a.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.1 = simd_shuffle!(
+        a.1,
+        a.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vqtbl2(transmute(a.0), transmute(a.1), b));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
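The `#[cfg(target_endian = "big")]` variants above reverse each table register, the index vector, and the result, so callers observe the same lane numbering on both endiannesses; the same pattern repeats for every multi-register lookup below. Usage is identical either way; a hedged sketch of the two-register lookup (hypothetical wrapper, AArch64 assumed):

```rust
// Illustrative only: two-register table lookup (32-byte table).
#[cfg(target_arch = "aarch64")]
fn demo_vqtbl2() {
    use core::arch::aarch64::*;
    // SAFETY: NEON is mandatory on AArch64.
    unsafe {
        let bytes: [u8; 32] = core::array::from_fn(|i| i as u8 + 100);
        let table = uint8x16x2_t(vld1q_u8(bytes.as_ptr()), vld1q_u8(bytes.as_ptr().add(16)));
        let idx_bytes: [u8; 8] = [0, 15, 16, 31, 32, 1, 2, 3];
        let idx = vld1_u8(idx_bytes.as_ptr());
        let mut out = [0u8; 8];
        vst1_u8(out.as_mut_ptr(), vqtbl2_u8(table, idx));
        // Index 32 is beyond the 32-byte table, so that lane is 0.
        assert_eq!(out, [100, 115, 116, 131, 0, 101, 102, 103]);
    }
}
```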
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fneg))]
-pub unsafe fn vnegq_f64(a: float64x2_t) -> float64x2_t {
-    simd_neg(a)
+pub unsafe fn vqtbl2q_u8(a: uint8x16x2_t, b: uint8x16_t) -> uint8x16_t {
+    transmute(vqtbl2q(transmute(a.0), transmute(a.1), b))
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(neg))]
-pub unsafe fn vneg_s64(a: int64x1_t) -> int64x1_t {
-    simd_neg(a)
+pub unsafe fn vqtbl2q_u8(a: uint8x16x2_t, b: uint8x16_t) -> uint8x16_t {
+    let mut a: uint8x16x2_t = a;
+    a.0 = simd_shuffle!(
+        a.0,
+        a.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.1 = simd_shuffle!(
+        a.1,
+        a.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let b: uint8x16_t = simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x16_t = transmute(vqtbl2q(transmute(a.0), transmute(a.1), b));
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(neg))]
-pub unsafe fn vnegq_s64(a: int64x2_t) -> int64x2_t {
-    simd_neg(a)
+pub unsafe fn vqtbl2_p8(a: poly8x16x2_t, b: uint8x8_t) -> poly8x8_t {
+    transmute(vqtbl2(transmute(a.0), transmute(a.1), b))
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegd_s64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(neg))]
-pub unsafe fn vnegd_s64(a: i64) -> i64 {
-    a.wrapping_neg()
+pub unsafe fn vqtbl2_p8(a: poly8x16x2_t, b: uint8x8_t) -> poly8x8_t {
+    let mut a: poly8x16x2_t = a;
+    a.0 = simd_shuffle!(
+        a.0,
+        a.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.1 = simd_shuffle!(
+        a.1,
+        a.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(vqtbl2(transmute(a.0), transmute(a.1), b));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Floating-point add pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_f64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vpaddd_f64(a: float64x2_t) -> f64 {
-    let a1: f64 = simd_extract!(a, 0);
-    let a2: f64 = simd_extract!(a, 1);
-    a1 + a2
+pub unsafe fn vqtbl2q_p8(a: poly8x16x2_t, b: uint8x16_t) -> poly8x16_t {
+    transmute(vqtbl2q(transmute(a.0), transmute(a.1), b))
 }
-#[doc = "Floating-point add pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadds_f32)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vpadds_f32(a: float32x2_t) -> f32 {
-    let a1: f32 = simd_extract!(a, 0);
-    let a2: f32 = simd_extract!(a, 1);
-    a1 + a2
+pub unsafe fn vqtbl2q_p8(a: poly8x16x2_t, b: uint8x16_t) -> poly8x16_t {
+    let mut a: poly8x16x2_t = a;
+    a.0 = simd_shuffle!(
+        a.0,
+        a.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.1 = simd_shuffle!(
+        a.1,
+        a.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let b: uint8x16_t = simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x16_t = transmute(vqtbl2q(transmute(a.0), transmute(a.1), b));
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
-#[doc = "Floating-point add pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f32)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(faddp))]
-pub unsafe fn vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+unsafe fn vqtbl3(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: uint8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.faddp.v4f32"
+            link_name = "llvm.aarch64.neon.tbl3.v8i8"
         )]
-        fn _vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+        fn _vqtbl3(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x8_t) -> int8x8_t;
     }
-    _vpaddq_f32(a, b)
+    _vqtbl3(a, b, c, d.as_signed())
 }
-#[doc = "Floating-point add pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(faddp))]
-pub unsafe fn vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+unsafe fn vqtbl3q(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: uint8x16_t) -> int8x16_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.faddp.v2f64"
+            link_name = "llvm.aarch64.neon.tbl3.v16i8"
         )]
-        fn _vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+        fn _vqtbl3q(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t) -> int8x16_t;
     }
-    _vpaddq_f64(a, b)
+    _vqtbl3q(a, b, c, d.as_signed())
 }
-#[doc = "Floating-point Maximum Number Pairwise (vector)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnm_f32)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmaxnmp))]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmaxnmp.v2f32"
-        )]
-        fn _vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-    }
-    _vpmaxnm_f32(a, b)
+pub unsafe fn vqtbl3_s8(a: int8x16x3_t, b: uint8x8_t) -> int8x8_t {
+    vqtbl3(a.0, a.1, a.2, b)
 }
-#[doc = "Floating-point Maximum Number Pairwise (vector)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f32)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmaxnmp))]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmaxnmp.v4f32"
-        )]
-        fn _vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
-    }
-    _vpmaxnmq_f32(a, b)
+pub unsafe fn vqtbl3q_s8(a: int8x16x3_t, b: uint8x16_t) -> int8x16_t {
+    vqtbl3q(a.0, a.1, a.2, b)
 }
-#[doc = "Floating-point Maximum Number Pairwise (vector)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmaxnmp))]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmaxnmp.v2f64"
-        )]
-        fn _vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
-    }
-    _vpmaxnmq_f64(a, b)
+pub unsafe fn vqtbl3_u8(a: uint8x16x3_t, b: uint8x8_t) -> uint8x8_t {
+    transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b))
 }
-#[doc = "Floating-point maximum number pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmqd_f64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmaxnmp))]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpmaxnmqd_f64(a: float64x2_t) -> f64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmaxnmv.f64.v2f64"
-        )]
-        fn _vpmaxnmqd_f64(a: float64x2_t) -> f64;
-    }
-    _vpmaxnmqd_f64(a)
+pub unsafe fn vqtbl3_u8(a: uint8x16x3_t, b: uint8x8_t) -> uint8x8_t {
+    let mut a: uint8x16x3_t = a;
+    a.0 = simd_shuffle!(
+        a.0,
+        a.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.1 = simd_shuffle!(
+        a.1,
+        a.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.2 = simd_shuffle!(
+        a.2,
+        a.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
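+// Editorial note, not generated output: the big-endian variant above differs
+// from the little-endian one only in the byte-reversing `simd_shuffle!`s. They
+// compensate for `transmute` assuming the little-endian lane layout: each
+// 16-byte table register and the 8-byte index vector are reversed before the
+// lookup and the result is reversed back, so both endiannesses observe the
+// same lane semantics.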
-#[doc = "Floating-point maximum number pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnms_f32)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmaxnmp))]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpmaxnms_f32(a: float32x2_t) -> f32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmaxnmv.f32.v2f32"
-        )]
-        fn _vpmaxnms_f32(a: float32x2_t) -> f32;
-    }
-    _vpmaxnms_f32(a)
+pub unsafe fn vqtbl3q_u8(a: uint8x16x3_t, b: uint8x16_t) -> uint8x16_t {
+    transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b))
 }
-#[doc = "Floating-point maximum pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxqd_f64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmaxp))]
-pub unsafe fn vpmaxqd_f64(a: float64x2_t) -> f64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmaxv.f64.v2f64"
-        )]
-        fn _vpmaxqd_f64(a: float64x2_t) -> f64;
-    }
-    _vpmaxqd_f64(a)
+pub unsafe fn vqtbl3q_u8(a: uint8x16x3_t, b: uint8x16_t) -> uint8x16_t {
+    let mut a: uint8x16x3_t = a;
+    a.0 = simd_shuffle!(
+        a.0,
+        a.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.1 = simd_shuffle!(
+        a.1,
+        a.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.2 = simd_shuffle!(
+        a.2,
+        a.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let b: uint8x16_t = simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x16_t = transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b));
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
-#[doc = "Floating-point maximum pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxs_f32)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fmaxp))]
-pub unsafe fn vpmaxs_f32(a: float32x2_t) -> f32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmaxv.f32.v2f32"
-        )]
-        fn _vpmaxs_f32(a: float32x2_t) -> f32;
-    }
-    _vpmaxs_f32(a)
+pub unsafe fn vqtbl3_p8(a: poly8x16x3_t, b: uint8x8_t) -> poly8x8_t {
+    transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b))
 }
-#[doc = "Floating-point Minimum Number Pairwise (vector)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f32)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fminnmp))]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminnmp.v2f32"
-        )]
-        fn _vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-    }
-    _vpminnm_f32(a, b)
+pub unsafe fn vqtbl3_p8(a: poly8x16x3_t, b: uint8x8_t) -> poly8x8_t {
+    let mut a: poly8x16x3_t = a;
+    a.0 = simd_shuffle!(
+        a.0,
+        a.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.1 = simd_shuffle!(
+        a.1,
+        a.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.2 = simd_shuffle!(
+        a.2,
+        a.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Floating-point Minimum Number Pairwise (vector)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f32)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fminnmp))]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminnmp.v4f32"
-        )]
-        fn _vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
-    }
-    _vpminnmq_f32(a, b)
+pub unsafe fn vqtbl3q_p8(a: poly8x16x3_t, b: uint8x16_t) -> poly8x16_t {
+    transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b))
 }
-#[doc = "Floating-point Minimum Number Pairwise (vector)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fminnmp))]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminnmp.v2f64"
-        )]
-        fn _vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
-    }
-    _vpminnmq_f64(a, b)
+pub unsafe fn vqtbl3q_p8(a: poly8x16x3_t, b: uint8x16_t) -> poly8x16_t {
+    let mut a: poly8x16x3_t = a;
+    a.0 = simd_shuffle!(
+        a.0,
+        a.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.1 = simd_shuffle!(
+        a.1,
+        a.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.2 = simd_shuffle!(
+        a.2,
+        a.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let b: uint8x16_t = simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x16_t = transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b));
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
-#[doc = "Floating-point minimum number pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmqd_f64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fminnmp))]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpminnmqd_f64(a: float64x2_t) -> f64 {
+unsafe fn vqtbl4(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, e: uint8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminnmv.f64.v2f64"
+            link_name = "llvm.aarch64.neon.tbl4.v8i8"
         )]
-        fn _vpminnmqd_f64(a: float64x2_t) -> f64;
+        fn _vqtbl4(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, e: int8x8_t)
+            -> int8x8_t;
     }
-    _vpminnmqd_f64(a)
+    _vqtbl4(a, b, c, d, e.as_signed())
 }
-#[doc = "Floating-point minimum number pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnms_f32)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fminnmp))]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpminnms_f32(a: float32x2_t) -> f32 {
+unsafe fn vqtbl4q(
+    a: int8x16_t,
+    b: int8x16_t,
+    c: int8x16_t,
+    d: int8x16_t,
+    e: uint8x16_t,
+) -> int8x16_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminnmv.f32.v2f32"
+            link_name = "llvm.aarch64.neon.tbl4.v16i8"
         )]
-        fn _vpminnms_f32(a: float32x2_t) -> f32;
+        fn _vqtbl4q(
+            a: int8x16_t,
+            b: int8x16_t,
+            c: int8x16_t,
+            d: int8x16_t,
+            e: int8x16_t,
+        ) -> int8x16_t;
     }
-    _vpminnms_f32(a)
+    _vqtbl4q(a, b, c, d, e.as_signed())
 }
-#[doc = "Floating-point minimum pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminqd_f64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fminp))]
-pub unsafe fn vpminqd_f64(a: float64x2_t) -> f64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminv.f64.v2f64"
-        )]
-        fn _vpminqd_f64(a: float64x2_t) -> f64;
-    }
-    _vpminqd_f64(a)
+pub unsafe fn vqtbl4_s8(a: int8x16x4_t, b: uint8x8_t) -> int8x8_t {
+    vqtbl4(a.0, a.1, a.2, a.3, b)
 }
-#[doc = "Floating-point minimum pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmins_f32)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fminp))]
-pub unsafe fn vpmins_f32(a: float32x2_t) -> f32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminv.f32.v2f32"
-        )]
-        fn _vpmins_f32(a: float32x2_t) -> f32;
-    }
-    _vpmins_f32(a)
+pub unsafe fn vqtbl4q_s8(a: int8x16x4_t, b: uint8x16_t) -> int8x16_t {
+    vqtbl4q(a.0, a.1, a.2, a.3, b)
 }
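+// Editorial sketch, not generated output: `vqtbl4q_s8` treats the four table
+// registers as one 64-byte table (valid indices 0..=63; larger lanes give 0).
+// The helper name below is made up for illustration.
+#[allow(dead_code)]
+#[target_feature(enable = "neon")]
+unsafe fn vqtbl4q_s8_demo(t: int8x16x4_t) -> int8x16_t {
+    // Broadcast index 63: every result lane is the last byte of t.3.
+    vqtbl4q_s8(t, vdupq_n_u8(63))
+}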
-#[doc = "Signed saturating Absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))]
-pub unsafe fn vqabs_s64(a: int64x1_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.v1i64"
-        )]
-        fn _vqabs_s64(a: int64x1_t) -> int64x1_t;
-    }
-    _vqabs_s64(a)
+pub unsafe fn vqtbl4_u8(a: uint8x16x4_t, b: uint8x8_t) -> uint8x8_t {
+    transmute(vqtbl4(
+        transmute(a.0),
+        transmute(a.1),
+        transmute(a.2),
+        transmute(a.3),
+        b,
+    ))
 }
-#[doc = "Signed saturating Absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))]
-pub unsafe fn vqabsq_s64(a: int64x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.v2i64"
-        )]
-        fn _vqabsq_s64(a: int64x2_t) -> int64x2_t;
-    }
-    _vqabsq_s64(a)
+pub unsafe fn vqtbl4_u8(a: uint8x16x4_t, b: uint8x8_t) -> uint8x8_t {
+    let mut a: uint8x16x4_t = a;
+    a.0 = simd_shuffle!(
+        a.0,
+        a.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.1 = simd_shuffle!(
+        a.1,
+        a.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.2 = simd_shuffle!(
+        a.2,
+        a.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.3 = simd_shuffle!(
+        a.3,
+        a.3,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vqtbl4(
+        transmute(a.0),
+        transmute(a.1),
+        transmute(a.2),
+        transmute(a.3),
+        b,
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl4q_u8(a: uint8x16x4_t, b: uint8x16_t) -> uint8x16_t {
+    transmute(vqtbl4q(
+        transmute(a.0),
+        transmute(a.1),
+        transmute(a.2),
+        transmute(a.3),
+        b,
+    ))
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl4q_u8(a: uint8x16x4_t, b: uint8x16_t) -> uint8x16_t {
+    let mut a: uint8x16x4_t = a;
+    a.0 = simd_shuffle!(
+        a.0,
+        a.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.1 = simd_shuffle!(
+        a.1,
+        a.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.2 = simd_shuffle!(
+        a.2,
+        a.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.3 = simd_shuffle!(
+        a.3,
+        a.3,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let b: uint8x16_t = simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x16_t = transmute(vqtbl4q(
+        transmute(a.0),
+        transmute(a.1),
+        transmute(a.2),
+        transmute(a.3),
+        b,
+    ));
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl4_p8(a: poly8x16x4_t, b: uint8x8_t) -> poly8x8_t {
+    transmute(vqtbl4(
+        transmute(a.0),
+        transmute(a.1),
+        transmute(a.2),
+        transmute(a.3),
+        b,
+    ))
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqtbl4_p8(a: poly8x16x4_t, b: uint8x8_t) -> poly8x8_t {
+    let mut a: poly8x16x4_t = a;
+    a.0 = simd_shuffle!(
+        a.0,
+        a.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.1 = simd_shuffle!(
+        a.1,
+        a.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.2 = simd_shuffle!(
+        a.2,
+        a.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.3 = simd_shuffle!(
+        a.3,
+        a.3,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(vqtbl4(
+        transmute(a.0),
+        transmute(a.1),
+        transmute(a.2),
+        transmute(a.3),
+        b,
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Signed saturating absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsb_s8)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))]
-pub unsafe fn vqabsb_s8(a: i8) -> i8 {
-    simd_extract!(vqabs_s8(vdup_n_s8(a)), 0)
+pub unsafe fn vqtbl4q_p8(a: poly8x16x4_t, b: uint8x16_t) -> poly8x16_t {
+    transmute(vqtbl4q(
+        transmute(a.0),
+        transmute(a.1),
+        transmute(a.2),
+        transmute(a.3),
+        b,
+    ))
 }
-#[doc = "Signed saturating absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsh_s16)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))]
-pub unsafe fn vqabsh_s16(a: i16) -> i16 {
-    simd_extract!(vqabs_s16(vdup_n_s16(a)), 0)
+pub unsafe fn vqtbl4q_p8(a: poly8x16x4_t, b: uint8x16_t) -> poly8x16_t {
+    let mut a: poly8x16x4_t = a;
+    a.0 = simd_shuffle!(
+        a.0,
+        a.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.1 = simd_shuffle!(
+        a.1,
+        a.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.2 = simd_shuffle!(
+        a.2,
+        a.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    a.3 = simd_shuffle!(
+        a.3,
+        a.3,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let b: uint8x16_t = simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x16_t = transmute(vqtbl4q(
+        transmute(a.0),
+        transmute(a.1),
+        transmute(a.2),
+        transmute(a.3),
+        b,
+    ));
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
-#[doc = "Signed saturating absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabss_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))]
-pub unsafe fn vqabss_s32(a: i32) -> i32 {
+unsafe fn vqtbx1(a: int8x8_t, b: int8x16_t, c: uint8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.i32"
+            link_name = "llvm.aarch64.neon.tbx1.v8i8"
         )]
-        fn _vqabss_s32(a: i32) -> i32;
+        fn _vqtbx1(a: int8x8_t, b: int8x16_t, c: int8x8_t) -> int8x8_t;
     }
-    _vqabss_s32(a)
+    _vqtbx1(a, b, c.as_signed())
 }
-#[doc = "Signed saturating absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsd_s64)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))]
-pub unsafe fn vqabsd_s64(a: i64) -> i64 {
+unsafe fn vqtbx1q(a: int8x16_t, b: int8x16_t, c: uint8x16_t) -> int8x16_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.i64"
+            link_name = "llvm.aarch64.neon.tbx1.v16i8"
         )]
-        fn _vqabsd_s64(a: i64) -> i64;
+        fn _vqtbx1q(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
     }
-    _vqabsd_s64(a)
+    _vqtbx1q(a, b, c.as_signed())
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddb_s8)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sqadd))]
-pub unsafe fn vqaddb_s8(a: i8, b: i8) -> i8 {
-    let a: int8x8_t = vdup_n_s8(a);
-    let b: int8x8_t = vdup_n_s8(b);
-    simd_extract!(vqadd_s8(a, b), 0)
+pub unsafe fn vqtbx1_s8(a: int8x8_t, b: int8x16_t, c: uint8x8_t) -> int8x8_t {
+    vqtbx1(a, b, c)
 }
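+// Editorial sketch, not generated output: unlike the plain table look-ups, the
+// extended form `vqtbx1_s8` keeps the corresponding lane of `a` whenever an
+// index lane is out of range for the 16-byte table. Helper name made up for
+// illustration.
+#[allow(dead_code)]
+#[target_feature(enable = "neon")]
+unsafe fn vqtbx1_s8_demo() -> int8x8_t {
+    let fallback = vdup_n_s8(-1);
+    let table = vld1q_s8([7i8; 16].as_ptr());
+    let idx = vld1_u8([0u8, 1, 2, 3, 16, 17, 200, 255].as_ptr());
+    // Result lanes: [7, 7, 7, 7, -1, -1, -1, -1]; lanes >= 16 keep `fallback`.
+    vqtbx1_s8(fallback, table, idx)
+}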
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddh_s16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sqadd))]
-pub unsafe fn vqaddh_s16(a: i16, b: i16) -> i16 {
-    let a: int16x4_t = vdup_n_s16(a);
-    let b: int16x4_t = vdup_n_s16(b);
-    simd_extract!(vqadd_s16(a, b), 0)
+pub unsafe fn vqtbx1q_s8(a: int8x16_t, b: int8x16_t, c: uint8x16_t) -> int8x16_t {
+    vqtbx1q(a, b, c)
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddb_u8)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(uqadd))]
-pub unsafe fn vqaddb_u8(a: u8, b: u8) -> u8 {
-    let a: uint8x8_t = vdup_n_u8(a);
-    let b: uint8x8_t = vdup_n_u8(b);
-    simd_extract!(vqadd_u8(a, b), 0)
+pub unsafe fn vqtbx1_u8(a: uint8x8_t, b: uint8x16_t, c: uint8x8_t) -> uint8x8_t {
+    transmute(vqtbx1(transmute(a), transmute(b), c))
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddh_u16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(uqadd))]
-pub unsafe fn vqaddh_u16(a: u16, b: u16) -> u16 {
-    let a: uint16x4_t = vdup_n_u16(a);
-    let b: uint16x4_t = vdup_n_u16(b);
-    simd_extract!(vqadd_u16(a, b), 0)
+pub unsafe fn vqtbx1q_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
+    transmute(vqtbx1q(transmute(a), transmute(b), c))
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadds_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sqadd))]
-pub unsafe fn vqadds_s32(a: i32, b: i32) -> i32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqadd.i32"
-        )]
-        fn _vqadds_s32(a: i32, b: i32) -> i32;
-    }
-    _vqadds_s32(a, b)
+pub unsafe fn vqtbx1_p8(a: poly8x8_t, b: poly8x16_t, c: uint8x8_t) -> poly8x8_t {
+    transmute(vqtbx1(transmute(a), transmute(b), c))
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddd_s64)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sqadd))]
-pub unsafe fn vqaddd_s64(a: i64, b: i64) -> i64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqadd.i64"
-        )]
-        fn _vqaddd_s64(a: i64, b: i64) -> i64;
-    }
-    _vqaddd_s64(a, b)
+pub unsafe fn vqtbx1q_p8(a: poly8x16_t, b: poly8x16_t, c: uint8x16_t) -> poly8x16_t {
+    transmute(vqtbx1q(transmute(a), transmute(b), c))
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadds_u32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(uqadd))]
-pub unsafe fn vqadds_u32(a: u32, b: u32) -> u32 {
+unsafe fn vqtbx2(a: int8x8_t, b: int8x16_t, c: int8x16_t, d: uint8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqadd.i32"
+            link_name = "llvm.aarch64.neon.tbx2.v8i8"
         )]
-        fn _vqadds_u32(a: i32, b: i32) -> i32;
+        fn _vqtbx2(a: int8x8_t, b: int8x16_t, c: int8x16_t, d: int8x8_t) -> int8x8_t;
     }
-    _vqadds_u32(a.as_signed(), b.as_signed()).as_unsigned()
+    _vqtbx2(a, b, c, d.as_signed())
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddd_u64)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(uqadd))]
-pub unsafe fn vqaddd_u64(a: u64, b: u64) -> u64 {
+unsafe fn vqtbx2q(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: uint8x16_t) -> int8x16_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqadd.i64"
+            link_name = "llvm.aarch64.neon.tbx2.v16i8"
         )]
-        fn _vqaddd_u64(a: i64, b: i64) -> i64;
+        fn _vqtbx2q(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t) -> int8x16_t;
     }
-    _vqaddd_u64(a.as_signed(), b.as_signed()).as_unsigned()
-}
-#[doc = "Signed saturating doubling multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlal_high_lane_s16<const N: i32>(
-    a: int32x4_t,
-    b: int16x8_t,
-    c: int16x4_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    vqaddq_s32(a, vqdmull_high_lane_s16::<N>(b, c))
+    _vqtbx2q(a, b, c, d.as_signed())
 }
-#[doc = "Signed saturating doubling multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_laneq_s16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlal_high_laneq_s16<const N: i32>(
-    a: int32x4_t,
-    b: int16x8_t,
-    c: int16x8_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(N, 3);
-    vqaddq_s32(a, vqdmull_high_laneq_s16::<N>(b, c))
+pub unsafe fn vqtbx2_s8(a: int8x8_t, b: int8x16x2_t, c: uint8x8_t) -> int8x8_t {
+    vqtbx2(a, b.0, b.1, c)
 }
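+// Editorial sketch, not generated output: for `vqtbx2_s8` the two registers of
+// `int8x16x2_t` form a 32-byte table, so valid indices are 0..=31; anything
+// larger keeps the matching lane of `a`. Helper name made up for illustration.
+#[allow(dead_code)]
+#[target_feature(enable = "neon")]
+unsafe fn vqtbx2_s8_demo(a: int8x8_t, t: int8x16x2_t) -> int8x8_t {
+    // Broadcast index 31: every result lane is the last byte of t.1.
+    vqtbx2_s8(a, t, vdup_n_u8(31))
+}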
-#[doc = "Signed saturating doubling multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_lane_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlal_high_lane_s32<const N: i32>(
-    a: int64x2_t,
-    b: int32x4_t,
-    c: int32x2_t,
-) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    vqaddq_s64(a, vqdmull_high_lane_s32::<N>(b, c))
+pub unsafe fn vqtbx2q_s8(a: int8x16_t, b: int8x16x2_t, c: uint8x16_t) -> int8x16_t {
+    vqtbx2q(a, b.0, b.1, c)
 }
-#[doc = "Signed saturating doubling multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_laneq_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlal_high_laneq_s32<const N: i32>(
-    a: int64x2_t,
-    b: int32x4_t,
-    c: int32x4_t,
-) -> int64x2_t {
-    static_assert_uimm_bits!(N, 2);
-    vqaddq_s64(a, vqdmull_high_laneq_s32::<N>(b, c))
+pub unsafe fn vqtbx2_u8(a: uint8x8_t, b: uint8x16x2_t, c: uint8x8_t) -> uint8x8_t {
+    transmute(vqtbx2(transmute(a), transmute(b.0), transmute(b.1), c))
 }
-#[doc = "Signed saturating doubling multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_n_s16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2))]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
-    vqaddq_s32(a, vqdmull_high_n_s16(b, c))
+pub unsafe fn vqtbx2_u8(a: uint8x8_t, b: uint8x16x2_t, c: uint8x8_t) -> uint8x8_t {
+    let mut b: uint8x16x2_t = b;
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(
+        b.0,
+        b.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.1 = simd_shuffle!(
+        b.1,
+        b.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vqtbx2(transmute(a), transmute(b.0), transmute(b.1), c));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Signed saturating doubling multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_s16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2))]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
-    vqaddq_s32(a, vqdmull_high_s16(b, c))
+pub unsafe fn vqtbx2q_u8(a: uint8x16_t, b: uint8x16x2_t, c: uint8x16_t) -> uint8x16_t {
+    transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c))
 }
-#[doc = "Signed saturating doubling multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_n_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2))]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
-    vqaddq_s64(a, vqdmull_high_n_s32(b, c))
+pub unsafe fn vqtbx2q_u8(a: uint8x16_t, b: uint8x16x2_t, c: uint8x16_t) -> uint8x16_t {
+    let mut b: uint8x16x2_t = b;
+    let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(
+        b.0,
+        b.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.1 = simd_shuffle!(
+        b.1,
+        b.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let c: uint8x16_t = simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x16_t = transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c));
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
-#[doc = "Signed saturating doubling multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2))]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
-    vqaddq_s64(a, vqdmull_high_s32(b, c))
+pub unsafe fn vqtbx2_p8(a: poly8x8_t, b: poly8x16x2_t, c: uint8x8_t) -> poly8x8_t {
+    transmute(vqtbx2(transmute(a), transmute(b.0), transmute(b.1), c))
 }
-#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_laneq_s16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal, N = 2))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlal_laneq_s16<const N: i32>(
-    a: int32x4_t,
-    b: int16x4_t,
-    c: int16x8_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(N, 3);
-    vqaddq_s32(a, vqdmull_laneq_s16::<N>(b, c))
+pub unsafe fn vqtbx2_p8(a: poly8x8_t, b: poly8x16x2_t, c: uint8x8_t) -> poly8x8_t {
+    let mut b: poly8x16x2_t = b;
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(
+        b.0,
+        b.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.1 = simd_shuffle!(
+        b.1,
+        b.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(vqtbx2(transmute(a), transmute(b.0), transmute(b.1), c));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_laneq_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal, N = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlal_laneq_s32<const N: i32>(
-    a: int64x2_t,
-    b: int32x2_t,
-    c: int32x4_t,
-) -> int64x2_t {
-    static_assert_uimm_bits!(N, 2);
-    vqaddq_s64(a, vqdmull_laneq_s32::<N>(b, c))
+pub unsafe fn vqtbx2q_p8(a: poly8x16_t, b: poly8x16x2_t, c: uint8x16_t) -> poly8x16_t {
+    transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c))
 }
-#[doc = "Signed saturating doubling multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_lane_s16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlalh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
-    static_assert_uimm_bits!(LANE, 2);
-    vqdmlalh_s16(a, b, simd_extract!(c, LANE as u32))
+pub unsafe fn vqtbx2q_p8(a: poly8x16_t, b: poly8x16x2_t, c: uint8x16_t) -> poly8x16_t {
+    let mut b: poly8x16x2_t = b;
+    let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(
+        b.0,
+        b.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.1 = simd_shuffle!(
+        b.1,
+        b.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let c: uint8x16_t = simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x16_t = transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c));
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
-#[doc = "Signed saturating doubling multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_laneq_s16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlalh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32 {
-    static_assert_uimm_bits!(LANE, 3);
-    vqdmlalh_s16(a, b, simd_extract!(c, LANE as u32))
+unsafe fn vqtbx3(a: int8x8_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, e: uint8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.tbx3.v8i8"
+        )]
+        fn _vqtbx3(a: int8x8_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, e: int8x8_t) -> int8x8_t;
+    }
+    _vqtbx3(a, b, c, d, e.as_signed())
 }
-#[doc = "Signed saturating doubling multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_lane_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlals_lane_s32<const LANE: i32>(a: i64, b: i32, c: int32x2_t) -> i64 {
-    static_assert_uimm_bits!(LANE, 1);
-    vqdmlals_s32(a, b, simd_extract!(c, LANE as u32))
+unsafe fn vqtbx3q(
+    a: int8x16_t,
+    b: int8x16_t,
+    c: int8x16_t,
+    d: int8x16_t,
+    e: uint8x16_t,
+) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.tbx3.v16i8"
+        )]
+        fn _vqtbx3q(
+            a: int8x16_t,
+            b: int8x16_t,
+            c: int8x16_t,
+            d: int8x16_t,
+            e: int8x16_t,
+        ) -> int8x16_t;
+    }
+    _vqtbx3q(a, b, c, d, e.as_signed())
 }
-#[doc = "Signed saturating doubling multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_laneq_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlals_laneq_s32<const LANE: i32>(a: i64, b: i32, c: int32x4_t) -> i64 {
-    static_assert_uimm_bits!(LANE, 2);
-    vqdmlals_s32(a, b, simd_extract!(c, LANE as u32))
+pub unsafe fn vqtbx3_s8(a: int8x8_t, b: int8x16x3_t, c: uint8x8_t) -> int8x8_t {
+    vqtbx3(a, b.0, b.1, b.2, c)
 }
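+// Editorial sketch, not generated output: `vqtbx3_s8` looks up a 48-byte table
+// (valid indices 0..=47) and falls back to `a` lane-wise for anything larger.
+// Helper name made up for illustration.
+#[allow(dead_code)]
+#[target_feature(enable = "neon")]
+unsafe fn vqtbx3_s8_demo(a: int8x8_t, t: int8x16x3_t) -> int8x8_t {
+    let idx = vld1_u8([0u8, 16, 32, 47, 48, 64, 128, 255].as_ptr());
+    // Lanes 0..=3 read table bytes 0, 16, 32, 47; lanes 4..=7 keep `a`.
+    vqtbx3_s8(a, t, idx)
+}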
-#[doc = "Signed saturating doubling multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_s16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal))]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlalh_s16(a: i32, b: i16, c: i16) -> i32 {
-    let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c));
-    vqadds_s32(a, simd_extract!(x, 0))
+pub unsafe fn vqtbx3q_s8(a: int8x16_t, b: int8x16x3_t, c: uint8x16_t) -> int8x16_t {
+    vqtbx3q(a, b.0, b.1, b.2, c)
 }
-#[doc = "Signed saturating doubling multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal))]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlals_s32(a: i64, b: i32, c: i32) -> i64 {
-    let x: i64 = vqaddd_s64(a, vqdmulls_s32(b, c));
-    x as i64
+pub unsafe fn vqtbx3_u8(a: uint8x8_t, b: uint8x16x3_t, c: uint8x8_t) -> uint8x8_t {
+    transmute(vqtbx3(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        c,
+    ))
 }
-#[doc = "Signed saturating doubling multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_lane_s16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsl_high_lane_s16<const N: i32>(
-    a: int32x4_t,
-    b: int16x8_t,
-    c: int16x4_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    vqsubq_s32(a, vqdmull_high_lane_s16::<N>(b, c))
+pub unsafe fn vqtbx3_u8(a: uint8x8_t, b: uint8x16x3_t, c: uint8x8_t) -> uint8x8_t {
+    let mut b: uint8x16x3_t = b;
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(
+        b.0,
+        b.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.1 = simd_shuffle!(
+        b.1,
+        b.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.2 = simd_shuffle!(
+        b.2,
+        b.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vqtbx3(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        c,
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Signed saturating doubling multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_laneq_s16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsl_high_laneq_s16<const N: i32>(
-    a: int32x4_t,
-    b: int16x8_t,
-    c: int16x8_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(N, 3);
-    vqsubq_s32(a, vqdmull_high_laneq_s16::<N>(b, c))
+pub unsafe fn vqtbx3q_u8(a: uint8x16_t, b: uint8x16x3_t, c: uint8x16_t) -> uint8x16_t {
+    transmute(vqtbx3q(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        c,
+    ))
 }
-#[doc = "Signed saturating doubling multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_lane_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsl_high_lane_s32<const N: i32>(
-    a: int64x2_t,
-    b: int32x4_t,
-    c: int32x2_t,
-) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    vqsubq_s64(a, vqdmull_high_lane_s32::<N>(b, c))
+pub unsafe fn vqtbx3q_u8(a: uint8x16_t, b: uint8x16x3_t, c: uint8x16_t) -> uint8x16_t {
+    let mut b: uint8x16x3_t = b;
+    let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(
+        b.0,
+        b.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.1 = simd_shuffle!(
+        b.1,
+        b.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.2 = simd_shuffle!(
+        b.2,
+        b.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let c: uint8x16_t = simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x16_t = transmute(vqtbx3q(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        c,
+    ));
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
-#[doc = "Signed saturating doubling multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_laneq_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsl_high_laneq_s32<const N: i32>(
-    a: int64x2_t,
-    b: int32x4_t,
-    c: int32x4_t,
-) -> int64x2_t {
-    static_assert_uimm_bits!(N, 2);
-    vqsubq_s64(a, vqdmull_high_laneq_s32::<N>(b, c))
+pub unsafe fn vqtbx3_p8(a: poly8x8_t, b: poly8x16x3_t, c: uint8x8_t) -> poly8x8_t {
+    transmute(vqtbx3(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        c,
+    ))
 }
-#[doc = "Signed saturating doubling multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_n_s16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2))]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
-    vqsubq_s32(a, vqdmull_high_n_s16(b, c))
+pub unsafe fn vqtbx3_p8(a: poly8x8_t, b: poly8x16x3_t, c: uint8x8_t) -> poly8x8_t {
+    let mut b: poly8x16x3_t = b;
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(
+        b.0,
+        b.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.1 = simd_shuffle!(
+        b.1,
+        b.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.2 = simd_shuffle!(
+        b.2,
+        b.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(vqtbx3(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        c,
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Signed saturating doubling multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_s16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2))]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
-    vqsubq_s32(a, vqdmull_high_s16(b, c))
+pub unsafe fn vqtbx3q_p8(a: poly8x16_t, b: poly8x16x3_t, c: uint8x16_t) -> poly8x16_t {
+    transmute(vqtbx3q(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        c,
+    ))
 }
-#[doc = "Signed saturating doubling multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_n_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2))]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
-    vqsubq_s64(a, vqdmull_high_n_s32(b, c))
+pub unsafe fn vqtbx3q_p8(a: poly8x16_t, b: poly8x16x3_t, c: uint8x16_t) -> poly8x16_t {
+    let mut b: poly8x16x3_t = b;
+    let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(
+        b.0,
+        b.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.1 = simd_shuffle!(
+        b.1,
+        b.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.2 = simd_shuffle!(
+        b.2,
+        b.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let c: uint8x16_t = simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x16_t = transmute(vqtbx3q(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        c,
+    ));
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
-#[doc = "Signed saturating doubling multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2))]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
-    vqsubq_s64(a, vqdmull_high_s32(b, c))
+unsafe fn vqtbx4(
+    a: int8x8_t,
+    b: int8x16_t,
+    c: int8x16_t,
+    d: int8x16_t,
+    e: int8x16_t,
+    f: uint8x8_t,
+) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.tbx4.v8i8"
+        )]
+        fn _vqtbx4(
+            a: int8x8_t,
+            b: int8x16_t,
+            c: int8x16_t,
+            d: int8x16_t,
+            e: int8x16_t,
+            f: int8x8_t,
+        ) -> int8x8_t;
+    }
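+    // `f` holds unsigned table indices, but the LLVM intrinsic is declared
+    // over signed vectors only, so the index vector is bit-cast via
+    // `as_signed()` before the call.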
+    _vqtbx4(a, b, c, d, e, f.as_signed())
 }
-#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_laneq_s16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, N = 2))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsl_laneq_s16<const N: i32>(
-    a: int32x4_t,
-    b: int16x4_t,
-    c: int16x8_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(N, 3);
-    vqsubq_s32(a, vqdmull_laneq_s16::<N>(b, c))
+unsafe fn vqtbx4q(
+    a: int8x16_t,
+    b: int8x16_t,
+    c: int8x16_t,
+    d: int8x16_t,
+    e: int8x16_t,
+    f: uint8x16_t,
+) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.tbx4.v16i8"
+        )]
+        fn _vqtbx4q(
+            a: int8x16_t,
+            b: int8x16_t,
+            c: int8x16_t,
+            d: int8x16_t,
+            e: int8x16_t,
+            f: int8x16_t,
+        ) -> int8x16_t;
+    }
+    _vqtbx4q(a, b, c, d, e, f.as_signed())
 }
-#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_laneq_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, N = 1))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsl_laneq_s32<const N: i32>(
-    a: int64x2_t,
-    b: int32x2_t,
-    c: int32x4_t,
-) -> int64x2_t {
-    static_assert_uimm_bits!(N, 2);
-    vqsubq_s64(a, vqdmull_laneq_s32::<N>(b, c))
+pub unsafe fn vqtbx4_s8(a: int8x8_t, b: int8x16x4_t, c: uint8x8_t) -> int8x8_t {
+    vqtbx4(a, b.0, b.1, b.2, b.3, c)
 }
-#[doc = "Signed saturating doubling multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_lane_s16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlslh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
-    static_assert_uimm_bits!(LANE, 2);
-    vqdmlslh_s16(a, b, simd_extract!(c, LANE as u32))
+pub unsafe fn vqtbx4q_s8(a: int8x16_t, b: int8x16x4_t, c: uint8x16_t) -> int8x16_t {
+    vqtbx4q(a, b.0, b.1, b.2, b.3, c)
 }
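+// A minimal usage sketch (hypothetical helper, not part of the generated
+// bindings): unlike TBL, TBX leaves a lane of `a` untouched when its index
+// is out of range instead of zeroing it.
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+unsafe fn vqtbx4q_s8_example() -> int8x16_t {
+    // 64-byte table: register i holds the constant i in every byte.
+    let table = int8x16x4_t(vdupq_n_s8(0), vdupq_n_s8(1), vdupq_n_s8(2), vdupq_n_s8(3));
+    let fallback = vdupq_n_s8(-1);
+    // Low half: index 16 selects table byte 16 (value 1). High half: index
+    // 255 is out of range, so those lanes keep the fallback value -1.
+    let idx = vcombine_u8(vdup_n_u8(16), vdup_n_u8(255));
+    vqtbx4q_s8(fallback, table, idx)
+}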
-#[doc = "Signed saturating doubling multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_laneq_s16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlslh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32 {
-    static_assert_uimm_bits!(LANE, 3);
-    vqdmlslh_s16(a, b, simd_extract!(c, LANE as u32))
+pub unsafe fn vqtbx4_u8(a: uint8x8_t, b: uint8x16x4_t, c: uint8x8_t) -> uint8x8_t {
+    transmute(vqtbx4(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        transmute(b.3),
+        c,
+    ))
 }
-#[doc = "Signed saturating doubling multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_lane_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsls_lane_s32<const LANE: i32>(a: i64, b: i32, c: int32x2_t) -> i64 {
-    static_assert_uimm_bits!(LANE, 1);
-    vqdmlsls_s32(a, b, simd_extract!(c, LANE as u32))
+pub unsafe fn vqtbx4_u8(a: uint8x8_t, b: uint8x16x4_t, c: uint8x8_t) -> uint8x8_t {
+    let mut b: uint8x16x4_t = b;
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(
+        b.0,
+        b.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.1 = simd_shuffle!(
+        b.1,
+        b.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.2 = simd_shuffle!(
+        b.2,
+        b.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.3 = simd_shuffle!(
+        b.3,
+        b.3,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vqtbx4(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        transmute(b.3),
+        c,
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Signed saturating doubling multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_laneq_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsls_laneq_s32<const LANE: i32>(a: i64, b: i32, c: int32x4_t) -> i64 {
-    static_assert_uimm_bits!(LANE, 2);
-    vqdmlsls_s32(a, b, simd_extract!(c, LANE as u32))
+pub unsafe fn vqtbx4q_u8(a: uint8x16_t, b: uint8x16x4_t, c: uint8x16_t) -> uint8x16_t {
+    transmute(vqtbx4q(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        transmute(b.3),
+        c,
+    ))
 }
-#[doc = "Signed saturating doubling multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_s16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl))]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlslh_s16(a: i32, b: i16, c: i16) -> i32 {
-    let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c));
-    vqsubs_s32(a, simd_extract!(x, 0))
+pub unsafe fn vqtbx4q_u8(a: uint8x16_t, b: uint8x16x4_t, c: uint8x16_t) -> uint8x16_t {
+    let mut b: uint8x16x4_t = b;
+    let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(
+        b.0,
+        b.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.1 = simd_shuffle!(
+        b.1,
+        b.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.2 = simd_shuffle!(
+        b.2,
+        b.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.3 = simd_shuffle!(
+        b.3,
+        b.3,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let c: uint8x16_t = simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x16_t = transmute(vqtbx4q(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        transmute(b.3),
+        c,
+    ));
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
-#[doc = "Signed saturating doubling multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl))]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmlsls_s32(a: i64, b: i32, c: i32) -> i64 {
-    let x: i64 = vqsubd_s64(a, vqdmulls_s32(b, c));
-    x as i64
+pub unsafe fn vqtbx4_p8(a: poly8x8_t, b: poly8x16x4_t, c: uint8x8_t) -> poly8x8_t {
+    transmute(vqtbx4(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        transmute(b.3),
+        c,
+    ))
 }
-#[doc = "Vector saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_lane_s16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vqdmulh_s16(a, vdup_n_s16(simd_extract!(b, LANE as u32)))
+pub unsafe fn vqtbx4_p8(a: poly8x8_t, b: poly8x16x4_t, c: uint8x8_t) -> poly8x8_t {
+    let mut b: poly8x16x4_t = b;
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(
+        b.0,
+        b.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.1 = simd_shuffle!(
+        b.1,
+        b.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.2 = simd_shuffle!(
+        b.2,
+        b.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.3 = simd_shuffle!(
+        b.3,
+        b.3,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(vqtbx4(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        transmute(b.3),
+        c,
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Vector saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_lane_s16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vqdmulhq_s16(a, vdupq_n_s16(simd_extract!(b, LANE as u32)))
+pub unsafe fn vqtbx4q_p8(a: poly8x16_t, b: poly8x16x4_t, c: uint8x16_t) -> poly8x16_t {
+    transmute(vqtbx4q(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        transmute(b.3),
+        c,
+    ))
 }
-#[doc = "Vector saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_lane_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vqdmulh_s32(a, vdup_n_s32(simd_extract!(b, LANE as u32)))
+pub unsafe fn vqtbx4q_p8(a: poly8x16_t, b: poly8x16x4_t, c: uint8x16_t) -> poly8x16_t {
+    let mut b: poly8x16x4_t = b;
+    let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(
+        b.0,
+        b.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.1 = simd_shuffle!(
+        b.1,
+        b.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.2 = simd_shuffle!(
+        b.2,
+        b.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    b.3 = simd_shuffle!(
+        b.3,
+        b.3,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    let c: uint8x16_t = simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x16_t = transmute(vqtbx4q(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        transmute(b.3),
+        c,
+    ));
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
-#[doc = "Vector saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_lane_s32)"]
+#[doc = "Rotate and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrax1q_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(rax1))]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+pub unsafe fn vrax1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.rax1"
+        )]
+        fn _vrax1q_u64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+    _vrax1q_u64(a.as_signed(), b.as_signed()).as_unsigned()
+}
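+// Scalar model of what RAX1 computes per 64-bit lane (a hypothetical
+// reference, not part of the bindings): rotate `b` left by one, XOR into `a`.
+#[cfg(test)]
+fn rax1_scalar_model(a: u64, b: u64) -> u64 {
+    a ^ b.rotate_left(1)
+}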
+#[doc = "Reverse bit order"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vqdmulhq_s32(a, vdupq_n_s32(simd_extract!(b, LANE as u32)))
+#[cfg_attr(test, assert_instr(rbit))]
+pub unsafe fn vrbit_s8(a: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.rbit.v8i8"
+        )]
+        fn _vrbit_s8(a: int8x8_t) -> int8x8_t;
+    }
+    _vrbit_s8(a)
 }
-#[doc = "Signed saturating doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhh_lane_s16)"]
+#[doc = "Reverse bit order"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulhh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i16 {
-    static_assert_uimm_bits!(N, 2);
-    let b: i16 = simd_extract!(b, N as u32);
-    vqdmulhh_s16(a, b)
+#[cfg_attr(test, assert_instr(rbit))]
+pub unsafe fn vrbitq_s8(a: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.rbit.v16i8"
+        )]
+        fn _vrbitq_s8(a: int8x16_t) -> int8x16_t;
+    }
+    _vrbitq_s8(a)
 }
-#[doc = "Signed saturating doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhh_laneq_s16)"]
+#[doc = "Reverse bit order"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulhh_laneq_s16<const N: i32>(a: i16, b: int16x8_t) -> i16 {
-    static_assert_uimm_bits!(N, 3);
-    let b: i16 = simd_extract!(b, N as u32);
-    vqdmulhh_s16(a, b)
+#[cfg_attr(test, assert_instr(rbit))]
+pub unsafe fn vrbit_u8(a: uint8x8_t) -> uint8x8_t {
+    transmute(vrbit_s8(transmute(a)))
 }
-#[doc = "Signed saturating doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhh_s16)"]
+#[doc = "Reverse bit order"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulhh_s16(a: i16, b: i16) -> i16 {
-    let a: int16x4_t = vdup_n_s16(a);
-    let b: int16x4_t = vdup_n_s16(b);
-    simd_extract!(vqdmulh_s16(a, b), 0)
+#[cfg_attr(test, assert_instr(rbit))]
+pub unsafe fn vrbit_u8(a: uint8x8_t) -> uint8x8_t {
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vrbit_s8(transmute(a)));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Signed saturating doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhs_s32)"]
+#[doc = "Reverse bit order"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulhs_s32(a: i32, b: i32) -> i32 {
-    let a: int32x2_t = vdup_n_s32(a);
-    let b: int32x2_t = vdup_n_s32(b);
-    simd_extract!(vqdmulh_s32(a, b), 0)
+#[cfg_attr(test, assert_instr(rbit))]
+pub unsafe fn vrbitq_u8(a: uint8x16_t) -> uint8x16_t {
+    transmute(vrbitq_s8(transmute(a)))
 }
-#[doc = "Signed saturating doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhs_lane_s32)"]
+#[doc = "Reverse bit order"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh, N = 1))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulhs_lane_s32<const N: i32>(a: i32, b: int32x2_t) -> i32 {
-    static_assert_uimm_bits!(N, 1);
-    let b: i32 = simd_extract!(b, N as u32);
-    vqdmulhs_s32(a, b)
+#[cfg_attr(test, assert_instr(rbit))]
+pub unsafe fn vrbitq_u8(a: uint8x16_t) -> uint8x16_t {
+    let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x16_t = transmute(vrbitq_s8(transmute(a)));
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
-#[doc = "Signed saturating doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhs_laneq_s32)"]
+#[doc = "Reverse bit order"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmulh, N = 1))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulhs_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i32 {
-    static_assert_uimm_bits!(N, 2);
-    let b: i32 = simd_extract!(b, N as u32);
-    vqdmulhs_s32(a, b)
+#[cfg_attr(test, assert_instr(rbit))]
+pub unsafe fn vrbit_p8(a: poly8x8_t) -> poly8x8_t {
+    transmute(vrbit_s8(transmute(a)))
 }
-#[doc = "Signed saturating doubling multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_lane_s16)"]
+#[doc = "Reverse bit order"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmull_high_lane_s16<const N: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-    let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
-    vqdmull_s16(a, b)
+#[cfg_attr(test, assert_instr(rbit))]
+pub unsafe fn vrbit_p8(a: poly8x8_t) -> poly8x8_t {
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(vrbit_s8(transmute(a)));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Signed saturating doubling multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_laneq_s32)"]
+#[doc = "Reverse bit order"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmull_high_laneq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 2);
-    let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-    let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
-    vqdmull_s32(a, b)
+#[cfg_attr(test, assert_instr(rbit))]
+pub unsafe fn vrbitq_p8(a: poly8x16_t) -> poly8x16_t {
+    transmute(vrbitq_s8(transmute(a)))
 }
-#[doc = "Signed saturating doubling multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_lane_s32)"]
+#[doc = "Reverse bit order"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2, N = 1))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmull_high_lane_s32<const N: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-    let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
-    vqdmull_s32(a, b)
+#[cfg_attr(test, assert_instr(rbit))]
+pub unsafe fn vrbitq_p8(a: poly8x16_t) -> poly8x16_t {
+    let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x16_t = transmute(vrbitq_s8(transmute(a)));
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
-#[doc = "Signed saturating doubling multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_laneq_s16)"]
+#[doc = "Reciprocal estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2, N = 4))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(frecpe))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmull_high_laneq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 3);
-    let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-    let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
-    vqdmull_s16(a, b)
+pub unsafe fn vrecpe_f64(a: float64x1_t) -> float64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecpe.v1f64"
+        )]
+        fn _vrecpe_f64(a: float64x1_t) -> float64x1_t;
+    }
+    _vrecpe_f64(a)
 }
-#[doc = "Signed saturating doubling multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_n_s16)"]
+#[doc = "Reciprocal estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2))]
+#[cfg_attr(test, assert_instr(frecpe))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
-    let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-    let b: int16x4_t = vdup_n_s16(b);
-    vqdmull_s16(a, b)
+pub unsafe fn vrecpeq_f64(a: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecpe.v2f64"
+        )]
+        fn _vrecpeq_f64(a: float64x2_t) -> float64x2_t;
+    }
+    _vrecpeq_f64(a)
 }
-#[doc = "Signed saturating doubling multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_n_s32)"]
+#[doc = "Reciprocal estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecped_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2))]
+#[cfg_attr(test, assert_instr(frecpe))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
-    let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-    let b: int32x2_t = vdup_n_s32(b);
-    vqdmull_s32(a, b)
+pub unsafe fn vrecped_f64(a: f64) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecpe.f64"
+        )]
+        fn _vrecped_f64(a: f64) -> f64;
+    }
+    _vrecped_f64(a)
 }
-#[doc = "Signed saturating doubling multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_s16)"]
+#[doc = "Reciprocal estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpes_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2))]
+#[cfg_attr(test, assert_instr(frecpe))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
-    let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-    let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-    vqdmull_s16(a, b)
+pub unsafe fn vrecpes_f32(a: f32) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecpe.f32"
+        )]
+        fn _vrecpes_f32(a: f32) -> f32;
+    }
+    _vrecpes_f32(a)
 }
-#[doc = "Signed saturating doubling multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_s32)"]
+#[doc = "Floating-point reciprocal step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2))]
+#[cfg_attr(test, assert_instr(frecps))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
-    let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-    let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-    vqdmull_s32(a, b)
+pub unsafe fn vrecps_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecps.v1f64"
+        )]
+        fn _vrecps_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+    }
+    _vrecps_f64(a, b)
 }
-#[doc = "Vector saturating doubling long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_laneq_s16)"]
+#[doc = "Floating-point reciprocal step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull, N = 4))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(frecps))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmull_laneq_s16<const N: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 3);
-    let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
-    vqdmull_s16(a, b)
+pub unsafe fn vrecpsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecps.v2f64"
+        )]
+        fn _vrecpsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    _vrecpsq_f64(a, b)
 }
-#[doc = "Vector saturating doubling long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_laneq_s32)"]
+#[doc = "Floating-point reciprocal step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsd_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull, N = 2))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(frecps))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmull_laneq_s32<const N: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 2);
-    let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
-    vqdmull_s32(a, b)
+pub unsafe fn vrecpsd_f64(a: f64, b: f64) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecps.f64"
+        )]
+        fn _vrecpsd_f64(a: f64, b: f64) -> f64;
+    }
+    _vrecpsd_f64(a, b)
 }
-#[doc = "Signed saturating doubling multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_lane_s16)"]
+#[doc = "Floating-point reciprocal step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpss_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull, N = 2))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(frecps))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmullh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i32 {
-    static_assert_uimm_bits!(N, 2);
-    let b: i16 = simd_extract!(b, N as u32);
-    vqdmullh_s16(a, b)
+pub unsafe fn vrecpss_f32(a: f32, b: f32) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecps.f32"
+        )]
+        fn _vrecpss_f32(a: f32, b: f32) -> f32;
+    }
+    _vrecpss_f32(a, b)
 }
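+// A minimal sketch of how the estimate/step pair above is intended to be
+// used (hypothetical helper): FRECPS computes `2.0 - a * r`, so each
+// Newton-Raphson step `r * vrecpss_f32(a, r)` roughly doubles the precision
+// of the initial ~8-bit vrecpes_f32 estimate of `1.0 / a`.
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+unsafe fn recip_newton_raphson(a: f32) -> f32 {
+    let mut r = vrecpes_f32(a);
+    r *= vrecpss_f32(a, r); // first refinement step
+    r *= vrecpss_f32(a, r); // second refinement step
+    r
+}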
-#[doc = "Signed saturating doubling multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulls_laneq_s32)"]
+#[doc = "Floating-point reciprocal exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpxd_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull, N = 2))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(frecpx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulls_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i64 {
-    static_assert_uimm_bits!(N, 2);
-    let b: i32 = simd_extract!(b, N as u32);
-    vqdmulls_s32(a, b)
+pub unsafe fn vrecpxd_f64(a: f64) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecpx.f64"
+        )]
+        fn _vrecpxd_f64(a: f64) -> f64;
+    }
+    _vrecpxd_f64(a)
 }
-#[doc = "Signed saturating doubling multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_laneq_s16)"]
+#[doc = "Floating-point reciprocal exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpxs_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull, N = 4))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(frecpx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmullh_laneq_s16<const N: i32>(a: i16, b: int16x8_t) -> i32 {
-    static_assert_uimm_bits!(N, 3);
-    let b: i16 = simd_extract!(b, N as u32);
-    vqdmullh_s16(a, b)
+pub unsafe fn vrecpxs_f32(a: f32) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecpx.f32"
+        )]
+        fn _vrecpxs_f32(a: f32) -> f32;
+    }
+    _vrecpxs_f32(a)
 }
-#[doc = "Signed saturating doubling multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmullh_s16(a: i16, b: i16) -> i32 {
-    let a: int16x4_t = vdup_n_s16(a);
-    let b: int16x4_t = vdup_n_s16(b);
-    simd_extract!(vqdmull_s16(a, b), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_p128(a: p128) -> float64x2_t {
+    transmute(a)
 }
-#[doc = "Signed saturating doubling multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulls_lane_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull, N = 1))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulls_lane_s32<const N: i32>(a: i32, b: int32x2_t) -> i64 {
-    static_assert_uimm_bits!(N, 1);
-    let b: i32 = simd_extract!(b, N as u32);
-    vqdmulls_s32(a, b)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_p128(a: p128) -> float64x2_t {
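+    // Big-endian fixup: reverse the lane order after the transmute so that
+    // lane numbering matches the little-endian layout of float64x2_t.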
+    let ret_val: float64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Signed saturating doubling multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulls_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqdmulls_s32(a: i32, b: i32) -> i64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqdmulls.scalar"
-        )]
-        fn _vqdmulls_s32(a: i32, b: i32) -> i64;
-    }
-    _vqdmulls_s32(a, b)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t {
+    transmute(a)
 }
-#[doc = "Signed saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
-    simd_shuffle!(
-        a,
-        vqmovn_s16(b),
-        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-    )
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t {
+    let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+    transmute(a)
 }
-#[doc = "Signed saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
-    simd_shuffle!(a, vqmovn_s32(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t {
+    transmute(a)
 }
-#[doc = "Signed saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
-    simd_shuffle!(a, vqmovn_s64(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t {
+    let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+    transmute(a)
 }
-#[doc = "Signed saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
-    simd_shuffle!(
-        a,
-        vqmovn_u16(b),
-        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-    )
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t {
+    transmute(a)
 }
-#[doc = "Signed saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
-    simd_shuffle!(a, vqmovn_u32(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t {
+    let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: float64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Signed saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
-    simd_shuffle!(a, vqmovn_u64(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t {
+    transmute(a)
 }
-#[doc = "Saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnd_s64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovnd_s64(a: i64) -> i32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.scalar.sqxtn.i32.i64"
-        )]
-        fn _vqmovnd_s64(a: i64) -> i32;
-    }
-    _vqmovnd_s64(a)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t {
+    let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: poly64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnd_u64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovnd_u64(a: u64) -> u32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.scalar.uqxtn.i32.i64"
-        )]
-        fn _vqmovnd_u64(a: i64) -> i32;
-    }
-    _vqmovnd_u64(a.as_signed()).as_unsigned()
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t {
+    transmute(a)
 }
-#[doc = "Saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnh_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovnh_s16(a: i16) -> i8 {
-    simd_extract!(vqmovn_s16(vdupq_n_s16(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t {
+    let ret_val: float32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovns_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovns_s32(a: i32) -> i16 {
-    simd_extract!(vqmovn_s32(vdupq_n_s32(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t {
+    transmute(a)
 }
-#[doc = "Saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnh_u16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovnh_u16(a: u16) -> u8 {
-    simd_extract!(vqmovn_u16(vdupq_n_u16(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t {
+    let ret_val: int8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovns_u32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovns_u32(a: u32) -> u16 {
-    simd_extract!(vqmovn_u32(vdupq_n_u32(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t {
+    transmute(a)
 }
-#[doc = "Signed saturating extract unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovun_high_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
-    simd_shuffle!(
-        a,
-        vqmovun_s16(b),
-        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-    )
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t {
+    let ret_val: int16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
-#[doc = "Signed saturating extract unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovun_high_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
-    simd_shuffle!(a, vqmovun_s32(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t {
+    transmute(a)
 }
-#[doc = "Signed saturating extract unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovun_high_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
-    simd_shuffle!(a, vqmovun_s64(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t {
+    let ret_val: int32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Signed saturating extract unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovunh_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovunh_s16(a: i16) -> u8 {
-    simd_extract!(vqmovun_s16(vdupq_n_s16(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_s64_f64(a: float64x1_t) -> int64x1_t {
+    transmute(a)
 }
-#[doc = "Signed saturating extract unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovuns_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovuns_s32(a: i32) -> u16 {
-    simd_extract!(vqmovun_s32(vdupq_n_s32(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t {
+    transmute(a)
 }
-#[doc = "Signed saturating extract unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovund_s64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqmovund_s64(a: i64) -> u32 {
-    simd_extract!(vqmovun_s64(vdupq_n_s64(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t {
+    let ret_val: uint8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sqneg))]
-pub unsafe fn vqneg_s64(a: int64x1_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqneg.v1i64"
-        )]
-        fn _vqneg_s64(a: int64x1_t) -> int64x1_t;
-    }
-    _vqneg_s64(a)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t {
+    transmute(a)
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sqneg))]
-pub unsafe fn vqnegq_s64(a: int64x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqneg.v2i64"
-        )]
-        fn _vqnegq_s64(a: int64x2_t) -> int64x2_t;
-    }
-    _vqnegq_s64(a)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t {
+    let ret_val: uint16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegb_s8)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sqneg))]
-pub unsafe fn vqnegb_s8(a: i8) -> i8 {
-    simd_extract!(vqneg_s8(vdup_n_s8(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t {
+    transmute(a)
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegh_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sqneg))]
-pub unsafe fn vqnegh_s16(a: i16) -> i16 {
-    simd_extract!(vqneg_s16(vdup_n_s16(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t {
+    let ret_val: uint32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegs_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sqneg))]
-pub unsafe fn vqnegs_s32(a: i32) -> i32 {
-    simd_extract!(vqneg_s32(vdup_n_s32(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_u64_f64(a: float64x1_t) -> uint64x1_t {
+    transmute(a)
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegd_s64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sqneg))]
-pub unsafe fn vqnegd_s64(a: i64) -> i64 {
-    simd_extract!(vqneg_s64(vdup_n_s64(a)), 0)
-}
-#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlah_lane_s16<const LANE: i32>(
-    a: int16x4_t,
-    b: int16x4_t,
-    c: int16x4_t,
-) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vqrdmlah_s16(a, b, c)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_lane_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlah_lane_s32<const LANE: i32>(
-    a: int32x2_t,
-    b: int32x2_t,
-    c: int32x2_t,
-) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-    vqrdmlah_s32(a, b, c)
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t {
+    let ret_val: poly8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_laneq_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlah_laneq_s16<const LANE: i32>(
-    a: int16x4_t,
-    b: int16x4_t,
-    c: int16x8_t,
-) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vqrdmlah_s16(a, b, c)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_laneq_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlah_laneq_s32<const LANE: i32>(
-    a: int32x2_t,
-    b: int32x2_t,
-    c: int32x4_t,
-) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-    vqrdmlah_s32(a, b, c)
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t {
+    let ret_val: poly16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
-#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_lane_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlahq_lane_s16<const LANE: i32>(
-    a: int16x8_t,
-    b: int16x8_t,
-    c: int16x4_t,
-) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let c: int16x8_t = simd_shuffle!(
-        c,
-        c,
-        [
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32
-        ]
-    );
-    vqrdmlahq_s16(a, b, c)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p64_f64(a: float64x1_t) -> poly64x1_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_lane_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlahq_lane_s32<const LANE: i32>(
-    a: int32x4_t,
-    b: int32x4_t,
-    c: int32x2_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vqrdmlahq_s32(a, b, c)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_laneq_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlahq_laneq_s16<const LANE: i32>(
-    a: int16x8_t,
-    b: int16x8_t,
-    c: int16x8_t,
-) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    let c: int16x8_t = simd_shuffle!(
-        c,
-        c,
-        [
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32
-        ]
-    );
-    vqrdmlahq_s16(a, b, c)
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 {
+    let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+    transmute(a)
 }
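Because `p128` is a scalar rather than a vector type, the big-endian variant above only has to restore memory order on the `f64` input (`simd_shuffle!(a, a, [1, 0])`); there is no output shuffle. A little-endian spot check (hypothetical test function, not part of the patch): lane 0 occupies the low 64 bits of the resulting bit pattern.

```rust
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
fn p128_demo() {
    use core::arch::aarch64::*;
    unsafe {
        // Both lanes hold 1.0 (0x3FF0_0000_0000_0000), so the 128-bit pattern
        // is that value repeated, with lane 0 in the low half.
        let p = vreinterpretq_p128_f64(vdupq_n_f64(1.0));
        assert_eq!(p, 0x3FF0_0000_0000_0000_3FF0_0000_0000_0000);
    }
}
```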
-#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_laneq_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlahq_laneq_s32<const LANE: i32>(
-    a: int32x4_t,
-    b: int32x4_t,
-    c: int32x4_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vqrdmlahq_s32(a, b, c)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah))]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlah_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrdmlah.v4i16"
-        )]
-        fn _vqrdmlah_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t;
-    }
-    _vqrdmlah_s16(a, b, c)
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
+    let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: float32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
-#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah))]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlahq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrdmlah.v8i16"
-        )]
-        fn _vqrdmlahq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
-    }
-    _vqrdmlahq_s16(a, b, c)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah))]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlah_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrdmlah.v2i32"
-        )]
-        fn _vqrdmlah_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t;
-    }
-    _vqrdmlah_s32(a, b, c)
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t {
+    let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
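For the q-forms the big-endian variants need shuffles on both sides, as in `vreinterpretq_s8_f64` just above: the two `f64` input lanes are put back in memory order before the `transmute`, and the sixteen byte lanes of the result are reversed afterwards. A little-endian spot check (hypothetical helper; the constant follows from -2.0f64 encoding as 0xC000_0000_0000_0000):

```rust
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
fn qform_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let b = vreinterpretq_s8_f64(vdupq_n_f64(-2.0));
        // The sign/exponent byte of each f64 lane sits in byte lanes 7 and 15.
        assert_eq!(vgetq_lane_s8::<7>(b) as u8, 0xC0);
        assert_eq!(vgetq_lane_s8::<15>(b) as u8, 0xC0);
    }
}
```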
-#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah))]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlahq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrdmlah.v4i32"
-        )]
-        fn _vqrdmlahq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
-    }
-    _vqrdmlahq_s32(a, b, c)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahh_lane_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlahh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16 {
-    static_assert_uimm_bits!(LANE, 2);
-    vqrdmlahh_s16(a, b, simd_extract!(c, LANE as u32))
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t {
+    let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahh_laneq_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlahh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16 {
-    static_assert_uimm_bits!(LANE, 3);
-    vqrdmlahh_s16(a, b, simd_extract!(c, LANE as u32))
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahs_lane_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlahs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32 {
-    static_assert_uimm_bits!(LANE, 1);
-    vqrdmlahs_s32(a, b, simd_extract!(c, LANE as u32))
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t {
+    let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
-#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahs_laneq_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlahs_laneq_s32<const LANE: i32>(a: i32, b: i32, c: int32x4_t) -> i32 {
-    static_assert_uimm_bits!(LANE, 2);
-    vqrdmlahs_s32(a, b, simd_extract!(c, LANE as u32))
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahh_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah))]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlahh_s16(a: i16, b: i16, c: i16) -> i16 {
-    let a: int16x4_t = vdup_n_s16(a);
-    let b: int16x4_t = vdup_n_s16(b);
-    let c: int16x4_t = vdup_n_s16(c);
-    simd_extract!(vqrdmlah_s16(a, b, c), 0)
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t {
+    let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahs_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlah))]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 {
-    let a: int32x2_t = vdup_n_s32(a);
-    let b: int32x2_t = vdup_n_s32(b);
-    let c: int32x2_t = vdup_n_s32(c);
-    simd_extract!(vqrdmlah_s32(a, b, c), 0)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t {
+    let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
-#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_lane_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlsh_lane_s16<const LANE: i32>(
-    a: int16x4_t,
-    b: int16x4_t,
-    c: int16x4_t,
-) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vqrdmlsh_s16(a, b, c)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_lane_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlsh_lane_s32<const LANE: i32>(
-    a: int32x2_t,
-    b: int32x2_t,
-    c: int32x2_t,
-) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-    vqrdmlsh_s32(a, b, c)
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t {
+    let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_laneq_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlsh_laneq_s16<const LANE: i32>(
-    a: int16x4_t,
-    b: int16x4_t,
-    c: int16x8_t,
-) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vqrdmlsh_s16(a, b, c)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_laneq_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlsh_laneq_s32<const LANE: i32>(
-    a: int32x2_t,
-    b: int32x2_t,
-    c: int32x4_t,
-) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-    vqrdmlsh_s32(a, b, c)
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t {
+    let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
-#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_lane_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlshq_lane_s16<const LANE: i32>(
-    a: int16x8_t,
-    b: int16x8_t,
-    c: int16x4_t,
-) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let c: int16x8_t = simd_shuffle!(
-        c,
-        c,
-        [
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32
-        ]
-    );
-    vqrdmlshq_s16(a, b, c)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_lane_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlshq_lane_s32<const LANE: i32>(
-    a: int32x4_t,
-    b: int32x4_t,
-    c: int32x2_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vqrdmlshq_s32(a, b, c)
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t {
+    let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_laneq_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlshq_laneq_s16<const LANE: i32>(
-    a: int16x8_t,
-    b: int16x8_t,
-    c: int16x8_t,
-) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    let c: int16x8_t = simd_shuffle!(
-        c,
-        c,
-        [
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32
-        ]
-    );
-    vqrdmlshq_s16(a, b, c)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_laneq_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlshq_laneq_s32<const LANE: i32>(
-    a: int32x4_t,
-    b: int32x4_t,
-    c: int32x4_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vqrdmlshq_s32(a, b, c)
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t {
+    let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: poly8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
-#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlsh))]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlsh_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrdmlsh.v4i16"
-        )]
-        fn _vqrdmlsh_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t;
-    }
-    _vqrdmlsh_s16(a, b, c)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlsh))]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlshq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrdmlsh.v8i16"
-        )]
-        fn _vqrdmlshq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
-    }
-    _vqrdmlshq_s16(a, b, c)
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t {
+    let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: poly16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlsh))]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlsh_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrdmlsh.v2i32"
-        )]
-        fn _vqrdmlsh_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t;
-    }
-    _vqrdmlsh_s32(a, b, c)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlsh))]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlshq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrdmlsh.v4i32"
-        )]
-        fn _vqrdmlshq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
-    }
-    _vqrdmlshq_s32(a, b, c)
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t {
+    let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: poly64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshh_lane_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlshh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16 {
-    static_assert_uimm_bits!(LANE, 2);
-    vqrdmlshh_s16(a, b, simd_extract!(c, LANE as u32))
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshh_laneq_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlshh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16 {
-    static_assert_uimm_bits!(LANE, 3);
-    vqrdmlshh_s16(a, b, simd_extract!(c, LANE as u32))
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t {
+    let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    transmute(a)
 }
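Starting with `vreinterpret_f64_s8` above, the hunk covers the opposite direction: with a one-lane `f64` result there is nothing to reorder on the output, so the big-endian bodies only shuffle the input. Whatever the endianness, the two directions must compose to the identity; a round-trip sketch under the little-endian cfg (hypothetical helper, not part of the patch):

```rust
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
fn round_trip_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let data: [i8; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
        let v = vld1_s8(data.as_ptr());
        // An f64 view and back: the two reinterprets must cancel exactly.
        let back = vreinterpret_s8_f64(vreinterpret_f64_s8(v));
        let mut out = [0i8; 8];
        vst1_s8(out.as_mut_ptr(), back);
        assert_eq!(out, data);
    }
}
```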
-#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshs_lane_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlshs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32 {
-    static_assert_uimm_bits!(LANE, 1);
-    vqrdmlshs_s32(a, b, simd_extract!(c, LANE as u32))
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshs_laneq_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlsh, LANE = 1))]
-#[rustc_legacy_const_generics(3)]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlshs_laneq_s32<const LANE: i32>(a: i32, b: i32, c: int32x4_t) -> i32 {
-    static_assert_uimm_bits!(LANE, 2);
-    vqrdmlshs_s32(a, b, simd_extract!(c, LANE as u32))
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t {
+    let a: int8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: float64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshh_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlsh))]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlshh_s16(a: i16, b: i16, c: i16) -> i16 {
-    let a: int16x4_t = vdup_n_s16(a);
-    let b: int16x4_t = vdup_n_s16(b);
-    let c: int16x4_t = vdup_n_s16(c);
-    simd_extract!(vqrdmlsh_s16(a, b, c), 0)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshs_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "rdm")]
-#[cfg_attr(test, assert_instr(sqrdmlsh))]
-#[stable(feature = "rdm_intrinsics", since = "1.62.0")]
-pub unsafe fn vqrdmlshs_s32(a: i32, b: i32, c: i32) -> i32 {
-    let a: int32x2_t = vdup_n_s32(a);
-    let b: int32x2_t = vdup_n_s32(b);
-    let c: int32x2_t = vdup_n_s32(c);
-    simd_extract!(vqrdmlsh_s32(a, b, c), 0)
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t {
+    let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhh_lane_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrdmulhh_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> i16 {
-    static_assert_uimm_bits!(LANE, 2);
-    vqrdmulhh_s16(a, simd_extract!(b, LANE as u32))
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhh_laneq_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrdmulhh_laneq_s16<const LANE: i32>(a: i16, b: int16x8_t) -> i16 {
-    static_assert_uimm_bits!(LANE, 3);
-    vqrdmulhh_s16(a, simd_extract!(b, LANE as u32))
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t {
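+    // Big-endian: reverse the eight i16 input lanes before the cast, then
+    // swap the two f64 result lanes so callers see the expected lane order.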
+    let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: float64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Signed saturating rounding doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhs_lane_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrdmulhs_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> i32 {
-    static_assert_uimm_bits!(LANE, 1);
-    vqrdmulhs_s32(a, simd_extract!(b, LANE as u32))
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhs_laneq_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh, LANE = 1))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrdmulhs_laneq_s32<const LANE: i32>(a: i32, b: int32x4_t) -> i32 {
-    static_assert_uimm_bits!(LANE, 2);
-    vqrdmulhs_s32(a, simd_extract!(b, LANE as u32))
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t {
+    let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhh_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrdmulhh_s16(a: i16, b: i16) -> i16 {
-    simd_extract!(vqrdmulh_s16(vdup_n_s16(a), vdup_n_s16(b)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhs_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrdmulh))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrdmulhs_s32(a: i32, b: i32) -> i32 {
-    simd_extract!(vqrdmulh_s32(vdup_n_s32(a), vdup_n_s32(b)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t {
+    let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: float64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlb_s8)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshlb_s8(a: i8, b: i8) -> i8 {
-    let a: int8x8_t = vdup_n_s8(a);
-    let b: int8x8_t = vdup_n_s8(b);
-    simd_extract!(vqrshl_s8(a, b), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_s64(a: int64x1_t) -> float64x1_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlh_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshlh_s16(a: i16, b: i16) -> i16 {
-    let a: int16x4_t = vdup_n_s16(a);
-    let b: int16x4_t = vdup_n_s16(b);
-    simd_extract!(vqrshl_s16(a, b), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p64_s64(a: int64x1_t) -> poly64x1_t {
+    transmute(a)
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlb_u8)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshlb_u8(a: u8, b: i8) -> u8 {
-    let a: uint8x8_t = vdup_n_u8(a);
-    let b: int8x8_t = vdup_n_s8(b);
-    simd_extract!(vqrshl_u8(a, b), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t {
+    transmute(a)
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlh_u16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshlh_u16(a: u16, b: i16) -> u16 {
-    let a: uint16x4_t = vdup_n_u16(a);
-    let b: int16x4_t = vdup_n_s16(b);
-    simd_extract!(vqrshl_u16(a, b), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t {
+    let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: float64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshld_s64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshld_s64(a: i64, b: i64) -> i64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.i64"
-        )]
-        fn _vqrshld_s64(a: i64, b: i64) -> i64;
-    }
-    _vqrshld_s64(a, b)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshls_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshls_s32(a: i32, b: i32) -> i32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.i32"
-        )]
-        fn _vqrshls_s32(a: i32, b: i32) -> i32;
-    }
-    _vqrshls_s32(a, b)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t {
+    let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: poly64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshls_u32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshls_u32(a: u32, b: i32) -> u32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.i32"
-        )]
-        fn _vqrshls_u32(a: i32, b: i32) -> i32;
-    }
-    _vqrshls_u32(a.as_signed(), b).as_unsigned()
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t {
+    transmute(a)
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshld_u64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshld_u64(a: u64, b: i64) -> u64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.i64"
-        )]
-        fn _vqrshld_u64(a: i64, b: i64) -> i64;
-    }
-    _vqrshld_u64(a.as_signed(), b).as_unsigned()
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t {
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    transmute(a)
 }
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    simd_shuffle!(
-        a,
-        vqrshrn_n_s16::<N>(b),
-        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-    )
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    simd_shuffle!(a, vqrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t {
+    let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: float64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    simd_shuffle!(a, vqrshrn_n_s64::<N>(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t {
+    transmute(a)
 }
-#[doc = "Unsigned saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    simd_shuffle!(
-        a,
-        vqrshrn_n_u16::<N>(b),
-        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-    )
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t {
+    let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    transmute(a)
 }
-#[doc = "Unsigned saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    simd_shuffle!(a, vqrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t {
+    transmute(a)
 }
-#[doc = "Unsigned saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    simd_shuffle!(a, vqrshrn_n_u64::<N>(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t {
+    let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: float64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Unsigned saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnd_n_u64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrnd_n_u64<const N: i32>(a: u64) -> u32 {
-    static_assert!(N >= 1 && N <= 32);
-    let a: uint64x2_t = vdupq_n_u64(a);
-    simd_extract!(vqrshrn_n_u64::<N>(a), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t {
+    transmute(a)
 }
-#[doc = "Unsigned saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnh_n_u16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrnh_n_u16<const N: i32>(a: u16) -> u8 {
-    static_assert!(N >= 1 && N <= 8);
-    let a: uint16x8_t = vdupq_n_u16(a);
-    simd_extract!(vqrshrn_n_u16::<N>(a), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t {
+    let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+    transmute(a)
 }
-#[doc = "Unsigned saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrns_n_u32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrns_n_u32<const N: i32>(a: u32) -> u16 {
-    static_assert!(N >= 1 && N <= 16);
-    let a: uint32x4_t = vdupq_n_u32(a);
-    simd_extract!(vqrshrn_n_u32::<N>(a), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnh_n_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrnh_n_s16<const N: i32>(a: i16) -> i8 {
-    static_assert!(N >= 1 && N <= 8);
-    let a: int16x8_t = vdupq_n_s16(a);
-    simd_extract!(vqrshrn_n_s16::<N>(a), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t {
+    let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: float64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrns_n_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrns_n_s32<const N: i32>(a: i32) -> i16 {
-    static_assert!(N >= 1 && N <= 16);
-    let a: int32x4_t = vdupq_n_s32(a);
-    simd_extract!(vqrshrn_n_s32::<N>(a), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_u64(a: uint64x1_t) -> float64x1_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnd_n_s64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrnd_n_s64<const N: i32>(a: i64) -> i32 {
-    static_assert!(N >= 1 && N <= 32);
-    let a: int64x2_t = vdupq_n_s64(a);
-    simd_extract!(vqrshrn_n_s64::<N>(a), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_p64_u64(a: uint64x1_t) -> poly64x1_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    simd_shuffle!(
-        a,
-        vqrshrun_n_s16::<N>(b),
-        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-    )
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    simd_shuffle!(a, vqrshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t {
+    let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: float64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    simd_shuffle!(a, vqrshrun_n_s64::<N>(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrund_n_s64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrund_n_s64<const N: i32>(a: i64) -> u32 {
-    static_assert!(N >= 1 && N <= 32);
-    let a: int64x2_t = vdupq_n_s64(a);
-    simd_extract!(vqrshrun_n_s64::<N>(a), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t {
+    let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: poly64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrunh_n_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrunh_n_s16<const N: i32>(a: i16) -> u8 {
-    static_assert!(N >= 1 && N <= 8);
-    let a: int16x8_t = vdupq_n_s16(a);
-    simd_extract!(vqrshrun_n_s16::<N>(a), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t {
+    transmute(a)
 }
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshruns_n_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshruns_n_s32<const N: i32>(a: i32) -> u16 {
-    static_assert!(N >= 1 && N <= 16);
-    let a: int32x4_t = vdupq_n_s32(a);
-    simd_extract!(vqrshrun_n_s32::<N>(a), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t {
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    transmute(a)
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_n_s8)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshlb_n_s8<const N: i32>(a: i8) -> i8 {
-    static_assert_uimm_bits!(N, 3);
-    simd_extract!(vqshl_n_s8::<N>(vdup_n_s8(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t {
+    transmute(a)
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_n_s64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshld_n_s64<const N: i32>(a: i64) -> i64 {
-    static_assert_uimm_bits!(N, 6);
-    simd_extract!(vqshl_n_s64::<N>(vdup_n_s64(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t {
+    let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: float64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_n_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshlh_n_s16<const N: i32>(a: i16) -> i16 {
-    static_assert_uimm_bits!(N, 4);
-    simd_extract!(vqshl_n_s16::<N>(vdup_n_s16(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t {
+    transmute(a)
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_n_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshls_n_s32<const N: i32>(a: i32) -> i32 {
-    static_assert_uimm_bits!(N, 5);
-    simd_extract!(vqshl_n_s32::<N>(vdup_n_s32(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t {
+    let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    transmute(a)
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_n_u8)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshlb_n_u8<const N: i32>(a: u8) -> u8 {
-    static_assert_uimm_bits!(N, 3);
-    simd_extract!(vqshl_n_u8::<N>(vdup_n_u8(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t {
+    transmute(a)
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_n_u64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshld_n_u64<const N: i32>(a: u64) -> u64 {
-    static_assert_uimm_bits!(N, 6);
-    simd_extract!(vqshl_n_u64::<N>(vdup_n_u64(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t {
+    let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: float64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_n_u16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshlh_n_u16<const N: i32>(a: u16) -> u16 {
-    static_assert_uimm_bits!(N, 4);
-    simd_extract!(vqshl_n_u16::<N>(vdup_n_u16(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t {
+    transmute(a)
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_n_u32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshl, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshls_n_u32<const N: i32>(a: u32) -> u32 {
-    static_assert_uimm_bits!(N, 5);
-    simd_extract!(vqshl_n_u32::<N>(vdup_n_u32(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t {
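+    // Big-endian: the input is a single 64-bit lane, so no input shuffle is
+    // required; only the two f32 result lanes are swapped back into order.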
+    let ret_val: float32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_s8)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshlb_s8(a: i8, b: i8) -> i8 {
-    let c: int8x8_t = vqshl_s8(vdup_n_s8(a), vdup_n_s8(b));
-    simd_extract!(c, 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_f64_p64(a: poly64x1_t) -> float64x1_t {
+    transmute(a)
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshlh_s16(a: i16, b: i16) -> i16 {
-    let c: int16x4_t = vqshl_s16(vdup_n_s16(a), vdup_n_s16(b));
-    simd_extract!(c, 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_s64_p64(a: poly64x1_t) -> int64x1_t {
+    transmute(a)
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshls_s32(a: i32, b: i32) -> i32 {
-    let c: int32x2_t = vqshl_s32(vdup_n_s32(a), vdup_n_s32(b));
-    simd_extract!(c, 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpret_u64_p64(a: poly64x1_t) -> uint64x1_t {
+    transmute(a)
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_u8)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshlb_u8(a: u8, b: i8) -> u8 {
-    let c: uint8x8_t = vqshl_u8(vdup_n_u8(a), vdup_n_s8(b));
-    simd_extract!(c, 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t {
+    transmute(a)
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_u16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshlh_u16(a: u16, b: i16) -> u16 {
-    let c: uint16x4_t = vqshl_u16(vdup_n_u16(a), vdup_n_s16(b));
-    simd_extract!(c, 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t {
+    let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: float32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_u32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshls_u32(a: u32, b: i32) -> u32 {
-    let c: uint32x2_t = vqshl_u32(vdup_n_u32(a), vdup_n_s32(b));
-    simd_extract!(c, 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t {
+    transmute(a)
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_s64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshld_s64(a: i64, b: i64) -> i64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.i64"
-        )]
-        fn _vqshld_s64(a: i64, b: i64) -> i64;
-    }
-    _vqshld_s64(a, b)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t {
+    let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: float64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_u64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshld_u64(a: u64, b: i64) -> u64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.i64"
-        )]
-        fn _vqshld_u64(a: i64, b: i64) -> i64;
-    }
-    _vqshld_u64(a.as_signed(), b).as_unsigned()
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t {
+    transmute(a)
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlub_n_s8)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshlub_n_s8<const N: i32>(a: i8) -> u8 {
-    static_assert_uimm_bits!(N, 3);
-    simd_extract!(vqshlu_n_s8::<N>(vdup_n_s8(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t {
+    let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlud_n_s64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshlud_n_s64<const N: i32>(a: i64) -> u64 {
-    static_assert_uimm_bits!(N, 6);
-    simd_extract!(vqshlu_n_s64::<N>(vdup_n_s64(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t {
+    transmute(a)
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluh_n_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshluh_n_s16<const N: i32>(a: i16) -> u16 {
-    static_assert_uimm_bits!(N, 4);
-    simd_extract!(vqshlu_n_s16::<N>(vdup_n_s16(a)), 0)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t {
+    let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Floating-point round to 32-bit integer, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))]
+pub unsafe fn vrnd32x_f32(a: float32x2_t) -> float32x2_t {
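+    // Binds directly to the LLVM intrinsic; on AArch64 this lowers to a
+    // single FRINT32X instruction (checked by `assert_instr` above).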
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint32x.v2f32"
+        )]
+        fn _vrnd32x_f32(a: float32x2_t) -> float32x2_t;
+    }
+    _vrnd32x_f32(a)
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlus_n_s32)"]
+#[doc = "Floating-point round to 32-bit integer, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshlus_n_s32<const N: i32>(a: i32) -> u32 {
-    static_assert_uimm_bits!(N, 5);
-    simd_extract!(vqshlu_n_s32::<N>(vdup_n_s32(a)), 0)
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))]
+pub unsafe fn vrnd32xq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint32x.v4f32"
+        )]
+        fn _vrnd32xq_f32(a: float32x4_t) -> float32x4_t;
+    }
+    _vrnd32xq_f32(a)
 }
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s16)"]
+#[doc = "Floating-point round to 32-bit integer, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    simd_shuffle!(
-        a,
-        vqshrn_n_s16::<N>(b),
-        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-    )
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))]
+pub unsafe fn vrnd32xq_f64(a: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint32x.v2f64"
+        )]
+        fn _vrnd32xq_f64(a: float64x2_t) -> float64x2_t;
+    }
+    _vrnd32xq_f64(a)
 }
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s32)"]
+#[doc = "Floating-point round to 32-bit integer, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    simd_shuffle!(a, vqshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))]
+pub unsafe fn vrnd32x_f64(a: float64x1_t) -> float64x1_t {
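+    // The scalar LLVM intrinsic operates on a plain f64, so the single lane
+    // is extracted, rounded, and the result transmuted back to float64x1_t.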
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.frint32x.f64"
+        )]
+        fn _vrnd32x_f64(a: f64) -> f64;
+    }
+    transmute(_vrnd32x_f64(simd_extract!(a, 0)))
 }
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s64)"]
+#[doc = "Floating-point round to 32-bit integer toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    simd_shuffle!(a, vqshrn_n_s64::<N>(b), [0, 1, 2, 3])
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))]
+pub unsafe fn vrnd32z_f32(a: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint32z.v2f32"
+        )]
+        fn _vrnd32z_f32(a: float32x2_t) -> float32x2_t;
+    }
+    _vrnd32z_f32(a)
 }
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u16)"]
+#[doc = "Floating-point round to 32-bit integer toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    simd_shuffle!(
-        a,
-        vqshrn_n_u16::<N>(b),
-        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-    )
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))]
+pub unsafe fn vrnd32zq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint32z.v4f32"
+        )]
+        fn _vrnd32zq_f32(a: float32x4_t) -> float32x4_t;
+    }
+    _vrnd32zq_f32(a)
 }
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u32)"]
+#[doc = "Floating-point round to 32-bit integer toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    simd_shuffle!(a, vqshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))]
+pub unsafe fn vrnd32zq_f64(a: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint32z.v2f64"
+        )]
+        fn _vrnd32zq_f64(a: float64x2_t) -> float64x2_t;
+    }
+    _vrnd32zq_f64(a)
 }
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u64)"]
+#[doc = "Floating-point round to 32-bit integer toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    simd_shuffle!(a, vqshrn_n_u64::<N>(b), [0, 1, 2, 3])
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))]
+pub unsafe fn vrnd32z_f64(a: float64x1_t) -> float64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.frint32z.f64"
+        )]
+        fn _vrnd32z_f64(a: f64) -> f64;
+    }
+    transmute(_vrnd32z_f64(simd_extract!(a, 0)))
 }
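The four `vrnd32z*` functions above implement FRINT32Z: round toward zero to a value representable as a 32-bit integer, with the result staying in floating-point. A minimal scalar sketch of that rule, assuming (per Arm's FRINT32Z description) that NaN and out-of-range inputs map to the most negative 32-bit integer; `frint32z_model` is a hypothetical name, not part of this patch:

```rust
// Illustrative scalar model only; the real intrinsics operate on whole
// NEON vectors. Assumption: NaN and out-of-range inputs yield -2^31.
const LIMIT: f32 = 2147483648.0; // 2^31, exactly representable in f32

fn frint32z_model(x: f32) -> f32 {
    let t = x.trunc(); // round toward zero; the result stays floating-point
    if t.is_nan() || t < -LIMIT || t >= LIMIT {
        -LIMIT // most negative 32-bit integer
    } else {
        t
    }
}

fn main() {
    assert_eq!(frint32z_model(2.9), 2.0);
    assert_eq!(frint32z_model(-2.9), -2.0);
    assert_eq!(frint32z_model(1e20), -LIMIT);
}
```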
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnd_n_s64)"]
+#[doc = "Floating-point round to 64-bit integer, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrnd_n_s64<const N: i32>(a: i64) -> i32 {
-    static_assert!(N >= 1 && N <= 32);
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))]
+pub unsafe fn vrnd64x_f32(a: float32x2_t) -> float32x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshrn.i32"
+            link_name = "llvm.aarch64.neon.frint64x.v2f32"
         )]
-        fn _vqshrnd_n_s64(a: i64, n: i32) -> i32;
+        fn _vrnd64x_f32(a: float32x2_t) -> float32x2_t;
     }
-    _vqshrnd_n_s64(a, N)
+    _vrnd64x_f32(a)
 }
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnd_n_u64)"]
+#[doc = "Floating-point round to 64-bit integer, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrnd_n_u64<const N: i32>(a: u64) -> u32 {
-    static_assert!(N >= 1 && N <= 32);
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))]
+pub unsafe fn vrnd64xq_f32(a: float32x4_t) -> float32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshrn.i32"
+            link_name = "llvm.aarch64.neon.frint64x.v4f32"
         )]
-        fn _vqshrnd_n_u64(a: i64, n: i32) -> i32;
+        fn _vrnd64xq_f32(a: float32x4_t) -> float32x4_t;
     }
-    _vqshrnd_n_u64(a.as_signed(), N).as_unsigned()
+    _vrnd64xq_f32(a)
 }
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnh_n_s16)"]
+#[doc = "Floating-point round to 64-bit integer, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrnh_n_s16<const N: i32>(a: i16) -> i8 {
-    static_assert!(N >= 1 && N <= 8);
-    simd_extract!(vqshrn_n_s16::<N>(vdupq_n_s16(a)), 0)
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))]
+pub unsafe fn vrnd64xq_f64(a: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint64x.v2f64"
+        )]
+        fn _vrnd64xq_f64(a: float64x2_t) -> float64x2_t;
+    }
+    _vrnd64xq_f64(a)
 }
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrns_n_s32)"]
+#[doc = "Floating-point round to 64-bit integer, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrns_n_s32<const N: i32>(a: i32) -> i16 {
-    static_assert!(N >= 1 && N <= 16);
-    simd_extract!(vqshrn_n_s32::<N>(vdupq_n_s32(a)), 0)
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))]
+pub unsafe fn vrnd64x_f64(a: float64x1_t) -> float64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.frint64x.f64"
+        )]
+        fn _vrnd64x_f64(a: f64) -> f64;
+    }
+    transmute(_vrnd64x_f64(simd_extract!(a, 0)))
 }
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnh_n_u16)"]
+#[doc = "Floating-point round to 64-bit integer toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrnh_n_u16<const N: i32>(a: u16) -> u8 {
-    static_assert!(N >= 1 && N <= 8);
-    simd_extract!(vqshrn_n_u16::<N>(vdupq_n_u16(a)), 0)
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))]
+pub unsafe fn vrnd64z_f32(a: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint64z.v2f32"
+        )]
+        fn _vrnd64z_f32(a: float32x2_t) -> float32x2_t;
+    }
+    _vrnd64z_f32(a)
 }
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrns_n_u32)"]
+#[doc = "Floating-point round to 64-bit integer toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrns_n_u32<const N: i32>(a: u32) -> u16 {
-    static_assert!(N >= 1 && N <= 16);
-    simd_extract!(vqshrn_n_u32::<N>(vdupq_n_u32(a)), 0)
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))]
+pub unsafe fn vrnd64zq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint64z.v4f32"
+        )]
+        fn _vrnd64zq_f32(a: float32x4_t) -> float32x4_t;
+    }
+    _vrnd64zq_f32(a)
 }
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s16)"]
+#[doc = "Floating-point round to 64-bit integer toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    simd_shuffle!(
-        a,
-        vqshrun_n_s16::<N>(b),
-        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-    )
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))]
+pub unsafe fn vrnd64zq_f64(a: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frint64z.v2f64"
+        )]
+        fn _vrnd64zq_f64(a: float64x2_t) -> float64x2_t;
+    }
+    _vrnd64zq_f64(a)
 }
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s32)"]
+#[doc = "Floating-point round to 64-bit integer toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    simd_shuffle!(a, vqshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[target_feature(enable = "neon,frintts")]
+#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))]
+pub unsafe fn vrnd64z_f64(a: float64x1_t) -> float64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.frint64z.f64"
+        )]
+        fn _vrnd64z_f64(a: f64) -> f64;
+    }
+    transmute(_vrnd64z_f64(simd_extract!(a, 0)))
 }
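Everything in this frintts block is unstable, so a caller has to opt in on nightly. A hedged usage sketch: the library feature `stdarch_neon_ftts` and target feature `frintts` are taken from the attributes above, while `demo` itself is hypothetical:

```rust
// Nightly-only sketch, assuming an aarch64 target.
#![feature(stdarch_neon_ftts)]

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon,frintts")]
unsafe fn demo(v: core::arch::aarch64::float64x2_t) -> core::arch::aarch64::float64x2_t {
    // Round each lane toward zero to a value representable in 64 bits.
    core::arch::aarch64::vrnd64zq_f64(v)
}

fn main() {}
```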
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s64)"]
+#[doc = "Floating-point round to integral, toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    simd_shuffle!(a, vqshrun_n_s64::<N>(b), [0, 1, 2, 3])
+#[cfg_attr(test, assert_instr(frintz))]
+pub unsafe fn vrnd_f32(a: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.trunc.v2f32"
+        )]
+        fn _vrnd_f32(a: float32x2_t) -> float32x2_t;
+    }
+    _vrnd_f32(a)
 }
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrund_n_s64)"]
+#[doc = "Floating-point round to integral, toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrund_n_s64<const N: i32>(a: i64) -> u32 {
-    static_assert!(N >= 1 && N <= 32);
-    simd_extract!(vqshrun_n_s64::<N>(vdupq_n_s64(a)), 0)
+#[cfg_attr(test, assert_instr(frintz))]
+pub unsafe fn vrndq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.trunc.v4f32"
+        )]
+        fn _vrndq_f32(a: float32x4_t) -> float32x4_t;
+    }
+    _vrndq_f32(a)
 }
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrunh_n_s16)"]
+#[doc = "Floating-point round to integral, toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrunh_n_s16<const N: i32>(a: i16) -> u8 {
-    static_assert!(N >= 1 && N <= 8);
-    simd_extract!(vqshrun_n_s16::<N>(vdupq_n_s16(a)), 0)
+#[cfg_attr(test, assert_instr(frintz))]
+pub unsafe fn vrnd_f64(a: float64x1_t) -> float64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.trunc.v1f64"
+        )]
+        fn _vrnd_f64(a: float64x1_t) -> float64x1_t;
+    }
+    _vrnd_f64(a)
 }
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshruns_n_s32)"]
+#[doc = "Floating-point round to integral, toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshruns_n_s32<const N: i32>(a: i32) -> u16 {
-    static_assert!(N >= 1 && N <= 16);
-    simd_extract!(vqshrun_n_s32::<N>(vdupq_n_s32(a)), 0)
+#[cfg_attr(test, assert_instr(frintz))]
+pub unsafe fn vrndq_f64(a: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.trunc.v2f64"
+        )]
+        fn _vrndq_f64(a: float64x2_t) -> float64x2_t;
+    }
+    _vrndq_f64(a)
 }
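The `vrnd*` family above lowers to the generic `llvm.trunc.*` intrinsics, so lane-wise it agrees with the stable `trunc` methods in std; a quick check of that model:

```rust
fn main() {
    // llvm.trunc rounds toward zero, like f32::trunc / f64::trunc.
    let xs = [1.7f32, -1.7, 0.2, -0.2];
    let out: Vec<f32> = xs.iter().map(|x| x.trunc()).collect();
    assert_eq!(out, [1.0, -1.0, 0.0, -0.0]);
}
```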
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubb_s8)"]
+#[doc = "Floating-point round to integral, to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sqsub))]
-pub unsafe fn vqsubb_s8(a: i8, b: i8) -> i8 {
-    let a: int8x8_t = vdup_n_s8(a);
-    let b: int8x8_t = vdup_n_s8(b);
-    simd_extract!(vqsub_s8(a, b), 0)
+#[cfg_attr(test, assert_instr(frinta))]
+pub unsafe fn vrnda_f32(a: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.round.v2f32"
+        )]
+        fn _vrnda_f32(a: float32x2_t) -> float32x2_t;
+    }
+    _vrnda_f32(a)
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubh_s16)"]
+#[doc = "Floating-point round to integral, to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sqsub))]
-pub unsafe fn vqsubh_s16(a: i16, b: i16) -> i16 {
-    let a: int16x4_t = vdup_n_s16(a);
-    let b: int16x4_t = vdup_n_s16(b);
-    simd_extract!(vqsub_s16(a, b), 0)
+#[cfg_attr(test, assert_instr(frinta))]
+pub unsafe fn vrndaq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.round.v4f32"
+        )]
+        fn _vrndaq_f32(a: float32x4_t) -> float32x4_t;
+    }
+    _vrndaq_f32(a)
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubb_u8)"]
+#[doc = "Floating-point round to integral, to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(uqsub))]
-pub unsafe fn vqsubb_u8(a: u8, b: u8) -> u8 {
-    let a: uint8x8_t = vdup_n_u8(a);
-    let b: uint8x8_t = vdup_n_u8(b);
-    simd_extract!(vqsub_u8(a, b), 0)
+#[cfg_attr(test, assert_instr(frinta))]
+pub unsafe fn vrnda_f64(a: float64x1_t) -> float64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.round.v1f64"
+        )]
+        fn _vrnda_f64(a: float64x1_t) -> float64x1_t;
+    }
+    _vrnda_f64(a)
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubh_u16)"]
+#[doc = "Floating-point round to integral, to nearest with ties to away"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(uqsub))]
-pub unsafe fn vqsubh_u16(a: u16, b: u16) -> u16 {
-    let a: uint16x4_t = vdup_n_u16(a);
-    let b: uint16x4_t = vdup_n_u16(b);
-    simd_extract!(vqsub_u16(a, b), 0)
+#[cfg_attr(test, assert_instr(frinta))]
+pub unsafe fn vrndaq_f64(a: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.round.v2f64"
+        )]
+        fn _vrndaq_f64(a: float64x2_t) -> float64x2_t;
+    }
+    _vrndaq_f64(a)
 }
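`vrnda*` lowers to `llvm.round.*`, whose half-way cases go away from zero; this matches stable `f32::round`, as a short check shows:

```rust
fn main() {
    // Ties round away from zero, in both directions.
    assert_eq!(0.5f32.round(), 1.0);
    assert_eq!((-0.5f32).round(), -1.0);
    assert_eq!(2.5f32.round(), 3.0);
}
```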
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubs_s32)"]
+#[doc = "Floating-point round to integral, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sqsub))]
-pub unsafe fn vqsubs_s32(a: i32, b: i32) -> i32 {
+#[cfg_attr(test, assert_instr(frinti))]
+pub unsafe fn vrndi_f32(a: float32x2_t) -> float32x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqsub.i32"
+            link_name = "llvm.nearbyint.v2f32"
         )]
-        fn _vqsubs_s32(a: i32, b: i32) -> i32;
+        fn _vrndi_f32(a: float32x2_t) -> float32x2_t;
     }
-    _vqsubs_s32(a, b)
+    _vrndi_f32(a)
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubd_s64)"]
+#[doc = "Floating-point round to integral, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sqsub))]
-pub unsafe fn vqsubd_s64(a: i64, b: i64) -> i64 {
+#[cfg_attr(test, assert_instr(frinti))]
+pub unsafe fn vrndiq_f32(a: float32x4_t) -> float32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqsub.i64"
+            link_name = "llvm.nearbyint.v4f32"
         )]
-        fn _vqsubd_s64(a: i64, b: i64) -> i64;
+        fn _vrndiq_f32(a: float32x4_t) -> float32x4_t;
     }
-    _vqsubd_s64(a, b)
+    _vrndiq_f32(a)
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubs_u32)"]
+#[doc = "Floating-point round to integral, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(uqsub))]
-pub unsafe fn vqsubs_u32(a: u32, b: u32) -> u32 {
+#[cfg_attr(test, assert_instr(frinti))]
+pub unsafe fn vrndi_f64(a: float64x1_t) -> float64x1_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqsub.i32"
+            link_name = "llvm.nearbyint.v1f64"
         )]
-        fn _vqsubs_u32(a: i32, b: i32) -> i32;
+        fn _vrndi_f64(a: float64x1_t) -> float64x1_t;
     }
-    _vqsubs_u32(a.as_signed(), b.as_signed()).as_unsigned()
+    _vrndi_f64(a)
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubd_u64)"]
+#[doc = "Floating-point round to integral, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(uqsub))]
-pub unsafe fn vqsubd_u64(a: u64, b: u64) -> u64 {
+#[cfg_attr(test, assert_instr(frinti))]
+pub unsafe fn vrndiq_f64(a: float64x2_t) -> float64x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqsub.i64"
+            link_name = "llvm.nearbyint.v2f64"
         )]
-        fn _vqsubd_u64(a: i64, b: i64) -> i64;
+        fn _vrndiq_f64(a: float64x2_t) -> float64x2_t;
     }
-    _vqsubd_u64(a.as_signed(), b.as_signed()).as_unsigned()
+    _vrndiq_f64(a)
 }
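`vrndi*` lowers to `llvm.nearbyint.*`, which honors the current FPCR rounding mode. Stable Rust cannot change that mode, but under the default (round to nearest, ties to even) the behavior matches `round_ties_even`, stable since Rust 1.77; the default-mode equivalence is an assumption on my part, not something this patch states:

```rust
fn main() {
    // Assumes the default AArch64 rounding mode (ties to even).
    assert_eq!(2.5f64.round_ties_even(), 2.0);
    assert_eq!(3.5f64.round_ties_even(), 4.0);
    assert_eq!((-2.5f64).round_ties_even(), -2.0);
}
```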
-#[doc = "Rotate and exclusive OR"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrax1q_u64)"]
+#[doc = "Floating-point round to integral, toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(rax1))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn vrax1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(frintm))]
+pub unsafe fn vrndm_f32(a: float32x2_t) -> float32x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.rax1"
+            link_name = "llvm.floor.v2f32"
         )]
-        fn _vrax1q_u64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+        fn _vrndm_f32(a: float32x2_t) -> float32x2_t;
     }
-    _vrax1q_u64(a.as_signed(), b.as_signed()).as_unsigned()
+    _vrndm_f32(a)
 }
-#[doc = "Reverse bit order"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_s8)"]
+#[doc = "Floating-point round to integral, toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(rbit))]
-pub unsafe fn vrbit_s8(a: int8x8_t) -> int8x8_t {
+#[cfg_attr(test, assert_instr(frintm))]
+pub unsafe fn vrndmq_f32(a: float32x4_t) -> float32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.rbit.v8i8"
+            link_name = "llvm.floor.v4f32"
         )]
-        fn _vrbit_s8(a: int8x8_t) -> int8x8_t;
+        fn _vrndmq_f32(a: float32x4_t) -> float32x4_t;
     }
-    _vrbit_s8(a)
+    _vrndmq_f32(a)
 }
-#[doc = "Reverse bit order"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_s8)"]
+#[doc = "Floating-point round to integral, toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(rbit))]
-pub unsafe fn vrbitq_s8(a: int8x16_t) -> int8x16_t {
+#[cfg_attr(test, assert_instr(frintm))]
+pub unsafe fn vrndm_f64(a: float64x1_t) -> float64x1_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.rbit.v16i8"
+            link_name = "llvm.floor.v1f64"
         )]
-        fn _vrbitq_s8(a: int8x16_t) -> int8x16_t;
+        fn _vrndm_f64(a: float64x1_t) -> float64x1_t;
     }
-    _vrbitq_s8(a)
+    _vrndm_f64(a)
 }
-#[doc = "Reverse bit order"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_u8)"]
+#[doc = "Floating-point round to integral, toward minus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(rbit))]
-pub unsafe fn vrbit_u8(a: uint8x8_t) -> uint8x8_t {
-    transmute(vrbit_s8(transmute(a)))
+#[cfg_attr(test, assert_instr(frintm))]
+pub unsafe fn vrndmq_f64(a: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.floor.v2f64"
+        )]
+        fn _vrndmq_f64(a: float64x2_t) -> float64x2_t;
+    }
+    _vrndmq_f64(a)
 }
-#[doc = "Reverse bit order"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_u8)"]
+#[doc = "Floating-point round to integral, to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(rbit))]
-pub unsafe fn vrbitq_u8(a: uint8x16_t) -> uint8x16_t {
-    transmute(vrbitq_s8(transmute(a)))
+#[cfg_attr(test, assert_instr(frintn))]
+pub unsafe fn vrndn_f64(a: float64x1_t) -> float64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frintn.v1f64"
+        )]
+        fn _vrndn_f64(a: float64x1_t) -> float64x1_t;
+    }
+    _vrndn_f64(a)
 }
-#[doc = "Reverse bit order"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_p8)"]
+#[doc = "Floating-point round to integral, to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(rbit))]
-pub unsafe fn vrbit_p8(a: poly8x8_t) -> poly8x8_t {
-    transmute(vrbit_s8(transmute(a)))
+#[cfg_attr(test, assert_instr(frintn))]
+pub unsafe fn vrndnq_f64(a: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frintn.v2f64"
+        )]
+        fn _vrndnq_f64(a: float64x2_t) -> float64x2_t;
+    }
+    _vrndnq_f64(a)
 }
-#[doc = "Reverse bit order"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_p8)"]
+#[doc = "Floating-point round to integral, to nearest with ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndns_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(rbit))]
-pub unsafe fn vrbitq_p8(a: poly8x16_t) -> poly8x16_t {
-    transmute(vrbitq_s8(transmute(a)))
+#[cfg_attr(test, assert_instr(frintn))]
+pub unsafe fn vrndns_f32(a: f32) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.roundeven.f32"
+        )]
+        fn _vrndns_f32(a: f32) -> f32;
+    }
+    _vrndns_f32(a)
 }
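Note the split above: the vector `vrndn*_f64` forms use the AArch64-specific `llvm.aarch64.neon.frintn`, while scalar `vrndns_f32` uses the generic `llvm.roundeven`; both round ties to even, which differs from `vrnda*`'s ties-away rule:

```rust
fn main() {
    // Same input, different tie-breaking: frintn vs frinta.
    assert_eq!(0.5f32.round_ties_even(), 0.0); // frintn-style
    assert_eq!(0.5f32.round(), 1.0); // frinta-style
}
```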
-#[doc = "Reciprocal estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f64)"]
+#[doc = "Floating-point round to integral, toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frecpe))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrecpe_f64(a: float64x1_t) -> float64x1_t {
+#[cfg_attr(test, assert_instr(frintp))]
+pub unsafe fn vrndp_f32(a: float32x2_t) -> float32x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecpe.v1f64"
+            link_name = "llvm.ceil.v2f32"
         )]
-        fn _vrecpe_f64(a: float64x1_t) -> float64x1_t;
+        fn _vrndp_f32(a: float32x2_t) -> float32x2_t;
     }
-    _vrecpe_f64(a)
+    _vrndp_f32(a)
 }
-#[doc = "Reciprocal estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f64)"]
+#[doc = "Floating-point round to integral, toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frecpe))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrecpeq_f64(a: float64x2_t) -> float64x2_t {
+#[cfg_attr(test, assert_instr(frintp))]
+pub unsafe fn vrndpq_f32(a: float32x4_t) -> float32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecpe.v2f64"
+            link_name = "llvm.ceil.v4f32"
         )]
-        fn _vrecpeq_f64(a: float64x2_t) -> float64x2_t;
+        fn _vrndpq_f32(a: float32x4_t) -> float32x4_t;
     }
-    _vrecpeq_f64(a)
+    _vrndpq_f32(a)
 }
-#[doc = "Reciprocal estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecped_f64)"]
+#[doc = "Floating-point round to integral, toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frecpe))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrecped_f64(a: f64) -> f64 {
+#[cfg_attr(test, assert_instr(frintp))]
+pub unsafe fn vrndp_f64(a: float64x1_t) -> float64x1_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecpe.f64"
+            link_name = "llvm.ceil.v1f64"
         )]
-        fn _vrecped_f64(a: f64) -> f64;
+        fn _vrndp_f64(a: float64x1_t) -> float64x1_t;
     }
-    _vrecped_f64(a)
+    _vrndp_f64(a)
 }
-#[doc = "Reciprocal estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpes_f32)"]
+#[doc = "Floating-point round to integral, toward plus infinity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frecpe))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrecpes_f32(a: f32) -> f32 {
+#[cfg_attr(test, assert_instr(frintp))]
+pub unsafe fn vrndpq_f64(a: float64x2_t) -> float64x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecpe.f32"
+            link_name = "llvm.ceil.v2f64"
         )]
-        fn _vrecpes_f32(a: f32) -> f32;
+        fn _vrndpq_f64(a: float64x2_t) -> float64x2_t;
     }
-    _vrecpes_f32(a)
+    _vrndpq_f64(a)
 }
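`vrndm*` (llvm.floor) and `vrndp*` (llvm.ceil) are directed-rounding duals; the distinction only shows on values that are not already integral:

```rust
fn main() {
    // floor rounds toward minus infinity, ceil toward plus infinity.
    assert_eq!((-1.2f64).floor(), -2.0);
    assert_eq!((-1.2f64).ceil(), -1.0);
    assert_eq!(1.2f64.floor(), 1.0);
    assert_eq!(1.2f64.ceil(), 2.0);
}
```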
-#[doc = "Floating-point reciprocal step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f64)"]
+#[doc = "Floating-point round to integral exact, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frecps))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrecps_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+#[cfg_attr(test, assert_instr(frintx))]
+pub unsafe fn vrndx_f32(a: float32x2_t) -> float32x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecps.v1f64"
+            link_name = "llvm.rint.v2f32"
         )]
-        fn _vrecps_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+        fn _vrndx_f32(a: float32x2_t) -> float32x2_t;
     }
-    _vrecps_f64(a, b)
+    _vrndx_f32(a)
 }
-#[doc = "Floating-point reciprocal step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f64)"]
+#[doc = "Floating-point round to integral exact, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frecps))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrecpsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+#[cfg_attr(test, assert_instr(frintx))]
+pub unsafe fn vrndxq_f32(a: float32x4_t) -> float32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecps.v2f64"
+            link_name = "llvm.rint.v4f32"
         )]
-        fn _vrecpsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+        fn _vrndxq_f32(a: float32x4_t) -> float32x4_t;
     }
-    _vrecpsq_f64(a, b)
+    _vrndxq_f32(a)
 }
-#[doc = "Floating-point reciprocal step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsd_f64)"]
+#[doc = "Floating-point round to integral exact, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frecps))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrecpsd_f64(a: f64, b: f64) -> f64 {
+#[cfg_attr(test, assert_instr(frintx))]
+pub unsafe fn vrndx_f64(a: float64x1_t) -> float64x1_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecps.f64"
+            link_name = "llvm.rint.v1f64"
         )]
-        fn _vrecpsd_f64(a: f64, b: f64) -> f64;
+        fn _vrndx_f64(a: float64x1_t) -> float64x1_t;
     }
-    _vrecpsd_f64(a, b)
+    _vrndx_f64(a)
 }
-#[doc = "Floating-point reciprocal step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpss_f32)"]
+#[doc = "Floating-point round to integral exact, using current rounding mode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frecps))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrecpss_f32(a: f32, b: f32) -> f32 {
+#[cfg_attr(test, assert_instr(frintx))]
+pub unsafe fn vrndxq_f64(a: float64x2_t) -> float64x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecps.f32"
+            link_name = "llvm.rint.v2f64"
         )]
-        fn _vrecpss_f32(a: f32, b: f32) -> f32;
+        fn _vrndxq_f64(a: float64x2_t) -> float64x2_t;
     }
-    _vrecpss_f32(a, b)
+    _vrndxq_f64(a)
 }
-#[doc = "Floating-point reciprocal exponent"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpxd_f64)"]
+#[doc = "Signed rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frecpx))]
+#[cfg_attr(test, assert_instr(srshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrecpxd_f64(a: f64) -> f64 {
+pub unsafe fn vrshld_s64(a: i64, b: i64) -> i64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecpx.f64"
+            link_name = "llvm.aarch64.neon.srshl.i64"
         )]
-        fn _vrecpxd_f64(a: f64) -> f64;
+        fn _vrshld_s64(a: i64, b: i64) -> i64;
     }
-    _vrecpxd_f64(a)
+    _vrshld_s64(a, b)
 }
-#[doc = "Floating-point reciprocal exponent"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpxs_f32)"]
+#[doc = "Unsigned rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frecpx))]
+#[cfg_attr(test, assert_instr(urshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrecpxs_f32(a: f32) -> f32 {
+pub unsafe fn vrshld_u64(a: u64, b: i64) -> u64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecpx.f32"
+            link_name = "llvm.aarch64.neon.urshl.i64"
         )]
-        fn _vrecpxs_f32(a: f32) -> f32;
+        fn _vrshld_u64(a: i64, b: i64) -> i64;
     }
-    _vrecpxs_f32(a)
+    _vrshld_u64(a.as_signed(), b).as_unsigned()
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"]
+#[doc = "Signed rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrd_n_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_p128(a: p128) -> float64x2_t {
-    transmute(a)
+pub unsafe fn vrshrd_n_s64<const N: i32>(a: i64) -> i64 {
+    static_assert!(N >= 1 && N <= 64);
+    vrshld_s64(a, -N as i64)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32)"]
+#[doc = "Unsigned rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrd_n_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t {
-    transmute(a)
+pub unsafe fn vrshrd_n_u64<const N: i32>(a: u64) -> u64 {
+    static_assert!(N >= 1 && N <= 64);
+    vrshld_u64(a, -N as i64)
 }
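`vrshrd_n_*` above reuse `vrshld_*` with a negated count because AArch64 only has a rounding shift-left instruction, whose negative counts shift right. A scalar model of the rounding right shift this produces (add the rounding constant, then shift arithmetically); `rshr_s64` is a hypothetical helper:

```rust
// 128-bit arithmetic avoids overflow when adding the rounding constant
// near the edges of the i64 range.
fn rshr_s64(a: i64, n: u32) -> i64 {
    (((a as i128) + (1i128 << (n - 1))) >> n) as i64
}

fn main() {
    assert_eq!(rshr_s64(7, 2), 2); // 1.75 rounds to 2
    assert_eq!(rshr_s64(-7, 2), -2); // -1.75 rounds to -2
    assert_eq!(rshr_s64(6, 2), 2); // the 1.5 tie rounds up
}
```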
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32)"]
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t {
-    transmute(a)
+pub unsafe fn vrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    simd_shuffle!(
+        a,
+        vrshrn_n_s16::<N>(b),
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+    )
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32)"]
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t {
-    transmute(a)
+pub unsafe fn vrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    simd_shuffle!(a, vrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32)"]
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t {
-    transmute(a)
+pub unsafe fn vrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    simd_shuffle!(a, vrshrn_n_s64::<N>(b), [0, 1, 2, 3])
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64)"]
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t {
-    transmute(a)
+pub unsafe fn vrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    simd_shuffle!(
+        a,
+        vrshrn_n_u16::<N>(b),
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+    )
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)"]
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t {
-    transmute(a)
+pub unsafe fn vrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    simd_shuffle!(a, vrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)"]
+#[doc = "Rounding shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t {
-    transmute(a)
+pub unsafe fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    simd_shuffle!(a, vrshrn_n_u64::<N>(b), [0, 1, 2, 3])
 }
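The `vrshrn_high_n_*` functions narrow `b` with a rounding right shift and append the result to the lanes of `a` via `simd_shuffle!`. A scalar restatement of one of them, with `rshrn_high_n_s16` as a hypothetical stand-in:

```rust
// Sketch of vrshrn_high_n_s16::<N>: rounding-shift each i16 lane of `b`
// right by N, truncate to i8, and place the results after the lanes of `a`.
fn rshrn_high_n_s16<const N: i32>(a: [i8; 8], b: [i16; 8]) -> [i8; 16] {
    let mut out = [0i8; 16];
    out[..8].copy_from_slice(&a);
    for (dst, &x) in out[8..].iter_mut().zip(&b) {
        *dst = (((x as i32) + (1 << (N - 1))) >> N) as i8;
    }
    out
}

fn main() {
    let hi = rshrn_high_n_s16::<2>([0; 8], [3, 5, -3, 6, 0, 0, 0, 0]);
    assert_eq!(&hi[8..12], &[1, 1, -1, 2]);
}
```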
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)"]
+#[doc = "Reciprocal square-root estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrte))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t {
-    transmute(a)
+pub unsafe fn vrsqrte_f64(a: float64x1_t) -> float64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrte.v1f64"
+        )]
+        fn _vrsqrte_f64(a: float64x1_t) -> float64x1_t;
+    }
+    _vrsqrte_f64(a)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f64)"]
+#[doc = "Reciprocal square-root estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrte))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_s64_f64(a: float64x1_t) -> int64x1_t {
-    transmute(a)
+pub unsafe fn vrsqrteq_f64(a: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrte.v2f64"
+        )]
+        fn _vrsqrteq_f64(a: float64x2_t) -> float64x2_t;
+    }
+    _vrsqrteq_f64(a)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"]
+#[doc = "Reciprocal square-root estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrted_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrte))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t {
-    transmute(a)
+pub unsafe fn vrsqrted_f64(a: f64) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrte.f64"
+        )]
+        fn _vrsqrted_f64(a: f64) -> f64;
+    }
+    _vrsqrted_f64(a)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64)"]
+#[doc = "Reciprocal square-root estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtes_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrte))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t {
-    transmute(a)
+pub unsafe fn vrsqrtes_f32(a: f32) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrte.f32"
+        )]
+        fn _vrsqrtes_f32(a: f32) -> f32;
+    }
+    _vrsqrtes_f32(a)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64)"]
+#[doc = "Floating-point reciprocal square root step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrts))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t {
-    transmute(a)
+pub unsafe fn vrsqrts_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrts.v1f64"
+        )]
+        fn _vrsqrts_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+    }
+    _vrsqrts_f64(a, b)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f64)"]
+#[doc = "Floating-point reciprocal square root step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrts))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_u64_f64(a: float64x1_t) -> uint64x1_t {
-    transmute(a)
+pub unsafe fn vrsqrtsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrts.v2f64"
+        )]
+        fn _vrsqrtsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    _vrsqrtsq_f64(a, b)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"]
+#[doc = "Floating-point reciprocal square root step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsd_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrts))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t {
-    transmute(a)
+pub unsafe fn vrsqrtsd_f64(a: f64, b: f64) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrts.f64"
+        )]
+        fn _vrsqrtsd_f64(a: f64, b: f64) -> f64;
+    }
+    _vrsqrtsd_f64(a, b)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64)"]
+#[doc = "Floating-point reciprocal square root step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtss_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frsqrts))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t {
-    transmute(a)
+pub unsafe fn vrsqrtss_f32(a: f32, b: f32) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frsqrts.f32"
+        )]
+        fn _vrsqrtss_f32(a: f32, b: f32) -> f32;
+    }
+    _vrsqrtss_f32(a, b)
 }
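The FRSQRTE/FRSQRTS pair above is designed for Newton-Raphson refinement of 1/sqrt(x), since FRSQRTS computes (3 - a*b)/2 per the Arm reference. A minimal sketch; the helper name and iteration count are illustrative, not part of this crate:

// Minimal sketch, assuming FRSQRTS computes (3 - a*b)/2 as documented by Arm.
#[cfg(target_arch = "aarch64")]
unsafe fn rsqrt_refined(x: f32) -> f32 {
    use std::arch::aarch64::{vrsqrtes_f32, vrsqrtss_f32};
    let mut y = vrsqrtes_f32(x); // coarse initial estimate
    for _ in 0..2 {
        // Newton-Raphson step: y' = y * (3 - x*y*y) / 2
        y *= vrsqrtss_f32(x * y, y);
    }
    y
}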
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f64)"]
+#[doc = "Signed rounding shift right and accumulate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p64_f64(a: float64x1_t) -> poly64x1_t {
-    transmute(a)
+pub unsafe fn vrsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
+    static_assert!(N >= 1 && N <= 64);
+    let b: i64 = vrshrd_n_s64::<N>(b);
+    a.wrapping_add(b)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"]
+#[doc = "Unsigned rounding shift right and accumulate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 {
-    transmute(a)
+pub unsafe fn vrsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
+    static_assert!(N >= 1 && N <= 64);
+    let b: u64 = vrshrd_n_u64::<N>(b);
+    a.wrapping_add(b)
 }
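The scalar accumulate forms reduce to plain integer arithmetic: `b` is rounding-shifted right by `N` (adding `1 << (N - 1)` before the shift) and added to `a` with wrapping semantics. A small sanity check with assumed values:

#[cfg(target_arch = "aarch64")]
unsafe fn rsra_demo() {
    use std::arch::aarch64::vrsrad_n_s64;
    // Rounding shift of 7 right by 2: (7 + 2) >> 2 == 2; then 10 + 2 == 12.
    assert_eq!(vrsrad_n_s64::<2>(10, 7), 12);
}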
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
-    transmute(a)
+pub unsafe fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
+    let x: int8x8_t = vrsubhn_s16(b, c);
+    simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t {
-    transmute(a)
+pub unsafe fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
+    let x: int16x4_t = vrsubhn_s32(b, c);
+    simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7])
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t {
-    transmute(a)
+pub unsafe fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
+    let x: int32x2_t = vrsubhn_s64(b, c);
+    simd_shuffle!(a, x, [0, 1, 2, 3])
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t {
-    transmute(a)
+pub unsafe fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
+    let x: uint8x8_t = vrsubhn_u16(b, c);
+    simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t {
-    transmute(a)
+pub unsafe fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
+    let x: uint16x4_t = vrsubhn_u32(b, c);
+    simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7])
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"]
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t {
-    transmute(a)
+pub unsafe fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
+    let x: uint32x2_t = vrsubhn_u64(b, c);
+    simd_shuffle!(a, x, [0, 1, 2, 3])
 }
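Each `_high` variant narrows the rounded difference of `b` and `c` and packs it into the upper lanes, keeping `a` as the lower half. A sketch with assumed inputs (the helper is illustrative):

#[cfg(target_arch = "aarch64")]
unsafe fn rsubhn_high_demo() {
    use std::arch::aarch64::*;
    let low = vdup_n_s8(1); // becomes lanes 0..7 of the result
    let b = vdupq_n_s16(0x1234);
    let c = vdupq_n_s16(0x0234);
    // (0x1234 - 0x0234 + 0x80) >> 8 == 0x10 lands in every upper lane.
    let r = vrsubhn_high_s16(low, b, c);
    assert_eq!(vgetq_lane_s8::<15>(r), 0x10);
    assert_eq!(vgetq_lane_s8::<0>(r), 1);
}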
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t {
-    transmute(a)
+pub unsafe fn vset_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> float64x1_t {
+    static_assert!(LANE == 0);
+    simd_insert!(b, LANE as u32, a)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t {
-    transmute(a)
+pub unsafe fn vsetq_lane_f64<const LANE: i32>(a: f64, b: float64x2_t) -> float64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    simd_insert!(b, LANE as u32, a)
 }
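Both lane-insert forms compile down to a single element insert, with `LANE` validated at compile time. Illustrative usage:

#[cfg(target_arch = "aarch64")]
unsafe fn set_lane_demo() {
    use std::arch::aarch64::*;
    let v = vdupq_n_f64(0.0);
    let v = vsetq_lane_f64::<1>(2.5, v); // overwrite lane 1 only
    assert_eq!(vgetq_lane_f64::<1>(v), 2.5);
    assert_eq!(vgetq_lane_f64::<0>(v), 0.0);
}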
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"]
+#[doc = "SHA512 hash update part 2"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512h2q_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t {
-    transmute(a)
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(sha512h2))]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+pub unsafe fn vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha512h2"
+        )]
+        fn _vsha512h2q_u64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+    }
+    _vsha512h2q_u64(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"]
+#[doc = "SHA512 hash update part 1"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512hq_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t {
-    transmute(a)
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(sha512h))]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+pub unsafe fn vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha512h"
+        )]
+        fn _vsha512hq_u64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+    }
+    _vsha512hq_u64(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"]
+#[doc = "SHA512 schedule update 0"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su0q_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t {
-    transmute(a)
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(sha512su0))]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+pub unsafe fn vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha512su0"
+        )]
+        fn _vsha512su0q_u64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+    _vsha512su0q_u64(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"]
+#[doc = "SHA512 schedule update 1"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su1q_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t {
-    transmute(a)
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(sha512su1))]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+pub unsafe fn vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha512su1"
+        )]
+        fn _vsha512su1q_u64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+    }
+    _vsha512su1q_u64(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
 }
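The SHA512 intrinsics need the `sha3` extension at run time as well as at compile time. A hedged sketch of a guarded update step (placeholder state layout, not a full SHA-512 round schedule):

#[cfg(target_arch = "aarch64")]
fn sha512_step(hash: &mut [u64; 4], w: [u64; 2]) {
    if std::arch::is_aarch64_feature_detected!("sha3") {
        unsafe {
            use std::arch::aarch64::*;
            let ab = vld1q_u64(hash.as_ptr());
            let cd = vld1q_u64(hash.as_ptr().add(2));
            let wv = vld1q_u64(w.as_ptr());
            // One h/h2 pair; a real implementation interleaves these with
            // vsha512su0q_u64/vsha512su1q_u64 schedule updates.
            let t = vsha512hq_u64(ab, cd, wv);
            vst1q_u64(hash.as_mut_ptr(), vsha512h2q_u64(t, cd, ab));
        }
    }
}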
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"]
+#[doc = "Signed Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshld_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sshl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t {
-    transmute(a)
+pub unsafe fn vshld_s64(a: i64, b: i64) -> i64 {
+    transmute(vshl_s64(transmute(a), transmute(b)))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"]
+#[doc = "Unsigned Shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshld_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ushl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t {
-    transmute(a)
+pub unsafe fn vshld_u64(a: u64, b: i64) -> u64 {
+    transmute(vshl_u64(transmute(a), transmute(b)))
 }
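Because the scalar forms route through the vector SSHL/USHL, a negative shift count shifts right (arithmetic for signed, logical for unsigned). Assumed-value checks:

#[cfg(target_arch = "aarch64")]
unsafe fn shld_demo() {
    use std::arch::aarch64::{vshld_s64, vshld_u64};
    assert_eq!(vshld_s64(1, 4), 16);    // positive count: shift left
    assert_eq!(vshld_s64(-32, -4), -2); // negative count: arithmetic shift right
    assert_eq!(vshld_u64(32, -4), 2);   // negative count: logical shift right
}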
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"]
+#[doc = "Signed shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sshll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t {
-    transmute(a)
+pub unsafe fn vshll_high_n_s8<const N: i32>(a: int8x16_t) -> int16x8_t {
+    static_assert!(N >= 0 && N <= 8);
+    let b: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+    vshll_n_s8::<N>(b)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"]
+#[doc = "Signed shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sshll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t {
-    transmute(a)
+pub unsafe fn vshll_high_n_s16<const N: i32>(a: int16x8_t) -> int32x4_t {
+    static_assert!(N >= 0 && N <= 16);
+    let b: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+    vshll_n_s16::<N>(b)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"]
+#[doc = "Signed shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sshll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t {
-    transmute(a)
+pub unsafe fn vshll_high_n_s32<const N: i32>(a: int32x4_t) -> int64x2_t {
+    static_assert!(N >= 0 && N <= 32);
+    let b: int32x2_t = simd_shuffle!(a, a, [2, 3]);
+    vshll_n_s32::<N>(b)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"]
+#[doc = "Signed shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ushll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t {
-    transmute(a)
+pub unsafe fn vshll_high_n_u8<const N: i32>(a: uint8x16_t) -> uint16x8_t {
+    static_assert!(N >= 0 && N <= 8);
+    let b: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+    vshll_n_u8::<N>(b)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s64)"]
+#[doc = "Signed shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ushll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_s64(a: int64x1_t) -> float64x1_t {
-    transmute(a)
+pub unsafe fn vshll_high_n_u16<const N: i32>(a: uint16x8_t) -> uint32x4_t {
+    static_assert!(N >= 0 && N <= 16);
+    let b: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+    vshll_n_u16::<N>(b)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s64)"]
+#[doc = "Signed shift left long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ushll2, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p64_s64(a: int64x1_t) -> poly64x1_t {
-    transmute(a)
+pub unsafe fn vshll_high_n_u32<const N: i32>(a: uint32x4_t) -> uint64x2_t {
+    static_assert!(N >= 0 && N <= 32);
+    let b: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
+    vshll_n_u32::<N>(b)
 }
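The `_high` widening shifts consume only the upper half of the input, pairing with the non-`high` forms to widen a full vector in two steps. An illustrative helper (name assumed):

#[cfg(target_arch = "aarch64")]
unsafe fn widen_both_halves(
    a: std::arch::aarch64::int8x16_t,
) -> (std::arch::aarch64::int16x8_t, std::arch::aarch64::int16x8_t) {
    use std::arch::aarch64::*;
    let lo = vshll_n_s8::<2>(vget_low_s8(a)); // lanes 0..7, each widened and shifted left by 2
    let hi = vshll_high_n_s8::<2>(a);         // lanes 8..15, likewise
    (lo, hi)
}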
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"]
+#[doc = "Shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t {
-    transmute(a)
+pub unsafe fn vshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    simd_shuffle!(
+        a,
+        vshrn_n_s16::<N>(b),
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+    )
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"]
+#[doc = "Shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t {
-    transmute(a)
+pub unsafe fn vshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    simd_shuffle!(a, vshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"]
+#[doc = "Shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t {
-    transmute(a)
+pub unsafe fn vshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    simd_shuffle!(a, vshrn_n_s64::<N>(b), [0, 1, 2, 3])
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"]
+#[doc = "Shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t {
-    transmute(a)
+pub unsafe fn vshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    simd_shuffle!(
+        a,
+        vshrn_n_u16::<N>(b),
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+    )
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"]
+#[doc = "Shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t {
-    transmute(a)
+pub unsafe fn vshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    simd_shuffle!(a, vshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"]
+#[doc = "Shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t {
-    transmute(a)
+pub unsafe fn vshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    simd_shuffle!(a, vshrn_n_u64::<N>(b), [0, 1, 2, 3])
 }
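Conversely, the `shrn_high` forms narrow into the upper lanes, so two widened halves can be packed back into one vector. A sketch mirroring the widening helper above:

#[cfg(target_arch = "aarch64")]
unsafe fn narrow_both_halves(
    lo: std::arch::aarch64::int16x8_t,
    hi: std::arch::aarch64::int16x8_t,
) -> std::arch::aarch64::int8x16_t {
    use std::arch::aarch64::*;
    let low_half = vshrn_n_s16::<2>(lo); // result lanes 0..7
    vshrn_high_n_s16::<2>(low_half, hi)  // result lanes 8..15 come from `hi`
}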
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t {
-    transmute(a)
+pub unsafe fn vsli_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vsli.v8i8"
+        )]
+        fn _vsli_n_s8(a: int8x8_t, b: int8x8_t, n: i32) -> int8x8_t;
+    }
+    _vsli_n_s8(a, b, N)
 }
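Per lane, SLI computes `(b << N) | (a & ((1 << N) - 1))`: the destination keeps its low `N` bits and the shifted source is inserted above them. An assumed-value check:

#[cfg(target_arch = "aarch64")]
unsafe fn sli_demo() {
    use std::arch::aarch64::*;
    let a = vdup_n_s8(0b0000_0011);
    let b = vdup_n_s8(0b0000_0101);
    // (0b101 << 2) | 0b11 == 0b10111 == 23 in every lane.
    let r = vsli_n_s8::<2>(a, b);
    assert_eq!(vget_lane_s8::<0>(r), 0b1_0111);
}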
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t {
-    transmute(a)
+pub unsafe fn vsliq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vsli.v16i8"
+        )]
+        fn _vsliq_n_s8(a: int8x16_t, b: int8x16_t, n: i32) -> int8x16_t;
+    }
+    _vsliq_n_s8(a, b, N)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_u64(a: uint64x1_t) -> float64x1_t {
-    transmute(a)
+pub unsafe fn vsli_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vsli.v4i16"
+        )]
+        fn _vsli_n_s16(a: int16x4_t, b: int16x4_t, n: i32) -> int16x4_t;
+    }
+    _vsli_n_s16(a, b, N)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_p64_u64(a: uint64x1_t) -> poly64x1_t {
-    transmute(a)
+pub unsafe fn vsliq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vsli.v8i16"
+        )]
+        fn _vsliq_n_s16(a: int16x8_t, b: int16x8_t, n: i32) -> int16x8_t;
+    }
+    _vsliq_n_s16(a, b, N)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t {
-    transmute(a)
+pub unsafe fn vsli_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert!(N >= 0 && N <= 31);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vsli.v2i32"
+        )]
+        fn _vsli_n_s32(a: int32x2_t, b: int32x2_t, n: i32) -> int32x2_t;
+    }
+    _vsli_n_s32(a, b, N)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t {
-    transmute(a)
+pub unsafe fn vsliq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert!(N >= 0 && N <= 31);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vsli.v4i32"
+        )]
+        fn _vsliq_n_s32(a: int32x4_t, b: int32x4_t, n: i32) -> int32x4_t;
+    }
+    _vsliq_n_s32(a, b, N)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t {
-    transmute(a)
+pub unsafe fn vsli_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    static_assert!(N >= 0 && N <= 63);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vsli.v1i64"
+        )]
+        fn _vsli_n_s64(a: int64x1_t, b: int64x1_t, n: i32) -> int64x1_t;
+    }
+    _vsli_n_s64(a, b, N)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t {
-    transmute(a)
+pub unsafe fn vsliq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    static_assert!(N >= 0 && N <= 63);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vsli.v2i64"
+        )]
+        fn _vsliq_n_s64(a: int64x2_t, b: int64x2_t, n: i32) -> int64x2_t;
+    }
+    _vsliq_n_s64(a, b, N)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t {
-    transmute(a)
+pub unsafe fn vsli_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    transmute(vsli_n_s8::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t {
-    transmute(a)
+pub unsafe fn vsliq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    transmute(vsliq_n_s8::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t {
-    transmute(a)
+pub unsafe fn vsli_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    transmute(vsli_n_s16::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_f64_p64(a: poly64x1_t) -> float64x1_t {
-    transmute(a)
+pub unsafe fn vsliq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    transmute(vsliq_n_s16::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_s64_p64(a: poly64x1_t) -> int64x1_t {
-    transmute(a)
+pub unsafe fn vsli_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    static_assert!(N >= 0 && N <= 31);
+    transmute(vsli_n_s32::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpret_u64_p64(a: poly64x1_t) -> uint64x1_t {
-    transmute(a)
+pub unsafe fn vsliq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    static_assert!(N >= 0 && N <= 31);
+    transmute(vsliq_n_s32::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t {
-    transmute(a)
+pub unsafe fn vsli_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    static_assert!(N >= 0 && N <= 63);
+    transmute(vsli_n_s64::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t {
-    transmute(a)
+pub unsafe fn vsliq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    static_assert!(N >= 0 && N <= 63);
+    transmute(vsliq_n_s64::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t {
-    transmute(a)
+pub unsafe fn vsli_n_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    transmute(vsli_n_s8::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t {
-    transmute(a)
-}
-#[doc = "Floating-point round to 32-bit integer, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))]
-pub unsafe fn vrnd32x_f32(a: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint32x.v2f32"
-        )]
-        fn _vrnd32x_f32(a: float32x2_t) -> float32x2_t;
-    }
-    _vrnd32x_f32(a)
+pub unsafe fn vsliq_n_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    transmute(vsliq_n_s8::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Floating-point round to 32-bit integer, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f32)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))]
-pub unsafe fn vrnd32xq_f32(a: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint32x.v4f32"
-        )]
-        fn _vrnd32xq_f32(a: float32x4_t) -> float32x4_t;
-    }
-    _vrnd32xq_f32(a)
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsli_n_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    transmute(vsli_n_s16::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Floating-point round to 32-bit integer, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))]
-pub unsafe fn vrnd32xq_f64(a: float64x2_t) -> float64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint32x.v2f64"
-        )]
-        fn _vrnd32xq_f64(a: float64x2_t) -> float64x2_t;
-    }
-    _vrnd32xq_f64(a)
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsliq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    transmute(vsliq_n_s16::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Floating-point round to 32-bit integer, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f64)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32x))]
-pub unsafe fn vrnd32x_f64(a: float64x1_t) -> float64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.frint32x.f64"
-        )]
-        fn _vrnd32x_f64(a: f64) -> f64;
-    }
-    transmute(_vrnd32x_f64(simd_extract!(a, 0)))
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsli_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
+    static_assert!(N >= 0 && N <= 63);
+    transmute(vsli_n_s64::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Floating-point round to 32-bit integer toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f32)"]
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))]
-pub unsafe fn vrnd32z_f32(a: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint32z.v2f32"
-        )]
-        fn _vrnd32z_f32(a: float32x2_t) -> float32x2_t;
-    }
-    _vrnd32z_f32(a)
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(sli, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsliq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    static_assert!(N >= 0 && N <= 63);
+    transmute(vsliq_n_s64::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Floating-point round to 32-bit integer toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f32)"]
+#[doc = "Shift left and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vslid_n_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))]
-pub unsafe fn vrnd32zq_f32(a: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint32z.v4f32"
-        )]
-        fn _vrnd32zq_f32(a: float32x4_t) -> float32x4_t;
-    }
-    _vrnd32zq_f32(a)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sli, N = 2))]
+pub unsafe fn vslid_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
+    static_assert!(N >= 0 && N <= 63);
+    transmute(vsli_n_s64::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Floating-point round to 32-bit integer toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f64)"]
+#[doc = "Shift left and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vslid_n_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))]
-pub unsafe fn vrnd32zq_f64(a: float64x2_t) -> float64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint32z.v2f64"
-        )]
-        fn _vrnd32zq_f64(a: float64x2_t) -> float64x2_t;
-    }
-    _vrnd32zq_f64(a)
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sli, N = 2))]
+pub unsafe fn vslid_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
+    static_assert!(N >= 0 && N <= 63);
+    transmute(vsli_n_u64::<N>(transmute(a), transmute(b)))
 }
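The `d`-suffixed scalar forms apply the same bit-insert to a single 64-bit value:

#[cfg(target_arch = "aarch64")]
unsafe fn slid_demo() {
    use std::arch::aarch64::vslid_n_u64;
    // (0xFF << 8) | (0xABCD & 0xFF) == 0xFFCD
    assert_eq!(vslid_n_u64::<8>(0xABCD, 0xFF), 0xFFCD);
}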
-#[doc = "Floating-point round to 32-bit integer toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f64)"]
+#[doc = "SM3PARTW1"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw1q_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint32z))]
-pub unsafe fn vrnd32z_f64(a: float64x1_t) -> float64x1_t {
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3partw1))]
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
+pub unsafe fn vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.frint32z.f64"
+            link_name = "llvm.aarch64.crypto.sm3partw1"
         )]
-        fn _vrnd32z_f64(a: f64) -> f64;
+        fn _vsm3partw1q_u32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
     }
-    transmute(_vrnd32z_f64(simd_extract!(a, 0)))
+    _vsm3partw1q_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
 }
-#[doc = "Floating-point round to 64-bit integer, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f32)"]
+#[doc = "SM3PARTW2"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw2q_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))]
-pub unsafe fn vrnd64x_f32(a: float32x2_t) -> float32x2_t {
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3partw2))]
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
+pub unsafe fn vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint64x.v2f32"
+            link_name = "llvm.aarch64.crypto.sm3partw2"
         )]
-        fn _vrnd64x_f32(a: float32x2_t) -> float32x2_t;
+        fn _vsm3partw2q_u32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
     }
-    _vrnd64x_f32(a)
+    _vsm3partw2q_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
 }
-#[doc = "Floating-point round to 64-bit integer, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f32)"]
+#[doc = "SM3SS1"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3ss1q_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))]
-pub unsafe fn vrnd64xq_f32(a: float32x4_t) -> float32x4_t {
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3ss1))]
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
+pub unsafe fn vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint64x.v4f32"
+            link_name = "llvm.aarch64.crypto.sm3ss1"
         )]
-        fn _vrnd64xq_f32(a: float32x4_t) -> float32x4_t;
+        fn _vsm3ss1q_u32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
     }
-    _vrnd64xq_f32(a)
+    _vsm3ss1q_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
 }
-#[doc = "Floating-point round to 64-bit integer, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f64)"]
+#[doc = "SM3TT1A"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt1aq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))]
-pub unsafe fn vrnd64xq_f64(a: float64x2_t) -> float64x2_t {
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3tt1a, IMM2 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
+pub unsafe fn vsm3tt1aq_u32<const IMM2: i32>(
+    a: uint32x4_t,
+    b: uint32x4_t,
+    c: uint32x4_t,
+) -> uint32x4_t {
+    static_assert_uimm_bits!(IMM2, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint64x.v2f64"
+            link_name = "llvm.aarch64.crypto.sm3tt1a"
         )]
-        fn _vrnd64xq_f64(a: float64x2_t) -> float64x2_t;
+        fn _vsm3tt1aq_u32(a: int32x4_t, b: int32x4_t, c: int32x4_t, n: i64) -> int32x4_t;
     }
-    _vrnd64xq_f64(a)
+    _vsm3tt1aq_u32(a.as_signed(), b.as_signed(), c.as_signed(), IMM2 as i64).as_unsigned()
 }
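// Editor's sketch: the SM3TT* intrinsics take a 2-bit lane selector as a const
// generic, so IMM2 must be 0..=3 at compile time (enforced by
// static_assert_uimm_bits!). A hedged usage sketch, assuming a CPU with
// FEAT_SM3 and a nightly toolchain with the unstable stdarch_neon_sm4 feature:
#[cfg(all(target_arch = "aarch64", target_feature = "sm4"))]
mod sm3_example {
    use core::arch::aarch64::*;
    pub unsafe fn tt1a_lane2(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
        vsm3tt1aq_u32::<2>(a, b, c) // uses lane 2 of `c`; `::<4>` fails to compile
    }
}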
-#[doc = "Floating-point round to 64-bit integer, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f64)"]
+#[doc = "SM3TT1B"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt1bq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64x))]
-pub unsafe fn vrnd64x_f64(a: float64x1_t) -> float64x1_t {
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3tt1b, IMM2 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
+pub unsafe fn vsm3tt1bq_u32<const IMM2: i32>(
+    a: uint32x4_t,
+    b: uint32x4_t,
+    c: uint32x4_t,
+) -> uint32x4_t {
+    static_assert_uimm_bits!(IMM2, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.frint64x.f64"
+            link_name = "llvm.aarch64.crypto.sm3tt1b"
         )]
-        fn _vrnd64x_f64(a: f64) -> f64;
+        fn _vsm3tt1bq_u32(a: int32x4_t, b: int32x4_t, c: int32x4_t, n: i64) -> int32x4_t;
     }
-    transmute(_vrnd64x_f64(simd_extract!(a, 0)))
+    _vsm3tt1bq_u32(a.as_signed(), b.as_signed(), c.as_signed(), IMM2 as i64).as_unsigned()
 }
-#[doc = "Floating-point round to 64-bit integer toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f32)"]
+#[doc = "SM3TT2A"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt2aq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))]
-pub unsafe fn vrnd64z_f32(a: float32x2_t) -> float32x2_t {
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3tt2a, IMM2 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
+pub unsafe fn vsm3tt2aq_u32<const IMM2: i32>(
+    a: uint32x4_t,
+    b: uint32x4_t,
+    c: uint32x4_t,
+) -> uint32x4_t {
+    static_assert_uimm_bits!(IMM2, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint64z.v2f32"
+            link_name = "llvm.aarch64.crypto.sm3tt2a"
         )]
-        fn _vrnd64z_f32(a: float32x2_t) -> float32x2_t;
+        fn _vsm3tt2aq_u32(a: int32x4_t, b: int32x4_t, c: int32x4_t, n: i64) -> int32x4_t;
     }
-    _vrnd64z_f32(a)
+    _vsm3tt2aq_u32(a.as_signed(), b.as_signed(), c.as_signed(), IMM2 as i64).as_unsigned()
 }
-#[doc = "Floating-point round to 64-bit integer toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f32)"]
+#[doc = "SM3TT2B"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3tt2bq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))]
-pub unsafe fn vrnd64zq_f32(a: float32x4_t) -> float32x4_t {
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm3tt2b, IMM2 = 0))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
+pub unsafe fn vsm3tt2bq_u32<const IMM2: i32>(
+    a: uint32x4_t,
+    b: uint32x4_t,
+    c: uint32x4_t,
+) -> uint32x4_t {
+    static_assert_uimm_bits!(IMM2, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint64z.v4f32"
+            link_name = "llvm.aarch64.crypto.sm3tt2b"
         )]
-        fn _vrnd64zq_f32(a: float32x4_t) -> float32x4_t;
+        fn _vsm3tt2bq_u32(a: int32x4_t, b: int32x4_t, c: int32x4_t, n: i64) -> int32x4_t;
     }
-    _vrnd64zq_f32(a)
+    _vsm3tt2bq_u32(a.as_signed(), b.as_signed(), c.as_signed(), IMM2 as i64).as_unsigned()
 }
-#[doc = "Floating-point round to 64-bit integer toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f64)"]
+#[doc = "SM4 key"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4ekeyq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))]
-pub unsafe fn vrnd64zq_f64(a: float64x2_t) -> float64x2_t {
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm4ekey))]
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
+pub unsafe fn vsm4ekeyq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frint64z.v2f64"
+            link_name = "llvm.aarch64.crypto.sm4ekey"
         )]
-        fn _vrnd64zq_f64(a: float64x2_t) -> float64x2_t;
+        fn _vsm4ekeyq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     }
-    _vrnd64zq_f64(a)
+    _vsm4ekeyq_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Floating-point round to 64-bit integer toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f64)"]
+#[doc = "SM4 encode"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4eq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,frintts")]
-#[unstable(feature = "stdarch_neon_ftts", issue = "117227")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(frint64z))]
-pub unsafe fn vrnd64z_f64(a: float64x1_t) -> float64x1_t {
+#[target_feature(enable = "neon,sm4")]
+#[cfg_attr(test, assert_instr(sm4e))]
+#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
+pub unsafe fn vsm4eq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.frint64z.f64"
+            link_name = "llvm.aarch64.crypto.sm4e"
         )]
-        fn _vrnd64z_f64(a: f64) -> f64;
+        fn _vsm4eq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     }
-    transmute(_vrnd64z_f64(simd_extract!(a, 0)))
+    _vsm4eq_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }
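// Editor's sketch: in practice vsm4ekeyq_u32 and vsm4eq_u32 are paired, the
// former deriving four round keys and the latter applying four SM4 rounds with
// them. Hedged example under the same FEAT_SM4 / nightly assumptions as above:
#[cfg(all(target_arch = "aarch64", target_feature = "sm4"))]
mod sm4_example {
    use core::arch::aarch64::*;
    pub unsafe fn four_rounds(data: uint32x4_t, key: uint32x4_t, ck: uint32x4_t) -> uint32x4_t {
        let rk = vsm4ekeyq_u32(key, ck); // expand four round keys
        vsm4eq_u32(data, rk)             // encrypt: four rounds with those keys
    }
}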
-#[doc = "Floating-point round to integral, toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f32)"]
+#[doc = "Unsigned saturating accumulate of signed value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadd_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintz))]
-pub unsafe fn vrnd_f32(a: float32x2_t) -> float32x2_t {
+#[cfg_attr(test, assert_instr(usqadd))]
+pub unsafe fn vsqadd_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.trunc.v2f32"
+            link_name = "llvm.aarch64.neon.usqadd.v8i8"
         )]
-        fn _vrnd_f32(a: float32x2_t) -> float32x2_t;
+        fn _vsqadd_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     }
-    _vrnd_f32(a)
+    _vsqadd_u8(a.as_signed(), b).as_unsigned()
 }
-#[doc = "Floating-point round to integral, toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f32)"]
+#[doc = "Unsigned saturating accumulate of signed value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintz))]
-pub unsafe fn vrndq_f32(a: float32x4_t) -> float32x4_t {
+#[cfg_attr(test, assert_instr(usqadd))]
+pub unsafe fn vsqaddq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.trunc.v4f32"
+            link_name = "llvm.aarch64.neon.usqadd.v16i8"
         )]
-        fn _vrndq_f32(a: float32x4_t) -> float32x4_t;
+        fn _vsqaddq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
     }
-    _vrndq_f32(a)
+    _vsqaddq_u8(a.as_signed(), b).as_unsigned()
 }
-#[doc = "Floating-point round to integral, toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f64)"]
+#[doc = "Unsigned saturating accumulate of signed value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadd_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintz))]
-pub unsafe fn vrnd_f64(a: float64x1_t) -> float64x1_t {
+#[cfg_attr(test, assert_instr(usqadd))]
+pub unsafe fn vsqadd_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.trunc.v1f64"
+            link_name = "llvm.aarch64.neon.usqadd.v4i16"
         )]
-        fn _vrnd_f64(a: float64x1_t) -> float64x1_t;
+        fn _vsqadd_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     }
-    _vrnd_f64(a)
+    _vsqadd_u16(a.as_signed(), b).as_unsigned()
 }
-#[doc = "Floating-point round to integral, toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f64)"]
+#[doc = "Unsigned saturating accumulate of signed value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintz))]
-pub unsafe fn vrndq_f64(a: float64x2_t) -> float64x2_t {
+#[cfg_attr(test, assert_instr(usqadd))]
+pub unsafe fn vsqaddq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.trunc.v2f64"
+            link_name = "llvm.aarch64.neon.usqadd.v8i16"
         )]
-        fn _vrndq_f64(a: float64x2_t) -> float64x2_t;
+        fn _vsqaddq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
     }
-    _vrndq_f64(a)
+    _vsqaddq_u16(a.as_signed(), b).as_unsigned()
 }
-#[doc = "Floating-point round to integral, to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f32)"]
+#[doc = "Unsigned saturating accumulate of signed value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadd_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frinta))]
-pub unsafe fn vrnda_f32(a: float32x2_t) -> float32x2_t {
+#[cfg_attr(test, assert_instr(usqadd))]
+pub unsafe fn vsqadd_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.round.v2f32"
+            link_name = "llvm.aarch64.neon.usqadd.v2i32"
         )]
-        fn _vrnda_f32(a: float32x2_t) -> float32x2_t;
+        fn _vsqadd_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     }
-    _vrnda_f32(a)
+    _vsqadd_u32(a.as_signed(), b).as_unsigned()
 }
-#[doc = "Floating-point round to integral, to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f32)"]
+#[doc = "Unsigned saturating accumulate of signed value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frinta))]
-pub unsafe fn vrndaq_f32(a: float32x4_t) -> float32x4_t {
+#[cfg_attr(test, assert_instr(usqadd))]
+pub unsafe fn vsqaddq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.round.v4f32"
+            link_name = "llvm.aarch64.neon.usqadd.v4i32"
         )]
-        fn _vrndaq_f32(a: float32x4_t) -> float32x4_t;
+        fn _vsqaddq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     }
-    _vrndaq_f32(a)
+    _vsqaddq_u32(a.as_signed(), b).as_unsigned()
 }
-#[doc = "Floating-point round to integral, to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f64)"]
+#[doc = "Unsigned saturating accumulate of signed value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadd_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frinta))]
-pub unsafe fn vrnda_f64(a: float64x1_t) -> float64x1_t {
+#[cfg_attr(test, assert_instr(usqadd))]
+pub unsafe fn vsqadd_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.round.v1f64"
+            link_name = "llvm.aarch64.neon.usqadd.v1i64"
         )]
-        fn _vrnda_f64(a: float64x1_t) -> float64x1_t;
+        fn _vsqadd_u64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
     }
-    _vrnda_f64(a)
+    _vsqadd_u64(a.as_signed(), b).as_unsigned()
 }
-#[doc = "Floating-point round to integral, to nearest with ties to away"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f64)"]
+#[doc = "Unsigned saturating accumulate of signed value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddq_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frinta))]
-pub unsafe fn vrndaq_f64(a: float64x2_t) -> float64x2_t {
+#[cfg_attr(test, assert_instr(usqadd))]
+pub unsafe fn vsqaddq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.round.v2f64"
+            link_name = "llvm.aarch64.neon.usqadd.v2i64"
         )]
-        fn _vrndaq_f64(a: float64x2_t) -> float64x2_t;
+        fn _vsqaddq_u64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
     }
-    _vrndaq_f64(a)
+    _vsqaddq_u64(a.as_signed(), b).as_unsigned()
 }
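// Editor's sketch: USQADD adds a *signed* value to an *unsigned* accumulator
// and saturates at the unsigned bounds, so negative addends clamp at zero.
// Minimal example, assuming an aarch64 target with NEON:
#[cfg(target_arch = "aarch64")]
unsafe fn usqadd_example() {
    use core::arch::aarch64::*;
    let r = vsqadd_u8(vdup_n_u8(1), vdup_n_s8(-2));
    assert_eq!(vget_lane_u8::<0>(r), 0); // 1 + (-2) saturates to 0, no wrap to 0xff
}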
-#[doc = "Floating-point round to integral, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f32)"]
+#[doc = "Unsigned saturating accumulate of signed value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddb_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frinti))]
-pub unsafe fn vrndi_f32(a: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.nearbyint.v2f32"
-        )]
-        fn _vrndi_f32(a: float32x2_t) -> float32x2_t;
-    }
-    _vrndi_f32(a)
+pub unsafe fn vsqaddb_u8(a: u8, b: i8) -> u8 {
+    simd_extract!(vsqadd_u8(vdup_n_u8(a), vdup_n_s8(b)), 0)
 }
-#[doc = "Floating-point round to integral, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f32)"]
+#[doc = "Unsigned saturating accumulate of signed value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddh_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frinti))]
-pub unsafe fn vrndiq_f32(a: float32x4_t) -> float32x4_t {
+pub unsafe fn vsqaddh_u16(a: u16, b: i16) -> u16 {
+    simd_extract!(vsqadd_u16(vdup_n_u16(a), vdup_n_s16(b)), 0)
+}
+#[doc = "Unsigned saturating accumulate of signed value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddd_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usqadd))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsqaddd_u64(a: u64, b: i64) -> u64 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.nearbyint.v4f32"
+            link_name = "llvm.aarch64.neon.usqadd.i64"
         )]
-        fn _vrndiq_f32(a: float32x4_t) -> float32x4_t;
+        fn _vsqaddd_u64(a: i64, b: i64) -> i64;
     }
-    _vrndiq_f32(a)
+    _vsqaddd_u64(a.as_signed(), b).as_unsigned()
 }
-#[doc = "Floating-point round to integral, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f64)"]
+#[doc = "Unsigned saturating accumulate of signed value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadds_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(usqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frinti))]
-pub unsafe fn vrndi_f64(a: float64x1_t) -> float64x1_t {
+pub unsafe fn vsqadds_u32(a: u32, b: i32) -> u32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.nearbyint.v1f64"
+            link_name = "llvm.aarch64.neon.usqadd.i32"
         )]
-        fn _vrndi_f64(a: float64x1_t) -> float64x1_t;
+        fn _vsqadds_u32(a: i32, b: i32) -> i32;
     }
-    _vrndi_f64(a)
+    _vsqadds_u32(a.as_signed(), b).as_unsigned()
 }
-#[doc = "Floating-point round to integral, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f64)"]
+#[doc = "Calculates the square root of each lane."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fsqrt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsqrt_f32(a: float32x2_t) -> float32x2_t {
+    simd_fsqrt(a)
+}
+#[doc = "Calculates the square root of each lane."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fsqrt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsqrtq_f32(a: float32x4_t) -> float32x4_t {
+    simd_fsqrt(a)
+}
+#[doc = "Calculates the square root of each lane."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fsqrt))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsqrt_f64(a: float64x1_t) -> float64x1_t {
+    simd_fsqrt(a)
+}
+#[doc = "Calculates the square root of each lane."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(fsqrt))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frinti))]
-pub unsafe fn vrndiq_f64(a: float64x2_t) -> float64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.nearbyint.v2f64"
-        )]
-        fn _vrndiq_f64(a: float64x2_t) -> float64x2_t;
-    }
-    _vrndiq_f64(a)
+pub unsafe fn vsqrtq_f64(a: float64x2_t) -> float64x2_t {
+    simd_fsqrt(a)
 }
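// Editor's sketch: the vsqrt_* family lowers to a single FSQRT with IEEE 754
// semantics, so negative lanes yield NaN rather than trapping. For example,
// assuming an aarch64 target with NEON:
#[cfg(target_arch = "aarch64")]
unsafe fn sqrt_example() {
    use core::arch::aarch64::*;
    let r = vsqrtq_f32(vdupq_n_f32(9.0));
    assert_eq!(vgetq_lane_f32::<0>(r), 3.0);
}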
-#[doc = "Floating-point round to integral, toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f32)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintm))]
-pub unsafe fn vrndm_f32(a: float32x2_t) -> float32x2_t {
+pub unsafe fn vsri_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.floor.v2f32"
+            link_name = "llvm.aarch64.neon.vsri.v8i8"
         )]
-        fn _vrndm_f32(a: float32x2_t) -> float32x2_t;
+        fn _vsri_n_s8(a: int8x8_t, b: int8x8_t, n: i32) -> int8x8_t;
     }
-    _vrndm_f32(a)
+    _vsri_n_s8(a, b, N)
 }
-#[doc = "Floating-point round to integral, toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f32)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintm))]
-pub unsafe fn vrndmq_f32(a: float32x4_t) -> float32x4_t {
+pub unsafe fn vsriq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    static_assert!(N >= 1 && N <= 8);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.floor.v4f32"
+            link_name = "llvm.aarch64.neon.vsri.v16i8"
         )]
-        fn _vrndmq_f32(a: float32x4_t) -> float32x4_t;
+        fn _vsriq_n_s8(a: int8x16_t, b: int8x16_t, n: i32) -> int8x16_t;
     }
-    _vrndmq_f32(a)
+    _vsriq_n_s8(a, b, N)
 }
-#[doc = "Floating-point round to integral, toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f64)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintm))]
-pub unsafe fn vrndm_f64(a: float64x1_t) -> float64x1_t {
+pub unsafe fn vsri_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.floor.v1f64"
+            link_name = "llvm.aarch64.neon.vsri.v4i16"
         )]
-        fn _vrndm_f64(a: float64x1_t) -> float64x1_t;
+        fn _vsri_n_s16(a: int16x4_t, b: int16x4_t, n: i32) -> int16x4_t;
     }
-    _vrndm_f64(a)
+    _vsri_n_s16(a, b, N)
 }
-#[doc = "Floating-point round to integral, toward minus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f64)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintm))]
-pub unsafe fn vrndmq_f64(a: float64x2_t) -> float64x2_t {
+pub unsafe fn vsriq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.floor.v2f64"
+            link_name = "llvm.aarch64.neon.vsri.v8i16"
         )]
-        fn _vrndmq_f64(a: float64x2_t) -> float64x2_t;
+        fn _vsriq_n_s16(a: int16x8_t, b: int16x8_t, n: i32) -> int16x8_t;
     }
-    _vrndmq_f64(a)
+    _vsriq_n_s16(a, b, N)
 }
-#[doc = "Floating-point round to integral, to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndn_f64)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintn))]
-pub unsafe fn vrndn_f64(a: float64x1_t) -> float64x1_t {
+pub unsafe fn vsri_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frintn.v1f64"
+            link_name = "llvm.aarch64.neon.vsri.v2i32"
         )]
-        fn _vrndn_f64(a: float64x1_t) -> float64x1_t;
+        fn _vsri_n_s32(a: int32x2_t, b: int32x2_t, n: i32) -> int32x2_t;
     }
-    _vrndn_f64(a)
+    _vsri_n_s32(a, b, N)
 }
-#[doc = "Floating-point round to integral, to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndnq_f64)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintn))]
-pub unsafe fn vrndnq_f64(a: float64x2_t) -> float64x2_t {
+pub unsafe fn vsriq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert!(N >= 1 && N <= 32);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frintn.v2f64"
+            link_name = "llvm.aarch64.neon.vsri.v4i32"
         )]
-        fn _vrndnq_f64(a: float64x2_t) -> float64x2_t;
+        fn _vsriq_n_s32(a: int32x4_t, b: int32x4_t, n: i32) -> int32x4_t;
     }
-    _vrndnq_f64(a)
+    _vsriq_n_s32(a, b, N)
 }
-#[doc = "Floating-point round to integral, to nearest with ties to even"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndns_f32)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintn))]
-pub unsafe fn vrndns_f32(a: f32) -> f32 {
+pub unsafe fn vsri_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    static_assert!(N >= 1 && N <= 64);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.roundeven.f32"
+            link_name = "llvm.aarch64.neon.vsri.v1i64"
         )]
-        fn _vrndns_f32(a: f32) -> f32;
+        fn _vsri_n_s64(a: int64x1_t, b: int64x1_t, n: i32) -> int64x1_t;
     }
-    _vrndns_f32(a)
+    _vsri_n_s64(a, b, N)
 }
-#[doc = "Floating-point round to integral, toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f32)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintp))]
-pub unsafe fn vrndp_f32(a: float32x2_t) -> float32x2_t {
+pub unsafe fn vsriq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    static_assert!(N >= 1 && N <= 64);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.ceil.v2f32"
+            link_name = "llvm.aarch64.neon.vsri.v2i64"
         )]
-        fn _vrndp_f32(a: float32x2_t) -> float32x2_t;
+        fn _vsriq_n_s64(a: int64x2_t, b: int64x2_t, n: i32) -> int64x2_t;
     }
-    _vrndp_f32(a)
+    _vsriq_n_s64(a, b, N)
 }
-#[doc = "Floating-point round to integral, toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f32)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintp))]
-pub unsafe fn vrndpq_f32(a: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.ceil.v4f32"
-        )]
-        fn _vrndpq_f32(a: float32x4_t) -> float32x4_t;
-    }
-    _vrndpq_f32(a)
+pub unsafe fn vsri_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    transmute(vsri_n_s8::<N>(transmute(a), transmute(b)))
 }
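// Editor's sketch: SRI shifts each lane of `b` right by N and inserts it into
// `a`, preserving the top N bits of `a`: r = (a & !(MAX >> N)) | (b >> N).
// Minimal example on u8 lanes, assuming an aarch64 target with NEON:
#[cfg(target_arch = "aarch64")]
unsafe fn sri_example() {
    use core::arch::aarch64::*;
    let r = vsri_n_u8::<4>(vdup_n_u8(0xff), vdup_n_u8(0x0f));
    assert_eq!(vget_lane_u8::<0>(r), 0xf0); // (0xff & 0xf0) | (0x0f >> 4)
}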
-#[doc = "Floating-point round to integral, toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f64)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintp))]
-pub unsafe fn vrndp_f64(a: float64x1_t) -> float64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.ceil.v1f64"
-        )]
-        fn _vrndp_f64(a: float64x1_t) -> float64x1_t;
-    }
-    _vrndp_f64(a)
+pub unsafe fn vsriq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    transmute(vsriq_n_s8::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Floating-point round to integral, toward plus infinity"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f64)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintp))]
-pub unsafe fn vrndpq_f64(a: float64x2_t) -> float64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.ceil.v2f64"
-        )]
-        fn _vrndpq_f64(a: float64x2_t) -> float64x2_t;
-    }
-    _vrndpq_f64(a)
+pub unsafe fn vsri_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    transmute(vsri_n_s16::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Floating-point round to integral exact, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f32)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintx))]
-pub unsafe fn vrndx_f32(a: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.rint.v2f32"
-        )]
-        fn _vrndx_f32(a: float32x2_t) -> float32x2_t;
-    }
-    _vrndx_f32(a)
+pub unsafe fn vsriq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    transmute(vsriq_n_s16::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Floating-point round to integral exact, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f32)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintx))]
-pub unsafe fn vrndxq_f32(a: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.rint.v4f32"
-        )]
-        fn _vrndxq_f32(a: float32x4_t) -> float32x4_t;
-    }
-    _vrndxq_f32(a)
+pub unsafe fn vsri_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    transmute(vsri_n_s32::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Floating-point round to integral exact, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f64)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintx))]
-pub unsafe fn vrndx_f64(a: float64x1_t) -> float64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.rint.v1f64"
-        )]
-        fn _vrndx_f64(a: float64x1_t) -> float64x1_t;
-    }
-    _vrndx_f64(a)
+pub unsafe fn vsriq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    transmute(vsriq_n_s32::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Floating-point round to integral exact, using current rounding mode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f64)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(frintx))]
-pub unsafe fn vrndxq_f64(a: float64x2_t) -> float64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.rint.v2f64"
-        )]
-        fn _vrndxq_f64(a: float64x2_t) -> float64x2_t;
-    }
-    _vrndxq_f64(a)
+pub unsafe fn vsri_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    static_assert!(N >= 1 && N <= 64);
+    transmute(vsri_n_s64::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Signed rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_s64)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(srshl))]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrshld_s64(a: i64, b: i64) -> i64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.srshl.i64"
-        )]
-        fn _vrshld_s64(a: i64, b: i64) -> i64;
-    }
-    _vrshld_s64(a, b)
+pub unsafe fn vsriq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    static_assert!(N >= 1 && N <= 64);
+    transmute(vsriq_n_s64::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Unsigned rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshld_u64)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(urshl))]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrshld_u64(a: u64, b: i64) -> u64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urshl.i64"
-        )]
-        fn _vrshld_u64(a: i64, b: i64) -> i64;
-    }
-    _vrshld_u64(a.as_signed(), b).as_unsigned()
+pub unsafe fn vsri_n_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    transmute(vsri_n_s8::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Signed rounding shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrd_n_s64)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrshrd_n_s64<const N: i32>(a: i64) -> i64 {
-    static_assert!(N >= 1 && N <= 64);
-    vrshld_s64(a, -N as i64)
+pub unsafe fn vsriq_n_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    static_assert!(N >= 1 && N <= 8);
+    transmute(vsriq_n_s8::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Unsigned rounding shift right"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrd_n_u64)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrshrd_n_u64<const N: i32>(a: u64) -> u64 {
-    static_assert!(N >= 1 && N <= 64);
-    vrshld_u64(a, -N as i64)
+pub unsafe fn vsri_n_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    transmute(vsri_n_s16::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Rounding shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s16)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    simd_shuffle!(
-        a,
-        vrshrn_n_s16::<N>(b),
-        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-    )
+pub unsafe fn vsriq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    static_assert!(N >= 1 && N <= 16);
+    transmute(vsriq_n_s16::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Rounding shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s32)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    simd_shuffle!(a, vrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+pub unsafe fn vsri_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
+    static_assert!(N >= 1 && N <= 64);
+    transmute(vsri_n_s64::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Rounding shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s64)"]
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(sri, N = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    simd_shuffle!(a, vrshrn_n_s64::<N>(b), [0, 1, 2, 3])
+pub unsafe fn vsriq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    static_assert!(N >= 1 && N <= 64);
+    transmute(vsriq_n_s64::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Rounding shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u16)"]
+#[doc = "Shift right and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsrid_n_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    simd_shuffle!(
-        a,
-        vrshrn_n_u16::<N>(b),
-        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-    )
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sri, N = 2))]
+pub unsafe fn vsrid_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
+    static_assert!(N >= 1 && N <= 64);
+    transmute(vsri_n_s64::<N>(transmute(a), transmute(b)))
 }
-#[doc = "Rounding shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u32)"]
+#[doc = "Shift right and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsrid_n_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    simd_shuffle!(a, vrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sri, N = 2))]
+pub unsafe fn vsrid_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
+    static_assert!(N >= 1 && N <= 64);
+    transmute(vsri_n_u64::<N>(transmute(a), transmute(b)))
 }
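// Editor's sketch: the scalar form follows the same rule on one 64-bit value,
// r = (a & !(u64::MAX >> N)) | (b >> N). Assuming an aarch64 target:
#[cfg(target_arch = "aarch64")]
unsafe fn srid_example() {
    use core::arch::aarch64::vsrid_n_u64;
    let r = vsrid_n_u64::<32>(0xffff_ffff_0000_0000, 0xaaaa_bbbb_0000_0000);
    assert_eq!(r, 0xffff_ffff_aaaa_bbbb); // top 32 bits of `a`, `b >> 32` below
}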
-#[doc = "Rounding shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    simd_shuffle!(a, vrshrn_n_u64::<N>(b), [0, 1, 2, 3])
+pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "Reciprocal square-root estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frsqrte))]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrsqrte_f64(a: float64x1_t) -> float64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrte.v1f64"
-        )]
-        fn _vrsqrte_f64(a: float64x1_t) -> float64x1_t;
-    }
-    _vrsqrte_f64(a)
+pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
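// Editor's sketch: the vst1* stores above are implemented with
// `write_unaligned`, so the destination pointer needs no particular alignment,
// matching the plain `str` in assert_instr. Example, assuming aarch64 + NEON:
#[cfg(target_arch = "aarch64")]
unsafe fn st1_example() {
    use core::arch::aarch64::*;
    let mut out = [0.0f32; 4];
    vst1q_f32(out.as_mut_ptr(), vdupq_n_f32(2.5));
    assert_eq!(out, [2.5, 2.5, 2.5, 2.5]);
}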
-#[doc = "Reciprocal square-root estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrteq_f64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frsqrte))]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrsqrteq_f64(a: float64x2_t) -> float64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrte.v2f64"
-        )]
-        fn _vrsqrteq_f64(a: float64x2_t) -> float64x2_t;
-    }
-    _vrsqrteq_f64(a)
+pub unsafe fn vst1_f64(ptr: *mut f64, a: float64x1_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "Reciprocal square-root estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrted_f64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frsqrte))]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrsqrted_f64(a: f64) -> f64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrte.f64"
-        )]
-        fn _vrsqrted_f64(a: f64) -> f64;
-    }
-    _vrsqrted_f64(a)
+pub unsafe fn vst1q_f64(ptr: *mut f64, a: float64x2_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "Reciprocal square-root estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtes_f32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frsqrte))]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrsqrtes_f32(a: f32) -> f32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrte.f32"
-        )]
-        fn _vrsqrtes_f32(a: f32) -> f32;
-    }
-    _vrsqrtes_f32(a)
+pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "Floating-point reciprocal square root step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrts_f64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frsqrts))]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrsqrts_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrts.v1f64"
-        )]
-        fn _vrsqrts_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
-    }
-    _vrsqrts_f64(a, b)
+pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "Floating-point reciprocal square root step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsq_f64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frsqrts))]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrsqrtsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrts.v2f64"
-        )]
-        fn _vrsqrtsq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
-    }
-    _vrsqrtsq_f64(a, b)
+pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "Floating-point reciprocal square root step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtsd_f64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frsqrts))]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrsqrtsd_f64(a: f64, b: f64) -> f64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrts.f64"
-        )]
-        fn _vrsqrtsd_f64(a: f64, b: f64) -> f64;
-    }
-    _vrsqrtsd_f64(a, b)
+pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "Floating-point reciprocal square root step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrtss_f32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frsqrts))]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrsqrtss_f32(a: f32, b: f32) -> f32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frsqrts.f32"
-        )]
-        fn _vrsqrtss_f32(a: f32, b: f32) -> f32;
-    }
-    _vrsqrtss_f32(a, b)
+pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "Signed rounding shift right and accumulate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_s64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
-    static_assert!(N >= 1 && N <= 64);
-    let b: i64 = vrshrd_n_s64::<N>(b);
-    a.wrapping_add(b)
+pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "Unsigned rounding shift right and accumulate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsrad_n_u64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
-    static_assert!(N >= 1 && N <= 64);
-    let b: u64 = vrshrd_n_u64::<N>(b);
-    a.wrapping_add(b)
+pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rsubhn2))]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
-    let x: int8x8_t = vrsubhn_s16(b, c);
-    simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rsubhn2))]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
-    let x: int16x4_t = vrsubhn_s32(b, c);
-    simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7])
+pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rsubhn2))]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
-    let x: int32x2_t = vrsubhn_s64(b, c);
-    simd_shuffle!(a, x, [0, 1, 2, 3])
+pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rsubhn2))]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
-    let x: uint8x8_t = vrsubhn_u16(b, c);
-    simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rsubhn2))]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
-    let x: uint16x4_t = vrsubhn_u32(b, c);
-    simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7])
+pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rsubhn2))]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
-    let x: uint32x2_t = vrsubhn_u64(b, c);
-    simd_shuffle!(a, x, [0, 1, 2, 3])
+pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vset_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> float64x1_t {
-    static_assert!(LANE == 0);
-    simd_insert!(b, LANE as u32, a)
+pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsetq_lane_f64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsetq_lane_f64<const LANE: i32>(a: f64, b: float64x2_t) -> float64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    simd_insert!(b, LANE as u32, a)
+pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "SHA512 hash update part 2"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512h2q_u64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(sha512h2))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn vsha512h2q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sha512h2"
-        )]
-        fn _vsha512h2q_u64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
-    }
-    _vsha512h2q_u64(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "SHA512 hash update part 1"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512hq_u64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(sha512h))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn vsha512hq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sha512h"
-        )]
-        fn _vsha512hq_u64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
-    }
-    _vsha512hq_u64(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "SHA512 schedule update 0"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su0q_u64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(sha512su0))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn vsha512su0q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sha512su0"
-        )]
-        fn _vsha512su0q_u64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
-    }
-    _vsha512su0q_u64(a.as_signed(), b.as_signed()).as_unsigned()
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "SHA512 schedule update 1"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha512su1q_u64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(sha512su1))]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn vsha512su1q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sha512su1"
-        )]
-        fn _vsha512su1q_u64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
-    }
-    _vsha512su1q_u64(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "Signed Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshld_s64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshl))]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshld_s64(a: i64, b: i64) -> i64 {
-    transmute(vshl_s64(transmute(a), transmute(b)))
+pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "Unsigned Shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshld_u64)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ushl))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshld_u64(a: u64, b: i64) -> u64 {
-    transmute(vshl_u64(transmute(a), transmute(b)))
+pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
 }
-#[doc = "Signed shift left long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s8)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(str))]
+#[allow(clippy::cast_ptr_alignment)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) {
+    crate::ptr::write_unaligned(ptr.cast(), a)
+}
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshll2, N = 2))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(st1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshll_high_n_s8<const N: i32>(a: int8x16_t) -> int16x8_t {
-    static_assert!(N >= 0 && N <= 8);
-    let b: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-    vshll_n_s8::<N>(b)
+pub unsafe fn vst1_f64_x2(a: *mut f64, b: float64x1x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x2.v1f64.p0f64"
+        )]
+        fn _vst1_f64_x2(a: float64x1_t, b: float64x1_t, ptr: *mut f64);
+    }
+    _vst1_f64_x2(b.0, b.1, a)
 }
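+// Illustrative usage (editor's sketch): the _x2/_x3/_x4 variants store the
+// members of a vector tuple contiguously, e.g.
+//     let pair = float64x1x2_t(vdup_n_f64(1.0), vdup_n_f64(2.0));
+//     let mut out = [0.0f64; 2];
+//     vst1_f64_x2(out.as_mut_ptr(), pair); // out == [1.0, 2.0]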
-#[doc = "Signed shift left long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s16)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshll2, N = 2))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(st1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshll_high_n_s16<const N: i32>(a: int16x8_t) -> int32x4_t {
-    static_assert!(N >= 0 && N <= 16);
-    let b: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-    vshll_n_s16::<N>(b)
+pub unsafe fn vst1q_f64_x2(a: *mut f64, b: float64x2x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x2.v2f64.p0f64"
+        )]
+        fn _vst1q_f64_x2(a: float64x2_t, b: float64x2_t, ptr: *mut f64);
+    }
+    _vst1q_f64_x2(b.0, b.1, a)
 }
-#[doc = "Signed shift left long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s32)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshll2, N = 2))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(st1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshll_high_n_s32<const N: i32>(a: int32x4_t) -> int64x2_t {
-    static_assert!(N >= 0 && N <= 32);
-    let b: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-    vshll_n_s32::<N>(b)
+pub unsafe fn vst1_f64_x3(a: *mut f64, b: float64x1x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x3.v1f64.p0f64"
+        )]
+        fn _vst1_f64_x3(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut f64);
+    }
+    _vst1_f64_x3(b.0, b.1, b.2, a)
 }
-#[doc = "Signed shift left long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u8)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ushll2, N = 2))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(st1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshll_high_n_u8<const N: i32>(a: uint8x16_t) -> uint16x8_t {
-    static_assert!(N >= 0 && N <= 8);
-    let b: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-    vshll_n_u8::<N>(b)
+pub unsafe fn vst1q_f64_x3(a: *mut f64, b: float64x2x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x3.v2f64.p0f64"
+        )]
+        fn _vst1q_f64_x3(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut f64);
+    }
+    _vst1q_f64_x3(b.0, b.1, b.2, a)
 }
-#[doc = "Signed shift left long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u16)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ushll2, N = 2))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(st1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshll_high_n_u16<const N: i32>(a: uint16x8_t) -> uint32x4_t {
-    static_assert!(N >= 0 && N <= 16);
-    let b: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-    vshll_n_u16::<N>(b)
+pub unsafe fn vst1_f64_x4(a: *mut f64, b: float64x1x4_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x4.v1f64.p0f64"
+        )]
+        fn _vst1_f64_x4(
+            a: float64x1_t,
+            b: float64x1_t,
+            c: float64x1_t,
+            d: float64x1_t,
+            ptr: *mut f64,
+        );
+    }
+    _vst1_f64_x4(b.0, b.1, b.2, b.3, a)
 }
-#[doc = "Signed shift left long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u32)"]
+#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ushll2, N = 2))]
-#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(st1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshll_high_n_u32<const N: i32>(a: uint32x4_t) -> uint64x2_t {
-    static_assert!(N >= 0 && N <= 32);
-    let b: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
-    vshll_n_u32::<N>(b)
+pub unsafe fn vst1q_f64_x4(a: *mut f64, b: float64x2x4_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st1x4.v2f64.p0f64"
+        )]
+        fn _vst1q_f64_x4(
+            a: float64x2_t,
+            b: float64x2_t,
+            c: float64x2_t,
+            d: float64x2_t,
+            ptr: *mut f64,
+        );
+    }
+    _vst1q_f64_x4(b.0, b.1, b.2, b.3, a)
 }
-#[doc = "Shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s16)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    simd_shuffle!(
-        a,
-        vshrn_n_s16::<N>(b),
-        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-    )
+pub unsafe fn vst1_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1_t) {
+    static_assert!(LANE == 0);
+    *a = simd_extract!(b, LANE as u32);
 }
-#[doc = "Shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s32)"]
+#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[cfg_attr(test, assert_instr(nop, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    simd_shuffle!(a, vshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+pub unsafe fn vst1q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    *a = simd_extract!(b, LANE as u32);
 }
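+// Illustrative usage (editor's sketch): the lane variants store a single
+// element selected by the const generic, e.g.
+//     let v = vdupq_n_f64(3.0);
+//     let mut x = 0.0f64;
+//     vst1q_lane_f64::<1>(&mut x, v); // stores lane 1, so x == 3.0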
-#[doc = "Shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s64)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st1))]
+pub unsafe fn vst2_f64(a: *mut f64, b: float64x1x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st2.v1f64.p0i8"
+        )]
+        fn _vst2_f64(a: float64x1_t, b: float64x1_t, ptr: *mut i8);
+    }
+    _vst2_f64(b.0, b.1, a as _)
+}
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    simd_shuffle!(a, vshrn_n_s64::<N>(b), [0, 1, 2, 3])
+pub unsafe fn vst2_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1x2_t) {
+    static_assert!(LANE == 0);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st2lane.v1f64.p0i8"
+        )]
+        fn _vst2_lane_f64(a: float64x1_t, b: float64x1_t, n: i64, ptr: *mut i8);
+    }
+    _vst2_lane_f64(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u16)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    simd_shuffle!(
-        a,
-        vshrn_n_u16::<N>(b),
-        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-    )
+pub unsafe fn vst2_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1x2_t) {
+    static_assert!(LANE == 0);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st2lane.v1i64.p0i8"
+        )]
+        fn _vst2_lane_s64(a: int64x1_t, b: int64x1_t, n: i64, ptr: *mut i8);
+    }
+    _vst2_lane_s64(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u32)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    simd_shuffle!(a, vshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+pub unsafe fn vst2_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x1x2_t) {
+    static_assert!(LANE == 0);
+    vst2_lane_s64::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u64)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    simd_shuffle!(a, vshrn_n_u64::<N>(b), [0, 1, 2, 3])
+pub unsafe fn vst2_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1x2_t) {
+    static_assert!(LANE == 0);
+    vst2_lane_s64::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Shift left and insert"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vslid_n_s64)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sli, N = 2))]
-pub unsafe fn vslid_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
-    static_assert!(N >= 0 && N <= 63);
-    transmute(vsli_n_s64::<N>(transmute(a), transmute(b)))
+#[cfg_attr(test, assert_instr(st2))]
+pub unsafe fn vst2q_f64(a: *mut f64, b: float64x2x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st2.v2f64.p0i8"
+        )]
+        fn _vst2q_f64(a: float64x2_t, b: float64x2_t, ptr: *mut i8);
+    }
+    _vst2q_f64(b.0, b.1, a as _)
 }
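+// Illustrative usage (editor's sketch): unlike vst1, vst2 interleaves its two
+// source vectors element-wise, e.g.
+//     let pair = float64x2x2_t(vdupq_n_f64(1.0), vdupq_n_f64(2.0));
+//     let mut out = [0.0f64; 4];
+//     vst2q_f64(out.as_mut_ptr(), pair); // out == [1.0, 2.0, 1.0, 2.0]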
-#[doc = "Shift left and insert"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vslid_n_u64)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sli, N = 2))]
-pub unsafe fn vslid_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
-    static_assert!(N >= 0 && N <= 63);
-    transmute(vsli_n_u64::<N>(transmute(a), transmute(b)))
-}
-#[doc = "SM3PARTW1"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw1q_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,sm4")]
-#[cfg_attr(test, assert_instr(sm3partw1))]
-#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
-pub unsafe fn vsm3partw1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+#[cfg_attr(test, assert_instr(st2))]
+pub unsafe fn vst2q_s64(a: *mut i64, b: int64x2x2_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sm3partw1"
+            link_name = "llvm.aarch64.neon.st2.v2i64.p0i8"
         )]
-        fn _vsm3partw1q_u32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+        fn _vst2q_s64(a: int64x2_t, b: int64x2_t, ptr: *mut i8);
     }
-    _vsm3partw1q_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+    _vst2q_s64(b.0, b.1, a as _)
 }
-#[doc = "SM3PARTW2"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3partw2q_u32)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sm4")]
-#[cfg_attr(test, assert_instr(sm3partw2))]
-#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
-pub unsafe fn vsm3partw2q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sm3partw2"
+            link_name = "llvm.aarch64.neon.st2lane.v2f64.p0i8"
         )]
-        fn _vsm3partw2q_u32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+        fn _vst2q_lane_f64(a: float64x2_t, b: float64x2_t, n: i64, ptr: *mut i8);
     }
-    _vsm3partw2q_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+    _vst2q_lane_f64(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "SM3SS1"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm3ss1q_u32)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sm4")]
-#[cfg_attr(test, assert_instr(sm3ss1))]
-#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
-pub unsafe fn vsm3ss1q_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16x2_t) {
+    static_assert_uimm_bits!(LANE, 4);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sm3ss1"
+            link_name = "llvm.aarch64.neon.st2lane.v16i8.p0i8"
         )]
-        fn _vsm3ss1q_u32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+        fn _vst2q_lane_s8(a: int8x16_t, b: int8x16_t, n: i64, ptr: *mut i8);
     }
-    _vsm3ss1q_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+    _vst2q_lane_s8(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "SM4 key"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4ekeyq_u32)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sm4")]
-#[cfg_attr(test, assert_instr(sm4ekey))]
-#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
-pub unsafe fn vsm4ekeyq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sm4ekey"
+            link_name = "llvm.aarch64.neon.st2lane.v2i64.p0i8"
         )]
-        fn _vsm4ekeyq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+        fn _vst2q_lane_s64(a: int64x2_t, b: int64x2_t, n: i64, ptr: *mut i8);
     }
-    _vsm4ekeyq_u32(a.as_signed(), b.as_signed()).as_unsigned()
+    _vst2q_lane_s64(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "SM4 encode"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsm4eq_u32)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,sm4")]
-#[cfg_attr(test, assert_instr(sm4e))]
-#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
-pub unsafe fn vsm4eq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sm4e"
-        )]
-        fn _vsm4eq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-    _vsm4eq_u32(a.as_signed(), b.as_signed()).as_unsigned()
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vst2q_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x2x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    vst2q_lane_s64::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Unsigned saturating accumulate of signed value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddb_u8)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(usqadd))]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqaddb_u8(a: u8, b: i8) -> u8 {
-    simd_extract!(vsqadd_u8(vdup_n_u8(a), vdup_n_s8(b)), 0)
+pub unsafe fn vst2q_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x16x2_t) {
+    static_assert_uimm_bits!(LANE, 4);
+    vst2q_lane_s8::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Unsigned saturating accumulate of signed value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddh_u16)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(usqadd))]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqaddh_u16(a: u16, b: i16) -> u16 {
-    simd_extract!(vsqadd_u16(vdup_n_u16(a), vdup_n_s16(b)), 0)
+pub unsafe fn vst2q_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x2x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    vst2q_lane_s64::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Unsigned saturating accumulate of signed value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddd_u64)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(usqadd))]
+#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqaddd_u64(a: u64, b: i64) -> u64 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.usqadd.i64"
-        )]
-        fn _vsqaddd_u64(a: i64, b: i64) -> i64;
-    }
-    _vsqaddd_u64(a.as_signed(), b).as_unsigned()
+pub unsafe fn vst2q_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x16x2_t) {
+    static_assert_uimm_bits!(LANE, 4);
+    vst2q_lane_s8::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Unsigned saturating accumulate of signed value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqadds_u32)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(usqadd))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(st2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqadds_u32(a: u32, b: i32) -> u32 {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.usqadd.i32"
-        )]
-        fn _vsqadds_u32(a: i32, b: i32) -> i32;
-    }
-    _vsqadds_u32(a.as_signed(), b).as_unsigned()
+pub unsafe fn vst2q_p64(a: *mut p64, b: poly64x2x2_t) {
+    vst2q_s64(transmute(a), transmute(b))
 }
-#[doc = "Calculates the square root of each lane."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f32)"]
+#[doc = "Store multiple 2-element structures from two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fsqrt))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqrt_f32(a: float32x2_t) -> float32x2_t {
-    simd_fsqrt(a)
+#[cfg_attr(test, assert_instr(st2))]
+pub unsafe fn vst2q_u64(a: *mut u64, b: uint64x2x2_t) {
+    vst2q_s64(transmute(a), transmute(b))
 }
-#[doc = "Calculates the square root of each lane."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f32)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fsqrt))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqrtq_f32(a: float32x4_t) -> float32x4_t {
-    simd_fsqrt(a)
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vst3_f64(a: *mut f64, b: float64x1x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st3.v1f64.p0i8"
+        )]
+        fn _vst3_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut i8);
+    }
+    _vst3_f64(b.0, b.1, b.2, a as _)
 }
-#[doc = "Calculates the square root of each lane."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrt_f64)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fsqrt))]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqrt_f64(a: float64x1_t) -> float64x1_t {
-    simd_fsqrt(a)
+pub unsafe fn vst3_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1x3_t) {
+    static_assert!(LANE == 0);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st3lane.v1f64.p0i8"
+        )]
+        fn _vst3_lane_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, n: i64, ptr: *mut i8);
+    }
+    _vst3_lane_f64(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Calculates the square root of each lane."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqrtq_f64)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fsqrt))]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqrtq_f64(a: float64x2_t) -> float64x2_t {
-    simd_fsqrt(a)
+pub unsafe fn vst3_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1x3_t) {
+    static_assert!(LANE == 0);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.st3lane.v1i64.p0i8"
+        )]
+        fn _vst3_lane_s64(a: int64x1_t, b: int64x1_t, c: int64x1_t, n: i64, ptr: *mut i8);
+    }
+    _vst3_lane_s64(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Shift right and insert"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsrid_n_s64)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sri, N = 2))]
-pub unsafe fn vsrid_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
-    static_assert!(N >= 1 && N <= 64);
-    transmute(vsri_n_s64::<N>(transmute(a), transmute(b)))
+pub unsafe fn vst3_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x1x3_t) {
+    static_assert!(LANE == 0);
+    vst3_lane_s64::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Shift right and insert"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsrid_n_u64)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sri, N = 2))]
-pub unsafe fn vsrid_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
-    static_assert!(N >= 1 && N <= 64);
-    transmute(vsri_n_u64::<N>(transmute(a), transmute(b)))
+pub unsafe fn vst3_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1x3_t) {
+    static_assert!(LANE == 0);
+    vst3_lane_s64::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x2)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_f64_x2(a: *mut f64, b: float64x1x2_t) {
+#[cfg_attr(test, assert_instr(st3))]
+pub unsafe fn vst3q_f64(a: *mut f64, b: float64x2x3_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x2.v1f64.p0f64"
+            link_name = "llvm.aarch64.neon.st3.v2f64.p0i8"
         )]
-        fn _vst1_f64_x2(a: float64x1_t, b: float64x1_t, ptr: *mut f64);
+        fn _vst3q_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut i8);
     }
-    _vst1_f64_x2(b.0, b.1, a)
+    _vst3q_f64(b.0, b.1, b.2, a as _)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x2)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_f64_x2(a: *mut f64, b: float64x2x2_t) {
+#[cfg_attr(test, assert_instr(st3))]
+pub unsafe fn vst3q_s64(a: *mut i64, b: int64x2x3_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x2.v2f64.p0f64"
+            link_name = "llvm.aarch64.neon.st3.v2i64.p0i8"
         )]
-        fn _vst1q_f64_x2(a: float64x2_t, b: float64x2_t, ptr: *mut f64);
+        fn _vst3q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i8);
     }
-    _vst1q_f64_x2(b.0, b.1, a)
+    _vst3q_s64(b.0, b.1, b.2, a as _)
 }
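For the full-width forms, st3 interleaves the three registers element by element in memory. A sketch under the same assumptions (hypothetical demo function):

use core::arch::aarch64::*;

#[target_feature(enable = "neon")]
unsafe fn st3_demo(dst: *mut i64) {
    let b = int64x2x3_t(vdupq_n_s64(1), vdupq_n_s64(2), vdupq_n_s64(3));
    // st3 interleaves lanes across registers: dst[0..6] holds [1, 2, 3, 1, 2, 3].
    vst3q_s64(dst, b);
}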
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x3)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_f64_x3(a: *mut f64, b: float64x1x3_t) {
+pub unsafe fn vst3q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2x3_t) {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x3.v1f64.p0f64"
+            link_name = "llvm.aarch64.neon.st3lane.v2f64.p0i8"
         )]
-        fn _vst1_f64_x3(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut f64);
+        fn _vst3q_lane_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, n: i64, ptr: *mut i8);
     }
-    _vst1_f64_x3(b.0, b.1, b.2, a)
+    _vst3q_lane_f64(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x3)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_f64_x3(a: *mut f64, b: float64x2x3_t) {
+pub unsafe fn vst3q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16x3_t) {
+    static_assert_uimm_bits!(LANE, 4);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x3.v2f64.p0f64"
+            link_name = "llvm.aarch64.neon.st3lane.v16i8.p0i8"
         )]
-        fn _vst1q_f64_x3(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut f64);
+        fn _vst3q_lane_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t, n: i64, ptr: *mut i8);
     }
-    _vst1q_f64_x3(b.0, b.1, b.2, a)
+    _vst3q_lane_s8(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x4)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_f64_x4(a: *mut f64, b: float64x1x4_t) {
+pub unsafe fn vst3q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2x3_t) {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x4.v1f64.p0f64"
+            link_name = "llvm.aarch64.neon.st3lane.v2i64.p0i8"
         )]
-        fn _vst1_f64_x4(
-            a: float64x1_t,
-            b: float64x1_t,
-            c: float64x1_t,
-            d: float64x1_t,
-            ptr: *mut f64,
-        );
+        fn _vst3q_lane_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, n: i64, ptr: *mut i8);
     }
-    _vst1_f64_x4(b.0, b.1, b.2, b.3, a)
+    _vst3q_lane_s64(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Store multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x4)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_f64_x4(a: *mut f64, b: float64x2x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st1x4.v2f64.p0f64"
-        )]
-        fn _vst1q_f64_x4(
-            a: float64x2_t,
-            b: float64x2_t,
-            c: float64x2_t,
-            d: float64x2_t,
-            ptr: *mut f64,
-        );
-    }
-    _vst1q_f64_x4(b.0, b.1, b.2, b.3, a)
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vst3q_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x2x3_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    vst3q_lane_s64::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f64)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
+pub unsafe fn vst3q_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x16x3_t) {
+    static_assert_uimm_bits!(LANE, 4);
+    vst3q_lane_s8::<LANE>(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1_t) {
-    static_assert!(LANE == 0);
-    *a = simd_extract!(b, LANE as u32);
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vst3q_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x2x3_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    vst3q_lane_s64::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f64)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, LANE = 0))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(st3, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
+pub unsafe fn vst3q_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x16x3_t) {
+    static_assert_uimm_bits!(LANE, 4);
+    vst3q_lane_s8::<LANE>(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    *a = simd_extract!(b, LANE as u32);
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(test, assert_instr(st3))]
+pub unsafe fn vst3q_p64(a: *mut p64, b: poly64x2x3_t) {
+    vst3q_s64(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f64)"]
+#[doc = "Store multiple 3-element structures from three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
-pub unsafe fn vst2_f64(a: *mut f64, b: float64x1x2_t) {
+#[cfg_attr(test, assert_instr(st3))]
+pub unsafe fn vst3q_u64(a: *mut u64, b: uint64x2x3_t) {
+    vst3q_s64(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vst4_f64(a: *mut f64, b: float64x1x4_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v1f64.p0i8"
+            link_name = "llvm.aarch64.neon.st4.v1f64.p0i8"
         )]
-        fn _vst2_f64(a: float64x1_t, b: float64x1_t, ptr: *mut i8);
+        fn _vst4_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, ptr: *mut i8);
     }
-    _vst2_f64(b.0, b.1, a as _)
+    _vst4_f64(b.0, b.1, b.2, b.3, a as _)
 }
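The st4 family follows the same pattern with a four-way interleave, which is degenerate for single-lane vectors; that is why the test above only asserts nop. A sketch (hypothetical demo function):

use core::arch::aarch64::*;

#[target_feature(enable = "neon")]
unsafe fn st4_demo(dst: *mut f64) {
    let b = float64x1x4_t(
        vdup_n_f64(1.0),
        vdup_n_f64(2.0),
        vdup_n_f64(3.0),
        vdup_n_f64(4.0),
    );
    // One lane per register, so this is a plain sequential store:
    // dst[0..4] holds [1.0, 2.0, 3.0, 4.0].
    vst4_f64(dst, b);
}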
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f64)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1x2_t) {
+pub unsafe fn vst4_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1x4_t) {
     static_assert!(LANE == 0);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2lane.v1f64.p0i8"
+            link_name = "llvm.aarch64.neon.st4lane.v1f64.p0i8"
         )]
-        fn _vst2_lane_f64(a: float64x1_t, b: float64x1_t, n: i64, ptr: *mut i8);
+        fn _vst4_lane_f64(
+            a: float64x1_t,
+            b: float64x1_t,
+            c: float64x1_t,
+            d: float64x1_t,
+            n: i64,
+            ptr: *mut i8,
+        );
     }
-    _vst2_lane_f64(b.0, b.1, LANE as i64, a as _)
+    _vst4_lane_f64(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s64)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1x2_t) {
+pub unsafe fn vst4_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1x4_t) {
     static_assert!(LANE == 0);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2lane.v1i64.p0i8"
+            link_name = "llvm.aarch64.neon.st4lane.v1i64.p0i8"
         )]
-        fn _vst2_lane_s64(a: int64x1_t, b: int64x1_t, n: i64, ptr: *mut i8);
+        fn _vst4_lane_s64(
+            a: int64x1_t,
+            b: int64x1_t,
+            c: int64x1_t,
+            d: int64x1_t,
+            n: i64,
+            ptr: *mut i8,
+        );
     }
-    _vst2_lane_s64(b.0, b.1, LANE as i64, a as _)
+    _vst4_lane_s64(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p64)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x1x2_t) {
+pub unsafe fn vst4_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x1x4_t) {
     static_assert!(LANE == 0);
-    vst2_lane_s64::<LANE>(transmute(a), transmute(b))
+    vst4_lane_s64::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u64)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1x2_t) {
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vst4_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1x4_t) {
     static_assert!(LANE == 0);
-    vst2_lane_s64::<LANE>(transmute(a), transmute(b))
+    vst4_lane_s64::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f64)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st2))]
-pub unsafe fn vst2q_f64(a: *mut f64, b: float64x2x2_t) {
+#[cfg_attr(test, assert_instr(st4))]
+pub unsafe fn vst4q_f64(a: *mut f64, b: float64x2x4_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v2f64.p0i8"
+            link_name = "llvm.aarch64.neon.st4.v2f64.p0i8"
         )]
-        fn _vst2q_f64(a: float64x2_t, b: float64x2_t, ptr: *mut i8);
+        fn _vst4q_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, ptr: *mut i8);
     }
-    _vst2q_f64(b.0, b.1, a as _)
+    _vst4q_f64(b.0, b.1, b.2, b.3, a as _)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s64)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st2))]
-pub unsafe fn vst2q_s64(a: *mut i64, b: int64x2x2_t) {
+#[cfg_attr(test, assert_instr(st4))]
+pub unsafe fn vst4q_s64(a: *mut i64, b: int64x2x4_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v2i64.p0i8"
+            link_name = "llvm.aarch64.neon.st4.v2i64.p0i8"
         )]
-        fn _vst2q_s64(a: int64x2_t, b: int64x2_t, ptr: *mut i8);
+        fn _vst4q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i8);
     }
-    _vst2q_s64(b.0, b.1, a as _)
+    _vst4q_s64(b.0, b.1, b.2, b.3, a as _)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f64)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2x2_t) {
+pub unsafe fn vst4q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2x4_t) {
     static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2lane.v2f64.p0i8"
+            link_name = "llvm.aarch64.neon.st4lane.v2f64.p0i8"
         )]
-        fn _vst2q_lane_f64(a: float64x2_t, b: float64x2_t, n: i64, ptr: *mut i8);
+        fn _vst4q_lane_f64(
+            a: float64x2_t,
+            b: float64x2_t,
+            c: float64x2_t,
+            d: float64x2_t,
+            n: i64,
+            ptr: *mut i8,
+        );
     }
-    _vst2q_lane_f64(b.0, b.1, LANE as i64, a as _)
+    _vst4q_lane_f64(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s8)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16x2_t) {
+pub unsafe fn vst4q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16x4_t) {
     static_assert_uimm_bits!(LANE, 4);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2lane.v16i8.p0i8"
+            link_name = "llvm.aarch64.neon.st4lane.v16i8.p0i8"
         )]
-        fn _vst2q_lane_s8(a: int8x16_t, b: int8x16_t, n: i64, ptr: *mut i8);
+        fn _vst4q_lane_s8(
+            a: int8x16_t,
+            b: int8x16_t,
+            c: int8x16_t,
+            d: int8x16_t,
+            n: i64,
+            ptr: *mut i8,
+        );
     }
-    _vst2q_lane_s8(b.0, b.1, LANE as i64, a as _)
+    _vst4q_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s64)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2x2_t) {
+pub unsafe fn vst4q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2x4_t) {
     static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2lane.v2i64.p0i8"
+            link_name = "llvm.aarch64.neon.st4lane.v2i64.p0i8"
         )]
-        fn _vst2q_lane_s64(a: int64x2_t, b: int64x2_t, n: i64, ptr: *mut i8);
+        fn _vst4q_lane_s64(
+            a: int64x2_t,
+            b: int64x2_t,
+            c: int64x2_t,
+            d: int64x2_t,
+            n: i64,
+            ptr: *mut i8,
+        );
     }
-    _vst2q_lane_s64(b.0, b.1, LANE as i64, a as _)
+    _vst4q_lane_s64(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p64)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2q_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x2x2_t) {
+pub unsafe fn vst4q_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x2x4_t) {
     static_assert_uimm_bits!(LANE, 1);
-    vst2q_lane_s64::<LANE>(transmute(a), transmute(b))
+    vst4q_lane_s64::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u8)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2q_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x16x2_t) {
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vst4q_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x16x4_t) {
     static_assert_uimm_bits!(LANE, 4);
-    vst2q_lane_s8::<LANE>(transmute(a), transmute(b))
+    vst4q_lane_s8::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u64)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2q_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x2x2_t) {
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vst4q_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x2x4_t) {
     static_assert_uimm_bits!(LANE, 1);
-    vst2q_lane_s64::<LANE>(transmute(a), transmute(b))
+    vst4q_lane_s64::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p8)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2q_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x16x2_t) {
+#[cfg_attr(test, assert_instr(st4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vst4q_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x16x4_t) {
     static_assert_uimm_bits!(LANE, 4);
-    vst2q_lane_s8::<LANE>(transmute(a), transmute(b))
+    vst4q_lane_s8::<LANE>(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p64)"]
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(st2))]
+#[cfg_attr(test, assert_instr(st4))]
+pub unsafe fn vst4q_p64(a: *mut p64, b: poly64x2x4_t) {
+    vst4q_s64(transmute(a), transmute(b))
+}
+#[doc = "Store multiple 4-element structures from four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst2q_p64(a: *mut p64, b: poly64x2x2_t) {
-    vst2q_s64(transmute(a), transmute(b))
+#[cfg_attr(test, assert_instr(st4))]
+pub unsafe fn vst4q_u64(a: *mut u64, b: uint64x2x4_t) {
+    vst4q_s64(transmute(a), transmute(b))
 }
-#[doc = "Store multiple 2-element structures from two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u64)"]
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st2))]
-pub unsafe fn vst2q_u64(a: *mut u64, b: uint64x2x2_t) {
-    vst2q_s64(transmute(a), transmute(b))
+#[cfg_attr(test, assert_instr(fsub))]
+pub unsafe fn vsub_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+    simd_sub(a, b)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fsub))]
+pub unsafe fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    simd_sub(a, b)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubd_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vsubd_s64(a: i64, b: i64) -> i64 {
+    a.wrapping_sub(b)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubd_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vsubd_u64(a: u64, b: u64) -> u64 {
+    a.wrapping_sub(b)
+}
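The scalar subtracts lower to ordinary integer subtraction (hence assert_instr(nop)) and wrap on overflow. A quick sketch, assuming an aarch64 host (hypothetical demo function):

use core::arch::aarch64::*;

#[target_feature(enable = "neon")]
unsafe fn subd_demo() {
    assert_eq!(vsubd_s64(i64::MIN, 1), i64::MAX); // wraps instead of panicking
    assert_eq!(vsubd_u64(1, 2), u64::MAX);
}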
+#[doc = "Signed Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ssubl))]
+pub unsafe fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
+    let c: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+    let d: int16x8_t = simd_cast(c);
+    let e: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+    let f: int16x8_t = simd_cast(e);
+    simd_sub(d, f)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f64)"]
+#[doc = "Signed Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vst3_f64(a: *mut f64, b: float64x1x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3.v1f64.p0i8"
-        )]
-        fn _vst3_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut i8);
-    }
-    _vst3_f64(b.0, b.1, b.2, a as _)
+#[cfg_attr(test, assert_instr(ssubl))]
+pub unsafe fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+    let c: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+    let d: int32x4_t = simd_cast(c);
+    let e: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
+    let f: int32x4_t = simd_cast(e);
+    simd_sub(d, f)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f64)"]
+#[doc = "Signed Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst3_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1x3_t) {
-    static_assert!(LANE == 0);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3lane.v1f64.p0i8"
-        )]
-        fn _vst3_lane_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, n: i64, ptr: *mut i8);
-    }
-    _vst3_lane_f64(b.0, b.1, b.2, LANE as i64, a as _)
+#[cfg_attr(test, assert_instr(ssubl))]
+pub unsafe fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+    let c: int32x2_t = simd_shuffle!(a, a, [2, 3]);
+    let d: int64x2_t = simd_cast(c);
+    let e: int32x2_t = simd_shuffle!(b, b, [2, 3]);
+    let f: int64x2_t = simd_cast(e);
+    simd_sub(d, f)
 }
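The subtract-long forms widen the high halves before subtracting, so the result can never wrap. A sketch of the extreme case (hypothetical demo function):

use core::arch::aarch64::*;

#[target_feature(enable = "neon")]
unsafe fn ssubl_high_demo() -> int16x8_t {
    let a = vdupq_n_s8(-128);
    let b = vdupq_n_s8(127);
    // Each output lane is (-128 as i16) - (127 as i16) == -255, which fits in i16.
    vsubl_high_s8(a, b)
}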
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s64)"]
+#[doc = "Unsigned Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst3_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1x3_t) {
-    static_assert!(LANE == 0);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3lane.v1i64.p0i8"
-        )]
-        fn _vst3_lane_s64(a: int64x1_t, b: int64x1_t, c: int64x1_t, n: i64, ptr: *mut i8);
-    }
-    _vst3_lane_s64(b.0, b.1, b.2, LANE as i64, a as _)
+#[cfg_attr(test, assert_instr(usubl))]
+pub unsafe fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
+    let c: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
+    let d: uint16x8_t = simd_cast(c);
+    let e: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+    let f: uint16x8_t = simd_cast(e);
+    simd_sub(d, f)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p64)"]
+#[doc = "Unsigned Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vst3_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x1x3_t) {
-    static_assert!(LANE == 0);
-    vst3_lane_s64::<LANE>(transmute(a), transmute(b))
+#[cfg_attr(test, assert_instr(usubl))]
+pub unsafe fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
+    let c: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
+    let d: uint32x4_t = simd_cast(c);
+    let e: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
+    let f: uint32x4_t = simd_cast(e);
+    simd_sub(d, f)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u64)"]
+#[doc = "Unsigned Subtract Long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vst3_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1x3_t) {
-    static_assert!(LANE == 0);
-    vst3_lane_s64::<LANE>(transmute(a), transmute(b))
+#[cfg_attr(test, assert_instr(usubl))]
+pub unsafe fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
+    let c: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
+    let d: uint64x2_t = simd_cast(c);
+    let e: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
+    let f: uint64x2_t = simd_cast(e);
+    simd_sub(d, f)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f64)"]
+#[doc = "Signed Subtract Wide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st3))]
-pub unsafe fn vst3q_f64(a: *mut f64, b: float64x2x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3.v2f64.p0i8"
-        )]
-        fn _vst3q_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut i8);
-    }
-    _vst3q_f64(b.0, b.1, b.2, a as _)
+#[cfg_attr(test, assert_instr(ssubw))]
+pub unsafe fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
+    let c: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+    simd_sub(a, simd_cast(c))
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s64)"]
+#[doc = "Signed Subtract Wide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st3))]
-pub unsafe fn vst3q_s64(a: *mut i64, b: int64x2x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3.v2i64.p0i8"
-        )]
-        fn _vst3q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i8);
-    }
-    _vst3q_s64(b.0, b.1, b.2, a as _)
+#[cfg_attr(test, assert_instr(ssubw))]
+pub unsafe fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
+    let c: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
+    simd_sub(a, simd_cast(c))
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f64)"]
+#[doc = "Signed Subtract Wide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst3q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2x3_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3lane.v2f64.p0i8"
-        )]
-        fn _vst3q_lane_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, n: i64, ptr: *mut i8);
-    }
-    _vst3q_lane_f64(b.0, b.1, b.2, LANE as i64, a as _)
+#[cfg_attr(test, assert_instr(ssubw))]
+pub unsafe fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
+    let c: int32x2_t = simd_shuffle!(b, b, [2, 3]);
+    simd_sub(a, simd_cast(c))
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s8)"]
+#[doc = "Unsigned Subtract Wide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst3q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16x3_t) {
-    static_assert_uimm_bits!(LANE, 4);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3lane.v16i8.p0i8"
-        )]
-        fn _vst3q_lane_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t, n: i64, ptr: *mut i8);
-    }
-    _vst3q_lane_s8(b.0, b.1, b.2, LANE as i64, a as _)
+#[cfg_attr(test, assert_instr(usubw))]
+pub unsafe fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
+    let c: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
+    simd_sub(a, simd_cast(c))
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s64)"]
+#[doc = "Unsigned Subtract Wide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst3q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2x3_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3lane.v2i64.p0i8"
-        )]
-        fn _vst3q_lane_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, n: i64, ptr: *mut i8);
-    }
-    _vst3q_lane_s64(b.0, b.1, b.2, LANE as i64, a as _)
+#[cfg_attr(test, assert_instr(usubw))]
+pub unsafe fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
+    let c: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
+    simd_sub(a, simd_cast(c))
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p64)"]
+#[doc = "Unsigned Subtract Wide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vst3q_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x2x3_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    vst3q_lane_s64::<LANE>(transmute(a), transmute(b))
+#[cfg_attr(test, assert_instr(usubw))]
+pub unsafe fn vsubw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
+    let c: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
+    simd_sub(a, simd_cast(c))
 }
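Subtract-wide differs from subtract-long in that the accumulator already has the wide element type; only the high half of the narrow operand is widened. A sketch (hypothetical demo function):

use core::arch::aarch64::*;

#[target_feature(enable = "neon")]
unsafe fn usubw_high_demo(acc: uint32x4_t) -> uint32x4_t {
    // Lanes 4..8 of the u16 vector are zero-extended to u32 and subtracted from `acc`.
    vsubw_high_u16(acc, vdupq_n_u16(5))
}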
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u8)"]
+#[doc = "Dot product index form with signed and unsigned integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vst3q_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x16x3_t) {
-    static_assert_uimm_bits!(LANE, 4);
-    vst3q_lane_s8::<LANE>(transmute(a), transmute(b))
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(test, assert_instr(sudot, LANE = 3))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
+pub unsafe fn vsudot_laneq_s32<const LANE: i32>(
+    a: int32x2_t,
+    b: int8x8_t,
+    c: uint8x16_t,
+) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c: uint32x4_t = transmute(c);
+    let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
+    vusdot_s32(a, transmute(c), b)
 }
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u64)"]
+#[doc = "Dot product index form with signed and unsigned integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vst3q_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x2x3_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    vst3q_lane_s64::<LANE>(transmute(a), transmute(b))
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(test, assert_instr(sudot, LANE = 3))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
+pub unsafe fn vsudotq_laneq_s32<const LANE: i32>(
+    a: int32x4_t,
+    b: int8x16_t,
+    c: uint8x16_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c: uint32x4_t = transmute(c);
+    let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vusdotq_s32(a, transmute(c), b)
 }
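In these laneq forms, LANE selects one 32-bit group of four u8 values from c and reuses it for every output lane, while the signed i8 operand stays per-lane. A sketch with opaque inputs (hypothetical demo function; needs the unstable stdarch_neon_i8mm feature):

use core::arch::aarch64::*;

#[target_feature(enable = "neon,i8mm")]
unsafe fn sudot_demo(a: int32x4_t, b: int8x16_t, c: uint8x16_t) -> int32x4_t {
    // With LANE = 0, output lane i is a[i] + sum of b[4*i + j] * c[j] for j in 0..4,
    // multiplying signed i8 by unsigned u8 with i32 accumulation.
    vsudotq_laneq_s32::<0>(a, b, c)
}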
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p8)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vst3q_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x16x3_t) {
-    static_assert_uimm_bits!(LANE, 4);
-    vst3q_lane_s8::<LANE>(transmute(a), transmute(b))
-}
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(st3))]
-pub unsafe fn vst3q_p64(a: *mut p64, b: poly64x2x3_t) {
-    vst3q_s64(transmute(a), transmute(b))
+pub unsafe fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    vqtbl1_s8(vcombine_s8(a, crate::mem::zeroed()), transmute(b))
 }
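vtbl1 emulates the legacy 32-bit behaviour, where out-of-range indices yield zero, by zero-padding the 64-bit table to 128 bits before a tbl lookup. A sketch (hypothetical demo function):

use core::arch::aarch64::*;

#[target_feature(enable = "neon")]
unsafe fn tbl1_demo(table: uint8x8_t) -> uint8x8_t {
    // Indices 0..=6 select table bytes; 0xFF is out of range, so that lane becomes 0.
    let idx = vcreate_u8(0xFF06_0504_0302_0100);
    vtbl1_u8(table, idx)
}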
-#[doc = "Store multiple 3-element structures from three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st3))]
-pub unsafe fn vst3q_u64(a: *mut u64, b: uint64x2x3_t) {
-    vst3q_s64(transmute(a), transmute(b))
+pub unsafe fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    vqtbl1_u8(vcombine_u8(a, crate::mem::zeroed()), b)
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vst4_f64(a: *mut f64, b: float64x1x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v1f64.p0i8"
-        )]
-        fn _vst4_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, ptr: *mut i8);
-    }
-    _vst4_f64(b.0, b.1, b.2, b.3, a as _)
+pub unsafe fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t {
+    vqtbl1_p8(vcombine_p8(a, crate::mem::zeroed()), b)
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst4_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1x4_t) {
-    static_assert!(LANE == 0);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4lane.v1f64.p0i8"
-        )]
-        fn _vst4_lane_f64(
-            a: float64x1_t,
-            b: float64x1_t,
-            c: float64x1_t,
-            d: float64x1_t,
-            n: i64,
-            ptr: *mut i8,
-        );
-    }
-    _vst4_lane_f64(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+pub unsafe fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t {
+    vqtbl1(transmute(vcombine_s8(a.0, a.1)), transmute(b))
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst4_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1x4_t) {
-    static_assert!(LANE == 0);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4lane.v1i64.p0i8"
-        )]
-        fn _vst4_lane_s64(
-            a: int64x1_t,
-            b: int64x1_t,
-            c: int64x1_t,
-            d: int64x1_t,
-            n: i64,
-            ptr: *mut i8,
-        );
-    }
-    _vst4_lane_s64(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+pub unsafe fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t {
+    transmute(vqtbl1(transmute(vcombine_u8(a.0, a.1)), b))
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vst4_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x1x4_t) {
-    static_assert!(LANE == 0);
-    vst4_lane_s64::<LANE>(transmute(a), transmute(b))
+pub unsafe fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t {
+    let mut a: uint8x8x2_t = a;
+    a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vqtbl1(transmute(vcombine_u8(a.0, a.1)), b));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
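+// NOTE: the `transmute`s between differently shaped vectors above are
+// lane-order sensitive, so on big-endian targets the generator wraps the
+// little-endian body in `simd_shuffle!` lane reversals: inputs are reversed
+// into little-endian lane order, the body runs unchanged, and the result is
+// reversed back. Reversing twice is the identity, so both endiannesses
+// observe identical values.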
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vst4_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1x4_t) {
-    static_assert!(LANE == 0);
-    vst4_lane_s64::<LANE>(transmute(a), transmute(b))
+pub unsafe fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t {
+    transmute(vqtbl1(transmute(vcombine_p8(a.0, a.1)), b))
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st4))]
-pub unsafe fn vst4q_f64(a: *mut f64, b: float64x2x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v2f64.p0i8"
-        )]
-        fn _vst4q_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, ptr: *mut i8);
-    }
-    _vst4q_f64(b.0, b.1, b.2, b.3, a as _)
+pub unsafe fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t {
+    let mut a: poly8x8x2_t = a;
+    a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(vqtbl1(transmute(vcombine_p8(a.0, a.1)), b));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st4))]
-pub unsafe fn vst4q_s64(a: *mut i64, b: int64x2x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v2i64.p0i8"
-        )]
-        fn _vst4q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i8);
-    }
-    _vst4q_s64(b.0, b.1, b.2, b.3, a as _)
+pub unsafe fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t {
+    let x = int8x16x2_t(
+        vcombine_s8(a.0, a.1),
+        vcombine_s8(a.2, crate::mem::zeroed()),
+    );
+    transmute(vqtbl2(transmute(x.0), transmute(x.1), transmute(b)))
 }
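+// NOTE: the odd third register is zero-padded to complete the second 128-bit
+// table. Indices 24..=31 land in the zeroed half and produce 0, and TBL
+// itself returns 0 for indices >= 32, so both cases match `vtbl3`'s
+// "out-of-range yields 0" semantics without needing a select.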
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst4q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2x4_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4lane.v2f64.p0i8"
-        )]
-        fn _vst4q_lane_f64(
-            a: float64x2_t,
-            b: float64x2_t,
-            c: float64x2_t,
-            d: float64x2_t,
-            n: i64,
-            ptr: *mut i8,
-        );
-    }
-    _vst4q_lane_f64(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+pub unsafe fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t {
+    let x = uint8x16x2_t(
+        vcombine_u8(a.0, a.1),
+        vcombine_u8(a.2, crate::mem::zeroed()),
+    );
+    transmute(vqtbl2(transmute(x.0), transmute(x.1), b))
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s8)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst4q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16x4_t) {
-    static_assert_uimm_bits!(LANE, 4);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4lane.v16i8.p0i8"
-        )]
-        fn _vst4q_lane_s8(
-            a: int8x16_t,
-            b: int8x16_t,
-            c: int8x16_t,
-            d: int8x16_t,
-            n: i64,
-            ptr: *mut i8,
-        );
-    }
-    _vst4q_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+pub unsafe fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t {
+    let mut a: uint8x8x3_t = a;
+    a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.2 = simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let x = uint8x16x2_t(
+        vcombine_u8(a.0, a.1),
+        vcombine_u8(a.2, crate::mem::zeroed()),
+    );
+    let ret_val: uint8x8_t = transmute(vqtbl2(transmute(x.0), transmute(x.1), b));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst4q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2x4_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4lane.v2i64.p0i8"
-        )]
-        fn _vst4q_lane_s64(
-            a: int64x2_t,
-            b: int64x2_t,
-            c: int64x2_t,
-            d: int64x2_t,
-            n: i64,
-            ptr: *mut i8,
-        );
-    }
-    _vst4q_lane_s64(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+pub unsafe fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
+    let x = poly8x16x2_t(
+        vcombine_p8(a.0, a.1),
+        vcombine_p8(a.2, crate::mem::zeroed()),
+    );
+    transmute(vqtbl2(transmute(x.0), transmute(x.1), b))
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vst4q_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x2x4_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    vst4q_lane_s64::<LANE>(transmute(a), transmute(b))
+pub unsafe fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
+    let mut a: poly8x8x3_t = a;
+    a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.2 = simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let x = poly8x16x2_t(
+        vcombine_p8(a.0, a.1),
+        vcombine_p8(a.2, crate::mem::zeroed()),
+    );
+    let ret_val: poly8x8_t = transmute(vqtbl2(transmute(x.0), transmute(x.1), b));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u8)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vst4q_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x16x4_t) {
-    static_assert_uimm_bits!(LANE, 4);
-    vst4q_lane_s8::<LANE>(transmute(a), transmute(b))
+pub unsafe fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t {
+    let x = int8x16x2_t(vcombine_s8(a.0, a.1), vcombine_s8(a.2, a.3));
+    transmute(vqtbl2(transmute(x.0), transmute(x.1), transmute(b)))
 }
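+// NOTE: unlike `vtbl3`, the four D registers of `vtbl4` form exactly two full
+// 128-bit tables, so no zero padding is required; TBL's "0 for indices >= 32"
+// rule already coincides with `vtbl4`'s out-of-range behaviour.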
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vst4q_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x2x4_t) {
-    static_assert_uimm_bits!(LANE, 1);
-    vst4q_lane_s64::<LANE>(transmute(a), transmute(b))
+pub unsafe fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t {
+    let x = uint8x16x2_t(vcombine_u8(a.0, a.1), vcombine_u8(a.2, a.3));
+    transmute(vqtbl2(transmute(x.0), transmute(x.1), b))
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p8)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vst4q_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x16x4_t) {
-    static_assert_uimm_bits!(LANE, 4);
-    vst4q_lane_s8::<LANE>(transmute(a), transmute(b))
+pub unsafe fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t {
+    let mut a: uint8x8x4_t = a;
+    a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.2 = simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.3 = simd_shuffle!(a.3, a.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let x = uint8x16x2_t(vcombine_u8(a.0, a.1), vcombine_u8(a.2, a.3));
+    let ret_val: uint8x8_t = transmute(vqtbl2(transmute(x.0), transmute(x.1), b));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(st4))]
-pub unsafe fn vst4q_p64(a: *mut p64, b: poly64x2x4_t) {
-    vst4q_s64(transmute(a), transmute(b))
+pub unsafe fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
+    let x = poly8x16x2_t(vcombine_p8(a.0, a.1), vcombine_p8(a.2, a.3));
+    transmute(vqtbl2(transmute(x.0), transmute(x.1), b))
 }
-#[doc = "Store multiple 4-element structures from four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u64)"]
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st4))]
-pub unsafe fn vst4q_u64(a: *mut u64, b: uint64x2x4_t) {
-    vst4q_s64(transmute(a), transmute(b))
+pub unsafe fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
+    let mut a: poly8x8x4_t = a;
+    a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.2 = simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.3 = simd_shuffle!(a.3, a.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let x = poly8x16x2_t(vcombine_p8(a.0, a.1), vcombine_p8(a.2, a.3));
+    let ret_val: poly8x8_t = transmute(vqtbl2(transmute(x.0), transmute(x.1), b));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f64)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fsub))]
-pub unsafe fn vsub_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
-    simd_sub(a, b)
+pub unsafe fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
+    simd_select(
+        simd_lt::<int8x8_t, int8x8_t>(c, transmute(i8x8::splat(8))),
+        transmute(vqtbx1(
+            transmute(a),
+            transmute(vcombine_s8(b, crate::mem::zeroed())),
+            transmute(c),
+        )),
+        a,
+    )
 }
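+// NOTE (illustrative, assumed values): `vqtbx1` consults a 128-bit table, so
+// indices 8..=15 would read the zero-padded upper half and return 0 instead
+// of preserving the destination as A32 `vtbx1` requires. The
+// `simd_lt`/`simd_select` pair therefore keeps the lanes of `a` whenever the
+// index is not below 8:
+//
+//     a = [9; 8], b = [0, 1, .., 7], c = [0, 8, 2, 20, ..]
+//     -> lanes 0 and 2 are looked up (0 and 2); lanes 1 and 3 keep 9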
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubq_f64)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(fsub))]
-pub unsafe fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    simd_sub(a, b)
+pub unsafe fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
+    simd_select(
+        simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(8))),
+        transmute(vqtbx1(
+            transmute(a),
+            transmute(vcombine_u8(b, crate::mem::zeroed())),
+            c,
+        )),
+        a,
+    )
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubd_s64)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vsubd_s64(a: i64, b: i64) -> i64 {
-    a.wrapping_sub(b)
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t {
+    simd_select(
+        simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(8))),
+        transmute(vqtbx1(
+            transmute(a),
+            transmute(vcombine_p8(b, crate::mem::zeroed())),
+            c,
+        )),
+        a,
+    )
 }
-#[doc = "Subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubd_u64)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vsubd_u64(a: u64, b: u64) -> u64 {
-    a.wrapping_sub(b)
+pub unsafe fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t {
+    vqtbx1(transmute(a), transmute(vcombine_s8(b.0, b.1)), transmute(c))
 }
-#[doc = "Signed Subtract Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s8)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ssubl))]
-pub unsafe fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
-    let c: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-    let d: int16x8_t = simd_cast(c);
-    let e: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-    let f: int16x8_t = simd_cast(e);
-    simd_sub(d, f)
+pub unsafe fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t {
+    transmute(vqtbx1(transmute(a), transmute(vcombine_u8(b.0, b.1)), c))
 }
-#[doc = "Signed Subtract Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ssubl))]
-pub unsafe fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
-    let c: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-    let d: int32x4_t = simd_cast(c);
-    let e: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-    let f: int32x4_t = simd_cast(e);
-    simd_sub(d, f)
+pub unsafe fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t {
+    let mut b: uint8x8x2_t = b;
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vqtbx1(transmute(a), transmute(vcombine_u8(b.0, b.1)), c));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Signed Subtract Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ssubl))]
-pub unsafe fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
-    let c: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-    let d: int64x2_t = simd_cast(c);
-    let e: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-    let f: int64x2_t = simd_cast(e);
-    simd_sub(d, f)
+pub unsafe fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t {
+    transmute(vqtbx1(transmute(a), transmute(vcombine_p8(b.0, b.1)), c))
 }
-#[doc = "Unsigned Subtract Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u8)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(usubl))]
-pub unsafe fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
-    let c: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-    let d: uint16x8_t = simd_cast(c);
-    let e: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-    let f: uint16x8_t = simd_cast(e);
-    simd_sub(d, f)
+pub unsafe fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t {
+    let mut b: poly8x8x2_t = b;
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(vqtbx1(transmute(a), transmute(vcombine_p8(b.0, b.1)), c));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Unsigned Subtract Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(usubl))]
-pub unsafe fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
-    let c: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-    let d: uint32x4_t = simd_cast(c);
-    let e: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-    let f: uint32x4_t = simd_cast(e);
-    simd_sub(d, f)
+pub unsafe fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t {
+    let x = int8x16x2_t(
+        vcombine_s8(b.0, b.1),
+        vcombine_s8(b.2, crate::mem::zeroed()),
+    );
+    transmute(simd_select(
+        simd_lt::<int8x8_t, int8x8_t>(transmute(c), transmute(i8x8::splat(24))),
+        transmute(vqtbx2(
+            transmute(a),
+            transmute(x.0),
+            transmute(x.1),
+            transmute(c),
+        )),
+        a,
+    ))
 }
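+// NOTE: the same padding trick as `vtbl3`, but with TBX the zeroed half is
+// harmful: indices 24..=31 would return 0 instead of leaving `a` intact, so
+// the select on `c < 24` restores `a` for those lanes. Indices >= 32 are
+// already preserved by TBX itself; only the padded range needs the mask.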
-#[doc = "Unsigned Subtract Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubl_high_u32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(usubl))]
-pub unsafe fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
-    let c: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
-    let d: uint64x2_t = simd_cast(c);
-    let e: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
-    let f: uint64x2_t = simd_cast(e);
-    simd_sub(d, f)
+pub unsafe fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t {
+    let x = uint8x16x2_t(
+        vcombine_u8(b.0, b.1),
+        vcombine_u8(b.2, crate::mem::zeroed()),
+    );
+    transmute(simd_select(
+        simd_lt::<uint8x8_t, int8x8_t>(transmute(c), transmute(u8x8::splat(24))),
+        transmute(vqtbx2(transmute(a), transmute(x.0), transmute(x.1), c)),
+        a,
+    ))
 }
-#[doc = "Signed Subtract Wide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s8)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ssubw))]
-pub unsafe fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
-    let c: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-    simd_sub(a, simd_cast(c))
+pub unsafe fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t {
+    let mut b: uint8x8x3_t = b;
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let x = uint8x16x2_t(
+        vcombine_u8(b.0, b.1),
+        vcombine_u8(b.2, crate::mem::zeroed()),
+    );
+    let ret_val: uint8x8_t = transmute(simd_select(
+        simd_lt::<uint8x8_t, int8x8_t>(transmute(c), transmute(u8x8::splat(24))),
+        transmute(vqtbx2(transmute(a), transmute(x.0), transmute(x.1), c)),
+        a,
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Signed Subtract Wide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ssubw))]
-pub unsafe fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
-    let c: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-    simd_sub(a, simd_cast(c))
+pub unsafe fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
+    let x = poly8x16x2_t(
+        vcombine_p8(b.0, b.1),
+        vcombine_p8(b.2, crate::mem::zeroed()),
+    );
+    transmute(simd_select(
+        simd_lt::<poly8x8_t, int8x8_t>(transmute(c), transmute(u8x8::splat(24))),
+        transmute(vqtbx2(transmute(a), transmute(x.0), transmute(x.1), c)),
+        a,
+    ))
 }
-#[doc = "Signed Subtract Wide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ssubw))]
-pub unsafe fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
-    let c: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-    simd_sub(a, simd_cast(c))
+pub unsafe fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
+    let mut b: poly8x8x3_t = b;
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let x = poly8x16x2_t(
+        vcombine_p8(b.0, b.1),
+        vcombine_p8(b.2, crate::mem::zeroed()),
+    );
+    let ret_val: poly8x8_t = transmute(simd_select(
+        simd_lt::<poly8x8_t, int8x8_t>(transmute(c), transmute(u8x8::splat(24))),
+        transmute(vqtbx2(transmute(a), transmute(x.0), transmute(x.1), c)),
+        a,
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Unsigned Subtract Wide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u8)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(usubw))]
-pub unsafe fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
-    let c: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-    simd_sub(a, simd_cast(c))
+pub unsafe fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
+    vqtbx2(
+        transmute(a),
+        transmute(vcombine_s8(b.0, b.1)),
+        transmute(vcombine_s8(b.2, b.3)),
+        transmute(c),
+    )
 }
-#[doc = "Unsigned Subtract Wide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u16)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(usubw))]
-pub unsafe fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
-    let c: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-    simd_sub(a, simd_cast(c))
+pub unsafe fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t {
+    transmute(vqtbx2(
+        transmute(a),
+        transmute(vcombine_u8(b.0, b.1)),
+        transmute(vcombine_u8(b.2, b.3)),
+        c,
+    ))
 }
-#[doc = "Unsigned Subtract Wide"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(usubw))]
-pub unsafe fn vsubw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
-    let c: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
-    simd_sub(a, simd_cast(c))
+pub unsafe fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t {
+    let mut b: uint8x8x4_t = b;
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.3 = simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vqtbx2(
+        transmute(a),
+        transmute(vcombine_u8(b.0, b.1)),
+        transmute(vcombine_u8(b.2, b.3)),
+        c,
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Dot product index form with signed and unsigned integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_laneq_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(test, assert_instr(sudot, LANE = 3))]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
-pub unsafe fn vsudot_laneq_s32<const LANE: i32>(
-    a: int32x2_t,
-    b: int8x8_t,
-    c: uint8x16_t,
-) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let c: uint32x4_t = transmute(c);
-    let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-    vusdot_s32(a, transmute(c), b)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
+    transmute(vqtbx2(
+        transmute(a),
+        transmute(vcombine_p8(b.0, b.1)),
+        transmute(vcombine_p8(b.2, b.3)),
+        c,
+    ))
 }
-#[doc = "Dot product index form with signed and unsigned integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_laneq_s32)"]
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(test, assert_instr(sudot, LANE = 3))]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
-pub unsafe fn vsudotq_laneq_s32<const LANE: i32>(
-    a: int32x4_t,
-    b: int8x16_t,
-    c: uint8x16_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let c: uint32x4_t = transmute(c);
-    let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vusdotq_s32(a, transmute(c), b)
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(tbx))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
+    let mut b: poly8x8x4_t = b;
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.3 = simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(vqtbx2(
+        transmute(a),
+        transmute(vcombine_p8(b.0, b.1)),
+        transmute(vcombine_p8(b.2, b.3)),
+        c,
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Transpose vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f32)"]
@@ -19950,6 +26428,150 @@ pub unsafe fn vtstd_s64(a: i64, b: i64) -> u64 {
 pub unsafe fn vtstd_u64(a: u64, b: u64) -> u64 {
     transmute(vtst_u64(transmute(a), transmute(b)))
 }
+#[doc = "Signed saturating Accumulate of Unsigned value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub unsafe fn vuqadd_s8(a: int8x8_t, b: uint8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.suqadd.v8i8"
+        )]
+        fn _vuqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    _vuqadd_s8(a, b.as_signed())
+}
+#[doc = "Signed saturating Accumulate of Unsigned value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub unsafe fn vuqaddq_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.suqadd.v16i8"
+        )]
+        fn _vuqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vuqaddq_s8(a, b.as_signed())
+}
+#[doc = "Signed saturating Accumulate of Unsigned value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub unsafe fn vuqadd_s16(a: int16x4_t, b: uint16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.suqadd.v4i16"
+        )]
+        fn _vuqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    _vuqadd_s16(a, b.as_signed())
+}
+#[doc = "Signed saturating Accumulate of Unsigned value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub unsafe fn vuqaddq_s16(a: int16x8_t, b: uint16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.suqadd.v8i16"
+        )]
+        fn _vuqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vuqaddq_s16(a, b.as_signed())
+}
+#[doc = "Signed saturating Accumulate of Unsigned value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub unsafe fn vuqadd_s32(a: int32x2_t, b: uint32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.suqadd.v2i32"
+        )]
+        fn _vuqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    _vuqadd_s32(a, b.as_signed())
+}
+#[doc = "Signed saturating Accumulate of Unsigned value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub unsafe fn vuqaddq_s32(a: int32x4_t, b: uint32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.suqadd.v4i32"
+        )]
+        fn _vuqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vuqaddq_s32(a, b.as_signed())
+}
+#[doc = "Signed saturating Accumulate of Unsigned value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqadd_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub unsafe fn vuqadd_s64(a: int64x1_t, b: uint64x1_t) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.suqadd.v1i64"
+        )]
+        fn _vuqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+    }
+    _vuqadd_s64(a, b.as_signed())
+}
+#[doc = "Signed saturating Accumulate of Unsigned value."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddq_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub unsafe fn vuqaddq_s64(a: int64x2_t, b: uint64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.suqadd.v2i64"
+        )]
+        fn _vuqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+    _vuqaddq_s64(a, b.as_signed())
+}
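+// NOTE: a hypothetical compile-time sketch of the SUQADD semantics above
+// (helper name and values assumed, not part of this patch): the unsigned
+// addend is accumulated into the signed vector with signed saturation.
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+unsafe fn _sketch_suqadd() -> int8x8_t {
+    // 100 + 200 = 300 overflows i8, so every lane saturates to i8::MAX (127).
+    unsafe { vuqadd_s8(vdup_n_s8(100), vdup_n_u8(200)) }
+}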
 #[doc = "Signed saturating accumulate of unsigned value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddb_s8)"]
 #[doc = "## Safety"]
@@ -20554,6 +27176,26 @@ pub unsafe fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
 pub unsafe fn vuzp2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
     simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15])
 }
+#[doc = "Exclusive OR and rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vxarq_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,sha3")]
+#[cfg_attr(test, assert_instr(xar, IMM6 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
+pub unsafe fn vxarq_u64<const IMM6: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(IMM6, 6);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.xar"
+        )]
+        fn _vxarq_u64(a: int64x2_t, b: int64x2_t, n: i64) -> int64x2_t;
+    }
+    _vxarq_u64(a.as_signed(), b.as_signed(), IMM6 as i64).as_unsigned()
+}
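+// NOTE: a hypothetical sketch of the XAR semantics above (helper name
+// assumed, not part of this patch): each 64-bit lane computes
+// `(a ^ b).rotate_right(IMM6)`.
+#[cfg(test)]
+#[target_feature(enable = "neon,sha3")]
+unsafe fn _sketch_xar(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    // With IMM6 = 0 the rotate is a no-op and XAR degenerates to a plain EOR.
+    unsafe { vxarq_u64::<0>(a, b) }
+}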
 #[doc = "Zip vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f32)"]
 #[doc = "## Safety"]
diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs
index 1b82a90719..961f950215 100644
--- a/crates/core_arch/src/aarch64/neon/mod.rs
+++ b/crates/core_arch/src/aarch64/neon/mod.rs
@@ -14,8 +14,7 @@ use crate::{
     core_arch::{arm_shared::*, simd::*},
     hint::unreachable_unchecked,
     intrinsics::simd::*,
-    mem::{transmute, zeroed},
-    ptr::{read_unaligned, write_unaligned},
+    mem::transmute,
 };
 #[cfg(test)]
 use stdarch_test::assert_instr;
@@ -71,300 +70,6 @@ pub struct float64x2x4_t(
     pub float64x2_t,
 );
 
-#[allow(improper_ctypes)]
-unsafe extern "unadjusted" {
-    // absolute value
-    #[link_name = "llvm.aarch64.neon.abs.i64"]
-    fn vabsd_s64_(a: i64) -> i64;
-    #[link_name = "llvm.aarch64.neon.abs.v1i64"]
-    fn vabs_s64_(a: int64x1_t) -> int64x1_t;
-    #[link_name = "llvm.aarch64.neon.abs.v2i64"]
-    fn vabsq_s64_(a: int64x2_t) -> int64x2_t;
-
-    #[link_name = "llvm.aarch64.neon.suqadd.v8i8"]
-    fn vuqadd_s8_(a: int8x8_t, b: uint8x8_t) -> int8x8_t;
-    #[link_name = "llvm.aarch64.neon.suqadd.v16i8"]
-    fn vuqaddq_s8_(a: int8x16_t, b: uint8x16_t) -> int8x16_t;
-    #[link_name = "llvm.aarch64.neon.suqadd.v4i16"]
-    fn vuqadd_s16_(a: int16x4_t, b: uint16x4_t) -> int16x4_t;
-    #[link_name = "llvm.aarch64.neon.suqadd.v8i16"]
-    fn vuqaddq_s16_(a: int16x8_t, b: uint16x8_t) -> int16x8_t;
-    #[link_name = "llvm.aarch64.neon.suqadd.v2i32"]
-    fn vuqadd_s32_(a: int32x2_t, b: uint32x2_t) -> int32x2_t;
-    #[link_name = "llvm.aarch64.neon.suqadd.v4i32"]
-    fn vuqaddq_s32_(a: int32x4_t, b: uint32x4_t) -> int32x4_t;
-    #[link_name = "llvm.aarch64.neon.suqadd.v1i64"]
-    fn vuqadd_s64_(a: int64x1_t, b: uint64x1_t) -> int64x1_t;
-    #[link_name = "llvm.aarch64.neon.suqadd.v2i64"]
-    fn vuqaddq_s64_(a: int64x2_t, b: uint64x2_t) -> int64x2_t;
-
-    #[link_name = "llvm.aarch64.neon.usqadd.v8i8"]
-    fn vsqadd_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
-    #[link_name = "llvm.aarch64.neon.usqadd.v16i8"]
-    fn vsqaddq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
-    #[link_name = "llvm.aarch64.neon.usqadd.v4i16"]
-    fn vsqadd_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
-    #[link_name = "llvm.aarch64.neon.usqadd.v8i16"]
-    fn vsqaddq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
-    #[link_name = "llvm.aarch64.neon.usqadd.v2i32"]
-    fn vsqadd_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
-    #[link_name = "llvm.aarch64.neon.usqadd.v4i32"]
-    fn vsqaddq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
-    #[link_name = "llvm.aarch64.neon.usqadd.v1i64"]
-    fn vsqadd_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
-    #[link_name = "llvm.aarch64.neon.usqadd.v2i64"]
-    fn vsqaddq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
-
-    #[link_name = "llvm.aarch64.neon.addp.v8i16"]
-    fn vpaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    #[link_name = "llvm.aarch64.neon.addp.v4i32"]
-    fn vpaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    #[link_name = "llvm.aarch64.neon.addp.v2i64"]
-    fn vpaddq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
-    #[link_name = "llvm.aarch64.neon.addp.v16i8"]
-    fn vpaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-
-    #[link_name = "llvm.aarch64.neon.saddv.i32.v4i16"]
-    fn vaddv_s16_(a: int16x4_t) -> i16;
-    #[link_name = "llvm.aarch64.neon.saddv.i32.v2i32"]
-    fn vaddv_s32_(a: int32x2_t) -> i32;
-    #[link_name = "llvm.aarch64.neon.saddv.i32.v8i8"]
-    fn vaddv_s8_(a: int8x8_t) -> i8;
-    #[link_name = "llvm.aarch64.neon.uaddv.i32.v4i16"]
-    fn vaddv_u16_(a: uint16x4_t) -> u16;
-    #[link_name = "llvm.aarch64.neon.uaddv.i32.v2i32"]
-    fn vaddv_u32_(a: uint32x2_t) -> u32;
-    #[link_name = "llvm.aarch64.neon.uaddv.i32.v8i8"]
-    fn vaddv_u8_(a: uint8x8_t) -> u8;
-    #[link_name = "llvm.aarch64.neon.saddv.i32.v8i16"]
-    fn vaddvq_s16_(a: int16x8_t) -> i16;
-    #[link_name = "llvm.aarch64.neon.saddv.i32.v4i32"]
-    fn vaddvq_s32_(a: int32x4_t) -> i32;
-    #[link_name = "llvm.aarch64.neon.saddv.i32.v16i8"]
-    fn vaddvq_s8_(a: int8x16_t) -> i8;
-    #[link_name = "llvm.aarch64.neon.uaddv.i32.v8i16"]
-    fn vaddvq_u16_(a: uint16x8_t) -> u16;
-    #[link_name = "llvm.aarch64.neon.uaddv.i32.v4i32"]
-    fn vaddvq_u32_(a: uint32x4_t) -> u32;
-    #[link_name = "llvm.aarch64.neon.uaddv.i32.v16i8"]
-    fn vaddvq_u8_(a: uint8x16_t) -> u8;
-    #[link_name = "llvm.aarch64.neon.saddv.i64.v2i64"]
-    fn vaddvq_s64_(a: int64x2_t) -> i64;
-    #[link_name = "llvm.aarch64.neon.uaddv.i64.v2i64"]
-    fn vaddvq_u64_(a: uint64x2_t) -> u64;
-
-    #[link_name = "llvm.aarch64.neon.saddlv.i32.v8i8"]
-    fn vaddlv_s8_(a: int8x8_t) -> i32;
-    #[link_name = "llvm.aarch64.neon.uaddlv.i32.v8i8"]
-    fn vaddlv_u8_(a: uint8x8_t) -> u32;
-    #[link_name = "llvm.aarch64.neon.saddlv.i32.v16i8"]
-    fn vaddlvq_s8_(a: int8x16_t) -> i32;
-    #[link_name = "llvm.aarch64.neon.uaddlv.i32.v16i8"]
-    fn vaddlvq_u8_(a: uint8x16_t) -> u32;
-
-    #[link_name = "llvm.aarch64.neon.smaxv.i8.v8i8"]
-    fn vmaxv_s8_(a: int8x8_t) -> i8;
-    #[link_name = "llvm.aarch64.neon.smaxv.i8.v16i8"]
-    fn vmaxvq_s8_(a: int8x16_t) -> i8;
-    #[link_name = "llvm.aarch64.neon.smaxv.i16.v4i16"]
-    fn vmaxv_s16_(a: int16x4_t) -> i16;
-    #[link_name = "llvm.aarch64.neon.smaxv.i16.v8i16"]
-    fn vmaxvq_s16_(a: int16x8_t) -> i16;
-    #[link_name = "llvm.aarch64.neon.smaxv.i32.v2i32"]
-    fn vmaxv_s32_(a: int32x2_t) -> i32;
-    #[link_name = "llvm.aarch64.neon.smaxv.i32.v4i32"]
-    fn vmaxvq_s32_(a: int32x4_t) -> i32;
-
-    #[link_name = "llvm.aarch64.neon.umaxv.i8.v8i8"]
-    fn vmaxv_u8_(a: uint8x8_t) -> u8;
-    #[link_name = "llvm.aarch64.neon.umaxv.i8.v16i8"]
-    fn vmaxvq_u8_(a: uint8x16_t) -> u8;
-    #[link_name = "llvm.aarch64.neon.umaxv.i16.v4i16"]
-    fn vmaxv_u16_(a: uint16x4_t) -> u16;
-    #[link_name = "llvm.aarch64.neon.umaxv.i16.v8i16"]
-    fn vmaxvq_u16_(a: uint16x8_t) -> u16;
-    #[link_name = "llvm.aarch64.neon.umaxv.i32.v2i32"]
-    fn vmaxv_u32_(a: uint32x2_t) -> u32;
-    #[link_name = "llvm.aarch64.neon.umaxv.i32.v4i32"]
-    fn vmaxvq_u32_(a: uint32x4_t) -> u32;
-
-    #[link_name = "llvm.aarch64.neon.fmaxv.f32.v2f32"]
-    fn vmaxv_f32_(a: float32x2_t) -> f32;
-    #[link_name = "llvm.aarch64.neon.fmaxv.f32.v4f32"]
-    fn vmaxvq_f32_(a: float32x4_t) -> f32;
-    #[link_name = "llvm.aarch64.neon.fmaxv.f64.v2f64"]
-    fn vmaxvq_f64_(a: float64x2_t) -> f64;
-
-    #[link_name = "llvm.aarch64.neon.sminv.i8.v8i8"]
-    fn vminv_s8_(a: int8x8_t) -> i8;
-    #[link_name = "llvm.aarch64.neon.sminv.i8.v16i8"]
-    fn vminvq_s8_(a: int8x16_t) -> i8;
-    #[link_name = "llvm.aarch64.neon.sminv.i16.v4i16"]
-    fn vminv_s16_(a: int16x4_t) -> i16;
-    #[link_name = "llvm.aarch64.neon.sminv.i16.v8i16"]
-    fn vminvq_s16_(a: int16x8_t) -> i16;
-    #[link_name = "llvm.aarch64.neon.sminv.i32.v2i32"]
-    fn vminv_s32_(a: int32x2_t) -> i32;
-    #[link_name = "llvm.aarch64.neon.sminv.i32.v4i32"]
-    fn vminvq_s32_(a: int32x4_t) -> i32;
-
-    #[link_name = "llvm.aarch64.neon.uminv.i8.v8i8"]
-    fn vminv_u8_(a: uint8x8_t) -> u8;
-    #[link_name = "llvm.aarch64.neon.uminv.i8.v16i8"]
-    fn vminvq_u8_(a: uint8x16_t) -> u8;
-    #[link_name = "llvm.aarch64.neon.uminv.i16.v4i16"]
-    fn vminv_u16_(a: uint16x4_t) -> u16;
-    #[link_name = "llvm.aarch64.neon.uminv.i16.v8i16"]
-    fn vminvq_u16_(a: uint16x8_t) -> u16;
-    #[link_name = "llvm.aarch64.neon.uminv.i32.v2i32"]
-    fn vminv_u32_(a: uint32x2_t) -> u32;
-    #[link_name = "llvm.aarch64.neon.uminv.i32.v4i32"]
-    fn vminvq_u32_(a: uint32x4_t) -> u32;
-
-    #[link_name = "llvm.aarch64.neon.fminv.f32.v2f32"]
-    fn vminv_f32_(a: float32x2_t) -> f32;
-    #[link_name = "llvm.aarch64.neon.fminv.f32.v4f32"]
-    fn vminvq_f32_(a: float32x4_t) -> f32;
-    #[link_name = "llvm.aarch64.neon.fminv.f64.v2f64"]
-    fn vminvq_f64_(a: float64x2_t) -> f64;
-
-    #[link_name = "llvm.aarch64.neon.sminp.v16i8"]
-    fn vpminq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    #[link_name = "llvm.aarch64.neon.sminp.v8i16"]
-    fn vpminq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    #[link_name = "llvm.aarch64.neon.sminp.v4i32"]
-    fn vpminq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    #[link_name = "llvm.aarch64.neon.uminp.v16i8"]
-    fn vpminq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
-    #[link_name = "llvm.aarch64.neon.uminp.v8i16"]
-    fn vpminq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
-    #[link_name = "llvm.aarch64.neon.uminp.v4i32"]
-    fn vpminq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
-    #[link_name = "llvm.aarch64.neon.fminp.4f32"]
-    fn vpminq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
-    #[link_name = "llvm.aarch64.neon.fminp.v2f64"]
-    fn vpminq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
-
-    #[link_name = "llvm.aarch64.neon.smaxp.v16i8"]
-    fn vpmaxq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    #[link_name = "llvm.aarch64.neon.smaxp.v8i16"]
-    fn vpmaxq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    #[link_name = "llvm.aarch64.neon.smaxp.v4i32"]
-    fn vpmaxq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    #[link_name = "llvm.aarch64.neon.umaxp.v16i8"]
-    fn vpmaxq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
-    #[link_name = "llvm.aarch64.neon.umaxp.v8i16"]
-    fn vpmaxq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
-    #[link_name = "llvm.aarch64.neon.umaxp.v4i32"]
-    fn vpmaxq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
-    #[link_name = "llvm.aarch64.neon.fmaxp.4f32"]
-    fn vpmaxq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
-    #[link_name = "llvm.aarch64.neon.fmaxp.v2f64"]
-    fn vpmaxq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
-
-    #[link_name = "llvm.aarch64.neon.tbl1.v8i8"]
-    fn vqtbl1(a: int8x16_t, b: uint8x8_t) -> int8x8_t;
-    #[link_name = "llvm.aarch64.neon.tbl1.v16i8"]
-    fn vqtbl1q(a: int8x16_t, b: uint8x16_t) -> int8x16_t;
-
-    #[link_name = "llvm.aarch64.neon.tbx1.v8i8"]
-    fn vqtbx1(a: int8x8_t, b: int8x16_t, c: uint8x8_t) -> int8x8_t;
-    #[link_name = "llvm.aarch64.neon.tbx1.v16i8"]
-    fn vqtbx1q(a: int8x16_t, b: int8x16_t, c: uint8x16_t) -> int8x16_t;
-
-    #[link_name = "llvm.aarch64.neon.tbl2.v8i8"]
-    fn vqtbl2(a0: int8x16_t, a1: int8x16_t, b: uint8x8_t) -> int8x8_t;
-    #[link_name = "llvm.aarch64.neon.tbl2.v16i8"]
-    fn vqtbl2q(a0: int8x16_t, a1: int8x16_t, b: uint8x16_t) -> int8x16_t;
-
-    #[link_name = "llvm.aarch64.neon.tbx2.v8i8"]
-    fn vqtbx2(a: int8x8_t, b0: int8x16_t, b1: int8x16_t, c: uint8x8_t) -> int8x8_t;
-    #[link_name = "llvm.aarch64.neon.tbx2.v16i8"]
-    fn vqtbx2q(a: int8x16_t, b0: int8x16_t, b1: int8x16_t, c: uint8x16_t) -> int8x16_t;
-
-    #[link_name = "llvm.aarch64.neon.tbl3.v8i8"]
-    fn vqtbl3(a0: int8x16_t, a1: int8x16_t, a2: int8x16_t, b: uint8x8_t) -> int8x8_t;
-    #[link_name = "llvm.aarch64.neon.tbl3.v16i8"]
-    fn vqtbl3q(a0: int8x16_t, a1: int8x16_t, a2: int8x16_t, b: uint8x16_t) -> int8x16_t;
-
-    #[link_name = "llvm.aarch64.neon.tbx3.v8i8"]
-    fn vqtbx3(a: int8x8_t, b0: int8x16_t, b1: int8x16_t, b2: int8x16_t, c: uint8x8_t) -> int8x8_t;
-    #[link_name = "llvm.aarch64.neon.tbx3.v16i8"]
-    fn vqtbx3q(
-        a: int8x16_t,
-        b0: int8x16_t,
-        b1: int8x16_t,
-        b2: int8x16_t,
-        c: uint8x16_t,
-    ) -> int8x16_t;
-
-    #[link_name = "llvm.aarch64.neon.tbl4.v8i8"]
-    fn vqtbl4(a0: int8x16_t, a1: int8x16_t, a2: int8x16_t, a3: int8x16_t, b: uint8x8_t)
-    -> int8x8_t;
-    #[link_name = "llvm.aarch64.neon.tbl4.v16i8"]
-    fn vqtbl4q(
-        a0: int8x16_t,
-        a1: int8x16_t,
-        a2: int8x16_t,
-        a3: int8x16_t,
-        b: uint8x16_t,
-    ) -> int8x16_t;
-
-    #[link_name = "llvm.aarch64.neon.tbx4.v8i8"]
-    fn vqtbx4(
-        a: int8x8_t,
-        b0: int8x16_t,
-        b1: int8x16_t,
-        b2: int8x16_t,
-        b3: int8x16_t,
-        c: uint8x8_t,
-    ) -> int8x8_t;
-
-    #[link_name = "llvm.aarch64.neon.tbx4.v16i8"]
-    fn vqtbx4q(
-        a: int8x16_t,
-        b0: int8x16_t,
-        b1: int8x16_t,
-        b2: int8x16_t,
-        b3: int8x16_t,
-        c: uint8x16_t,
-    ) -> int8x16_t;
-
-    #[link_name = "llvm.aarch64.neon.vsli.v8i8"]
-    fn vsli_n_s8_(a: int8x8_t, b: int8x8_t, n: i32) -> int8x8_t;
-    #[link_name = "llvm.aarch64.neon.vsli.v16i8"]
-    fn vsliq_n_s8_(a: int8x16_t, b: int8x16_t, n: i32) -> int8x16_t;
-    #[link_name = "llvm.aarch64.neon.vsli.v4i16"]
-    fn vsli_n_s16_(a: int16x4_t, b: int16x4_t, n: i32) -> int16x4_t;
-    #[link_name = "llvm.aarch64.neon.vsli.v8i16"]
-    fn vsliq_n_s16_(a: int16x8_t, b: int16x8_t, n: i32) -> int16x8_t;
-    #[link_name = "llvm.aarch64.neon.vsli.v2i32"]
-    fn vsli_n_s32_(a: int32x2_t, b: int32x2_t, n: i32) -> int32x2_t;
-    #[link_name = "llvm.aarch64.neon.vsli.v4i32"]
-    fn vsliq_n_s32_(a: int32x4_t, b: int32x4_t, n: i32) -> int32x4_t;
-    #[link_name = "llvm.aarch64.neon.vsli.v1i64"]
-    fn vsli_n_s64_(a: int64x1_t, b: int64x1_t, n: i32) -> int64x1_t;
-    #[link_name = "llvm.aarch64.neon.vsli.v2i64"]
-    fn vsliq_n_s64_(a: int64x2_t, b: int64x2_t, n: i32) -> int64x2_t;
-
-    #[link_name = "llvm.aarch64.neon.vsri.v8i8"]
-    fn vsri_n_s8_(a: int8x8_t, b: int8x8_t, n: i32) -> int8x8_t;
-    #[link_name = "llvm.aarch64.neon.vsri.v16i8"]
-    fn vsriq_n_s8_(a: int8x16_t, b: int8x16_t, n: i32) -> int8x16_t;
-    #[link_name = "llvm.aarch64.neon.vsri.v4i16"]
-    fn vsri_n_s16_(a: int16x4_t, b: int16x4_t, n: i32) -> int16x4_t;
-    #[link_name = "llvm.aarch64.neon.vsri.v8i16"]
-    fn vsriq_n_s16_(a: int16x8_t, b: int16x8_t, n: i32) -> int16x8_t;
-    #[link_name = "llvm.aarch64.neon.vsri.v2i32"]
-    fn vsri_n_s32_(a: int32x2_t, b: int32x2_t, n: i32) -> int32x2_t;
-    #[link_name = "llvm.aarch64.neon.vsri.v4i32"]
-    fn vsriq_n_s32_(a: int32x4_t, b: int32x4_t, n: i32) -> int32x4_t;
-    #[link_name = "llvm.aarch64.neon.vsri.v1i64"]
-    fn vsri_n_s64_(a: int64x1_t, b: int64x1_t, n: i32) -> int64x1_t;
-    #[link_name = "llvm.aarch64.neon.vsri.v2i64"]
-    fn vsriq_n_s64_(a: int64x2_t, b: int64x2_t, n: i32) -> int64x2_t;
-}
-
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
@@ -485,3710 +190,445 @@ pub unsafe fn vcopy_laneq_f64<const LANE1: i32, const LANE2: i32>(
     transmute::<f64, _>(simd_extract!(b, LANE2 as u32))
 }
 
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Load multiple single-element structures to one, two, three, or four registers
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vld1_dup_f64(ptr: *const f64) -> float64x1_t {
+    vld1_f64(ptr)
 }
 
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Load multiple single-element structures to one, two, three, or four registers
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(test, assert_instr(ld1r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vld1q_dup_f64(ptr: *const f64) -> float64x2_t {
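+    // Load the scalar into lane 0 of a zeroed vector, then broadcast that lane
+    // to both lanes; as the assert_instr above records, this lowers to a single ld1r.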
+    let x = vld1q_lane_f64::<0>(ptr, transmute(f64x2::splat(0.)));
+    simd_shuffle!(x, x, [0, 0])
 }
 
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Load one single-element structure to one lane of one register.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(ldr, LANE = 0))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vld1_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x1_t) -> float64x1_t {
+    static_assert!(LANE == 0);
+    simd_insert!(src, LANE as u32, *ptr)
 }
 
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Load one single-element structure to one lane of one register.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(ld1, LANE = 1))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vld1q_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x2_t) -> float64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    simd_insert!(src, LANE as u32, *ptr)
 }
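+
+// Editor's sketch (illustrative only, not part of this patch): a scalar model
+// of the lane loads above. `ld1q_lane_model` is a hypothetical helper that
+// replaces exactly one lane and keeps the others, mirroring simd_insert!.
+#[cfg(test)]
+fn ld1q_lane_model(lane: usize, value: f64, src: [f64; 2]) -> [f64; 2] {
+    let mut out = src;
+    out[lane] = value;
+    out
+}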
 
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Bitwise Select instruction. This instruction sets each bit in the destination SIMD&FP register
+/// to the corresponding bit from the first source SIMD&FP register when the original
+/// destination bit was 1, otherwise from the second source SIMD&FP register.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(test, assert_instr(bsl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vbsl_f64(a: uint64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
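+    // Computes (a & b) | (!a & c) bitwise; `a` acts as the selection mask.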
+    let not = int64x1_t::splat(-1);
+    transmute(simd_or(
+        simd_and(a, transmute(b)),
+        simd_and(simd_xor(a, transmute(not)), transmute(c)),
+    ))
 }
-
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Bitwise Select.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(test, assert_instr(bsl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vbsl_p64(a: poly64x1_t, b: poly64x1_t, c: poly64x1_t) -> poly64x1_t {
+    let not = int64x1_t::splat(-1);
+    simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
 }
-
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Bitwise Select (128-bit).
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(test, assert_instr(bsl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vbslq_f64(a: uint64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
+    let not = int64x2_t::splat(-1);
+    transmute(simd_or(
+        simd_and(a, transmute(b)),
+        simd_and(simd_xor(a, transmute(not)), transmute(c)),
+    ))
 }
-
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Bitwise Select (128-bit).
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(test, assert_instr(bsl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vbslq_p64(a: poly64x2_t, b: poly64x2_t, c: poly64x2_t) -> poly64x2_t {
+    let not = int64x2_t::splat(-1);
+    simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
 }
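+
+// Editor's sketch (illustrative only, not part of this patch): the same select
+// identity on a plain u64. `bsl_model` is a hypothetical helper; each result
+// bit comes from `b` where the mask bit is 1 and from `c` where it is 0.
+#[cfg(test)]
+fn bsl_model(mask: u64, b: u64, c: u64) -> u64 {
+    (mask & b) | (!mask & c)
+}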
 
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Vector add.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(test, assert_instr(fadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vadd_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
+    simd_add(a, b)
 }
 
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Vector add.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(test, assert_instr(fadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
+    simd_add(a, b)
 }
 
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Vector add.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(test, assert_instr(add))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    simd_add(a, b)
 }
 
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Vector add.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(test, assert_instr(add))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    simd_add(a, b)
 }
 
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Scalar add.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(test, assert_instr(add))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 {
+    a.wrapping_add(b)
 }
 
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Scalar add.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(test, assert_instr(add))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 {
+    a.wrapping_add(b)
 }
 
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Extract vector from pair of vectors
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(test, assert_instr(nop, N = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vext_p64<const N: i32>(a: poly64x1_t, _b: poly64x1_t) -> poly64x1_t {
+    static_assert!(N == 0);
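+    // A one-element vector has a single valid extraction point (N == 0), so the
+    // result is `a` unchanged and `_b` can never be selected.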
+    a
 }
 
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Extract vector from pair of vectors
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(test, assert_instr(nop, N = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vext_f64<const N: i32>(a: float64x1_t, _b: float64x1_t) -> float64x1_t {
+    static_assert!(N == 0);
+    a
 }
 
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(test, assert_instr(fmov))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vdup_n_p64(value: p64) -> poly64x1_t {
+    transmute(u64x1::new(value))
 }
 
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(test, assert_instr(nop))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vdup_n_f64(value: f64) -> float64x1_t {
+    float64x1_t::splat(value)
 }
 
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(test, assert_instr(dup))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vdupq_n_p64(value: p64) -> poly64x2_t {
+    transmute(u64x2::new(value, value))
 }
 
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
-    read_unaligned(ptr.cast())
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64)
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(ldr))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t {
-    read_unaligned(ptr.cast())
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64)
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(test, assert_instr(dup))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vdupq_n_f64(value: f64) -> float64x2_t {
+    float64x2_t::splat(value)
 }
 
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(test, assert_instr(fmov))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vmov_n_p64(value: p64) -> poly64x1_t {
+    vdup_n_p64(value)
 }
 
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(test, assert_instr(nop))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vmov_n_f64(value: f64) -> float64x1_t {
+    vdup_n_f64(value)
 }
 
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(test, assert_instr(dup))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1_f64(ptr: *const f64) -> float64x1_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vmovq_n_p64(value: p64) -> poly64x2_t {
+    vdupq_n_p64(value)
 }
 
-/// Load multiple single-element structures to one, two, three, or four registers.
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(test, assert_instr(dup))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t {
-    read_unaligned(ptr.cast())
+pub unsafe fn vmovq_n_f64(value: f64) -> float64x2_t {
+    vdupq_n_f64(value)
 }
 
-/// Load multiple single-element structures to one, two, three, or four registers
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ldr))]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(mov))]
+#[cfg_attr(all(test, target_env = "msvc"), assert_instr(dup))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1_dup_f64(ptr: *const f64) -> float64x1_t {
-    vld1_f64(ptr)
+pub unsafe fn vget_high_f64(a: float64x2_t) -> float64x1_t {
+    float64x1_t([simd_extract!(a, 1)])
 }
 
-/// Load multiple single-element structures to one, two, three, or four registers
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld1r))]
+#[cfg_attr(test, assert_instr(ext))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1q_dup_f64(ptr: *const f64) -> float64x2_t {
-    let x = vld1q_lane_f64::<0>(ptr, transmute(f64x2::splat(0.)));
-    simd_shuffle!(x, x, [0, 0])
+pub unsafe fn vget_high_p64(a: poly64x2_t) -> poly64x1_t {
+    transmute(u64x1::new(simd_extract!(a, 1)))
 }
 
-/// Load one single-element structure to one lane of one register.
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(ldr, LANE = 0))]
+#[cfg_attr(test, assert_instr(nop))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x1_t) -> float64x1_t {
-    static_assert!(LANE == 0);
-    simd_insert!(src, LANE as u32, *ptr)
+pub unsafe fn vget_low_f64(a: float64x2_t) -> float64x1_t {
+    float64x1_t([simd_extract!(a, 0)])
 }
 
-/// Load one single-element structure to one lane of one register.
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(test, assert_instr(ld1, LANE = 1))]
+#[cfg_attr(test, assert_instr(nop))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld1q_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x2_t) -> float64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    simd_insert!(src, LANE as u32, *ptr)
+pub unsafe fn vget_low_p64(a: poly64x2_t) -> poly64x1_t {
+    transmute(u64x1::new(simd_extract!(a, 0)))
 }
 
-/// Store multiple single-element structures from one, two, three, or four registers.
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) {
-    write_unaligned(ptr.cast(), a);
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, IMM5 = 0)
+)]
+pub unsafe fn vget_lane_f64<const IMM5: i32>(v: float64x1_t) -> f64 {
+    static_assert!(IMM5 == 0);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Store multiple single-element structures from one, two, three, or four registers.
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) {
-    write_unaligned(ptr.cast(), a);
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop, IMM5 = 0)
+)]
+pub unsafe fn vgetq_lane_f64<const IMM5: i32>(v: float64x2_t) -> f64 {
+    static_assert_uimm_bits!(IMM5, 1);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Store multiple single-element structures from one, two, three, or four registers.
+/// Vector combine
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
+#[cfg_attr(test, assert_instr(mov))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) {
-    write_unaligned(ptr.cast(), a);
+pub unsafe fn vcombine_f64(low: float64x1_t, high: float64x1_t) -> float64x2_t {
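+    // Concatenate the two one-element vectors: lane 0 from `low`, lane 1 from `high`.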
+    simd_shuffle!(low, high, [0, 1])
 }
 
-/// Store multiple single-element structures from one, two, three, or four registers.
+/// Shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) {
-    write_unaligned(ptr.cast(), a);
+pub unsafe fn vshld_n_s64<const N: i32>(a: i64) -> i64 {
+    static_assert_uimm_bits!(N, 6);
+    a << N
 }
 
-/// Store multiple single-element structures from one, two, three, or four registers.
+/// Shift left
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) {
-    write_unaligned(ptr.cast(), a);
+pub unsafe fn vshld_n_u64<const N: i32>(a: u64) -> u64 {
+    static_assert_uimm_bits!(N, 6);
+    a << N
 }
 
-/// Store multiple single-element structures from one, two, three, or four registers.
+/// Signed shift right
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) {
-    write_unaligned(ptr.cast(), a);
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) {
-    write_unaligned(ptr.cast(), a);
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) {
-    write_unaligned(ptr.cast(), a);
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) {
-    write_unaligned(ptr.cast(), a);
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) {
-    write_unaligned(ptr.cast(), a);
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) {
-    write_unaligned(ptr.cast(), a);
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) {
-    write_unaligned(ptr.cast(), a);
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) {
-    write_unaligned(ptr.cast(), a);
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) {
-    write_unaligned(ptr.cast(), a);
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) {
-    write_unaligned(ptr.cast(), a);
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) {
-    write_unaligned(ptr.cast(), a);
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) {
-    write_unaligned(ptr.cast(), a);
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) {
-    write_unaligned(ptr.cast(), a);
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) {
-    write_unaligned(ptr.cast(), a);
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
-    write_unaligned(ptr.cast(), a);
-}
-
-// Store multiple single-element structures from one, two, three, or four registers.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64)
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
-    write_unaligned(ptr.cast(), a);
-}
-
-// Store multiple single-element structures from one, two, three, or four registers.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64)
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) {
-    write_unaligned(ptr.cast(), a);
-}
-
-// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) {
-    write_unaligned(ptr.cast(), a);
-}
-
-// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) {
-    write_unaligned(ptr.cast(), a);
-}
-
-// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1_f64(ptr: *mut f64, a: float64x1_t) {
-    write_unaligned(ptr.cast(), a);
-}
-
-// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(str))]
-#[allow(clippy::cast_ptr_alignment)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vst1q_f64(ptr: *mut f64, a: float64x2_t) {
-    write_unaligned(ptr.cast(), a);
-}
-
-/// Absolute Value (wrapping).
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(abs))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vabsd_s64(a: i64) -> i64 {
-    vabsd_s64_(a)
-}
-/// Absolute Value (wrapping).
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(abs))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vabs_s64(a: int64x1_t) -> int64x1_t {
-    vabs_s64_(a)
-}
-/// Absolute Value (wrapping).
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(abs))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vabsq_s64(a: int64x2_t) -> int64x2_t {
-    vabsq_s64_(a)
-}
-
-/// Bitwise Select instructions. This instruction sets each bit in the destination SIMD&FP register
-/// to the corresponding bit from the first source SIMD&FP register when the original
-/// destination bit was 1, otherwise from the second source SIMD&FP register.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(bsl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vbsl_f64(a: uint64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
-    let not = int64x1_t::splat(-1);
-    transmute(simd_or(
-        simd_and(a, transmute(b)),
-        simd_and(simd_xor(a, transmute(not)), transmute(c)),
-    ))
-}
-/// Bitwise Select.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(bsl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vbsl_p64(a: poly64x1_t, b: poly64x1_t, c: poly64x1_t) -> poly64x1_t {
-    let not = int64x1_t::splat(-1);
-    simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
-}
-/// Bitwise Select. (128-bit)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(bsl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vbslq_f64(a: uint64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
-    let not = int64x2_t::splat(-1);
-    transmute(simd_or(
-        simd_and(a, transmute(b)),
-        simd_and(simd_xor(a, transmute(not)), transmute(c)),
-    ))
-}
-/// Bitwise Select. (128-bit)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(bsl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vbslq_p64(a: poly64x2_t, b: poly64x2_t, c: poly64x2_t) -> poly64x2_t {
-    let not = int64x2_t::splat(-1);
-    simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
-}
-
-/// Signed saturating Accumulate of Unsigned value.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(suqadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vuqadd_s8(a: int8x8_t, b: uint8x8_t) -> int8x8_t {
-    vuqadd_s8_(a, b)
-}
-/// Signed saturating Accumulate of Unsigned value.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(suqadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vuqaddq_s8(a: int8x16_t, b: uint8x16_t) -> int8x16_t {
-    vuqaddq_s8_(a, b)
-}
-/// Signed saturating Accumulate of Unsigned value.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(suqadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vuqadd_s16(a: int16x4_t, b: uint16x4_t) -> int16x4_t {
-    vuqadd_s16_(a, b)
-}
-/// Signed saturating Accumulate of Unsigned value.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(suqadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vuqaddq_s16(a: int16x8_t, b: uint16x8_t) -> int16x8_t {
-    vuqaddq_s16_(a, b)
-}
-/// Signed saturating Accumulate of Unsigned value.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(suqadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vuqadd_s32(a: int32x2_t, b: uint32x2_t) -> int32x2_t {
-    vuqadd_s32_(a, b)
-}
-/// Signed saturating Accumulate of Unsigned value.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(suqadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vuqaddq_s32(a: int32x4_t, b: uint32x4_t) -> int32x4_t {
-    vuqaddq_s32_(a, b)
-}
-/// Signed saturating Accumulate of Unsigned value.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(suqadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vuqadd_s64(a: int64x1_t, b: uint64x1_t) -> int64x1_t {
-    vuqadd_s64_(a, b)
-}
-/// Signed saturating Accumulate of Unsigned value.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(suqadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vuqaddq_s64(a: int64x2_t, b: uint64x2_t) -> int64x2_t {
-    vuqaddq_s64_(a, b)
-}
-
-/// Unsigned saturating Accumulate of Signed value.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(usqadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqadd_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
-    vsqadd_u8_(a, b)
-}
-/// Unsigned saturating Accumulate of Signed value.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(usqadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqaddq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
-    vsqaddq_u8_(a, b)
-}
-/// Unsigned saturating Accumulate of Signed value.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(usqadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqadd_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
-    vsqadd_u16_(a, b)
-}
-/// Unsigned saturating Accumulate of Signed value.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(usqadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqaddq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
-    vsqaddq_u16_(a, b)
-}
-/// Unsigned saturating Accumulate of Signed value.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(usqadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqadd_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
-    vsqadd_u32_(a, b)
-}
-/// Unsigned saturating Accumulate of Signed value.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(usqadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqaddq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
-    vsqaddq_u32_(a, b)
-}
-/// Unsigned saturating Accumulate of Signed value.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(usqadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqadd_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
-    vsqadd_u64_(a, b)
-}
-/// Unsigned saturating Accumulate of Signed value.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(usqadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsqaddq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
-    vsqaddq_u64_(a, b)
-}
-
-/// Add pairwise
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    vpaddq_s16_(a, b)
-}
-/// Add pairwise
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    transmute(vpaddq_s16_(transmute(a), transmute(b)))
-}
-/// Add pairwise
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    vpaddq_s32_(a, b)
-}
-/// Add pairwise
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    transmute(vpaddq_s32_(transmute(a), transmute(b)))
-}
-/// Add pairwise
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    vpaddq_s64_(a, b)
-}
-/// Add pairwise
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    transmute(vpaddq_s64_(transmute(a), transmute(b)))
-}
-/// Add pairwise
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    vpaddq_s8_(a, b)
-}
-/// Add pairwise
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    transmute(vpaddq_s8_(transmute(a), transmute(b)))
+pub unsafe fn vshrd_n_s64<const N: i32>(a: i64) -> i64 {
+    static_assert!(N >= 1 && N <= 64);
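+    // Rust rejects shifts >= the bit width, so N == 64 is clamped to 63; an
+    // arithmetic shift by 63 already yields the same all-sign-bits result.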
+    let n: i32 = if N == 64 { 63 } else { N };
+    a >> n
 }
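+
+// Editor's sketch (illustrative only, not part of this patch): `shr_model` is a
+// hypothetical helper showing why the clamp above is exact for arithmetic
+// shifts - by 63 bits, every bit is already a copy of the sign bit.
+#[cfg(test)]
+fn shr_model(a: i64, n: u32) -> i64 {
+    assert!((1..=64).contains(&n));
+    a >> n.min(63)
+}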
-/// Add pairwise
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpaddd_s64(a: int64x2_t) -> i64 {
-    transmute(vaddvq_u64_(transmute(a)))
-}
-/// Add pairwise
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpaddd_u64(a: uint64x2_t) -> u64 {
-    vaddvq_u64_(a)
-}
-
-/// Add across vector
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddv_s16(a: int16x4_t) -> i16 {
-    vaddv_s16_(a)
-}
-
-/// Add across vector
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddv_s32(a: int32x2_t) -> i32 {
-    vaddv_s32_(a)
-}
-
-/// Add across vector
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddv_s8(a: int8x8_t) -> i8 {
-    vaddv_s8_(a)
-}
-
-/// Add across vector
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddv_u16(a: uint16x4_t) -> u16 {
-    vaddv_u16_(a)
-}
-
-/// Add across vector
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddv_u32(a: uint32x2_t) -> u32 {
-    vaddv_u32_(a)
-}
-
-/// Add across vector
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddv_u8(a: uint8x8_t) -> u8 {
-    vaddv_u8_(a)
-}
-
-/// Add across vector
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddvq_s16(a: int16x8_t) -> i16 {
-    vaddvq_s16_(a)
-}
-
-/// Add across vector
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddvq_s32(a: int32x4_t) -> i32 {
-    vaddvq_s32_(a)
-}
-
-/// Add across vector
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddvq_s8(a: int8x16_t) -> i8 {
-    vaddvq_s8_(a)
-}
-
-/// Add across vector
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddvq_u16(a: uint16x8_t) -> u16 {
-    vaddvq_u16_(a)
-}
-
-/// Add across vector
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddvq_u32(a: uint32x4_t) -> u32 {
-    vaddvq_u32_(a)
-}
-
-/// Add across vector
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddvq_u8(a: uint8x16_t) -> u8 {
-    vaddvq_u8_(a)
-}
-
-/// Add across vector
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddvq_s64(a: int64x2_t) -> i64 {
-    vaddvq_s64_(a)
-}
-
-/// Add across vector
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(addp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddvq_u64(a: uint64x2_t) -> u64 {
-    vaddvq_u64_(a)
-}
-
-/// Signed Add Long across Vector
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(saddlv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddlv_s8(a: int8x8_t) -> i16 {
-    vaddlv_s8_(a) as i16
-}
-
-/// Signed Add Long across Vector
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(saddlv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddlvq_s8(a: int8x16_t) -> i16 {
-    vaddlvq_s8_(a) as i16
-}
-
-/// Unsigned Add Long across Vector
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uaddlv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddlv_u8(a: uint8x8_t) -> u16 {
-    vaddlv_u8_(a) as u16
-}
-
-/// Unsigned Add Long across Vector
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uaddlv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddlvq_u8(a: uint8x16_t) -> u16 {
-    vaddlvq_u8_(a) as u16
-}
-
-/// Vector add.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vadd_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
-    simd_add(a, b)
-}
-
-/// Vector add.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fadd))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    simd_add(a, b)
-}
-
-/// Vector add.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(add))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    simd_add(a, b)
-}
-
-/// Vector add.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(add))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    simd_add(a, b)
-}
-
-/// Vector add.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(add))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 {
-    a.wrapping_add(b)
-}
-
-/// Vector add.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(add))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 {
-    a.wrapping_add(b)
-}
-
-/// Horizontal vector max.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smaxv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmaxv_s8(a: int8x8_t) -> i8 {
-    vmaxv_s8_(a)
-}
-
-/// Horizontal vector max.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smaxv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmaxvq_s8(a: int8x16_t) -> i8 {
-    vmaxvq_s8_(a)
-}
-
-/// Horizontal vector max.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smaxv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmaxv_s16(a: int16x4_t) -> i16 {
-    vmaxv_s16_(a)
-}
-
-/// Horizontal vector max.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smaxv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmaxvq_s16(a: int16x8_t) -> i16 {
-    vmaxvq_s16_(a)
-}
-
-/// Horizontal vector max.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smaxp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmaxv_s32(a: int32x2_t) -> i32 {
-    vmaxv_s32_(a)
-}
-
-/// Horizontal vector max.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smaxv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmaxvq_s32(a: int32x4_t) -> i32 {
-    vmaxvq_s32_(a)
-}
-
-/// Horizontal vector max.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umaxv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmaxv_u8(a: uint8x8_t) -> u8 {
-    vmaxv_u8_(a)
-}
-
-/// Horizontal vector max.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umaxv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmaxvq_u8(a: uint8x16_t) -> u8 {
-    vmaxvq_u8_(a)
-}
-
-/// Horizontal vector max.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umaxv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmaxv_u16(a: uint16x4_t) -> u16 {
-    vmaxv_u16_(a)
-}
-
-/// Horizontal vector max.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umaxv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmaxvq_u16(a: uint16x8_t) -> u16 {
-    vmaxvq_u16_(a)
-}
-
-/// Horizontal vector max.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umaxp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmaxv_u32(a: uint32x2_t) -> u32 {
-    vmaxv_u32_(a)
-}
-
-/// Horizontal vector max.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umaxv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmaxvq_u32(a: uint32x4_t) -> u32 {
-    vmaxvq_u32_(a)
-}
-
-/// Horizontal vector max.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmaxp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmaxv_f32(a: float32x2_t) -> f32 {
-    vmaxv_f32_(a)
-}
-
-/// Horizontal vector max.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmaxv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmaxvq_f32(a: float32x4_t) -> f32 {
-    vmaxvq_f32_(a)
-}
-
-/// Horizontal vector max.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmaxp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmaxvq_f64(a: float64x2_t) -> f64 {
-    vmaxvq_f64_(a)
-}
-
-/// Horizontal vector min.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sminv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vminv_s8(a: int8x8_t) -> i8 {
-    vminv_s8_(a)
-}
-
-/// Horizontal vector min.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sminv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vminvq_s8(a: int8x16_t) -> i8 {
-    vminvq_s8_(a)
-}
-
-/// Horizontal vector min.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sminv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vminv_s16(a: int16x4_t) -> i16 {
-    vminv_s16_(a)
-}
-
-/// Horizontal vector min.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sminv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vminvq_s16(a: int16x8_t) -> i16 {
-    vminvq_s16_(a)
-}
-
-/// Horizontal vector min.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sminp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vminv_s32(a: int32x2_t) -> i32 {
-    vminv_s32_(a)
-}
-
-/// Horizontal vector min.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sminv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vminvq_s32(a: int32x4_t) -> i32 {
-    vminvq_s32_(a)
-}
-
-/// Horizontal vector min.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uminv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vminv_u8(a: uint8x8_t) -> u8 {
-    vminv_u8_(a)
-}
-
-/// Horizontal vector min.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uminv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vminvq_u8(a: uint8x16_t) -> u8 {
-    vminvq_u8_(a)
-}
-
-/// Horizontal vector min.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uminv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vminv_u16(a: uint16x4_t) -> u16 {
-    vminv_u16_(a)
-}
-
-/// Horizontal vector min.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uminv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vminvq_u16(a: uint16x8_t) -> u16 {
-    vminvq_u16_(a)
-}
-
-/// Horizontal vector min.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uminp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vminv_u32(a: uint32x2_t) -> u32 {
-    vminv_u32_(a)
-}
-
-/// Horizontal vector min.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uminv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vminvq_u32(a: uint32x4_t) -> u32 {
-    vminvq_u32_(a)
-}
-
-/// Horizontal vector min.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fminp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vminv_f32(a: float32x2_t) -> f32 {
-    vminv_f32_(a)
-}
-
-/// Horizontal vector min.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fminv))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vminvq_f32(a: float32x4_t) -> f32 {
-    vminvq_f32_(a)
-}
-
-/// Horizontal vector min.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fminp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vminvq_f64(a: float64x2_t) -> f64 {
-    vminvq_f64_(a)
-}
-
-/// Folding minimum of adjacent pairs
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sminp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    vpminq_s8_(a, b)
-}
-
-/// Folding minimum of adjacent pairs
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sminp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    vpminq_s16_(a, b)
-}
-
-/// Folding minimum of adjacent pairs
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sminp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    vpminq_s32_(a, b)
-}
-
-/// Folding minimum of adjacent pairs
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uminp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    vpminq_u8_(a, b)
-}
-
-/// Folding minimum of adjacent pairs
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uminp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    vpminq_u16_(a, b)
-}
-
-/// Folding minimum of adjacent pairs
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uminp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    vpminq_u32_(a, b)
-}
-
-/// Folding minimum of adjacent pairs
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fminp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    vpminq_f32_(a, b)
-}
-
-/// Folding minimum of adjacent pairs
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fminp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    vpminq_f64_(a, b)
-}
-
-/// Folding maximum of adjacent pairs
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smaxp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    vpmaxq_s8_(a, b)
-}
-
-/// Folding maximum of adjacent pairs
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smaxp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    vpmaxq_s16_(a, b)
-}
-
-/// Folding maximum of adjacent pairs
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smaxp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    vpmaxq_s32_(a, b)
-}
-
-/// Folding maximum of adjacent pairs
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umaxp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    vpmaxq_u8_(a, b)
-}
-
-/// Folding maximum of adjacent pairs
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umaxp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    vpmaxq_u16_(a, b)
-}
-
-/// Folding maximum of adjacent pairs
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umaxp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    vpmaxq_u32_(a, b)
-}
-
-/// Folding maximum of adjacent pairs
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmaxp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    vpmaxq_f32_(a, b)
-}
-
-/// Folding maximum of adjacent pairs
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmaxp))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    vpmaxq_f64_(a, b)
-}
-
-/// Extract vector from pair of vectors
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vext_p64<const N: i32>(a: poly64x1_t, _b: poly64x1_t) -> poly64x1_t {
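-    // A one-element vector admits only extract position 0, so the result is
-    // `a` unchanged and no instruction is emitted (hence the `nop` assertion).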
-    static_assert!(N == 0);
-    a
-}
-
-/// Extract vector from pair of vectors
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vext_f64<const N: i32>(a: float64x1_t, _b: float64x1_t) -> float64x1_t {
-    static_assert!(N == 0);
-    a
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmov))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdup_n_p64(value: p64) -> poly64x1_t {
-    transmute(u64x1::new(value))
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdup_n_f64(value: f64) -> float64x1_t {
-    float64x1_t::splat(value)
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(dup))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdupq_n_p64(value: p64) -> poly64x2_t {
-    transmute(u64x2::new(value, value))
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(dup))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vdupq_n_f64(value: f64) -> float64x2_t {
-    float64x2_t::splat(value)
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmov))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmov_n_p64(value: p64) -> poly64x1_t {
-    vdup_n_p64(value)
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmov_n_f64(value: f64) -> float64x1_t {
-    vdup_n_f64(value)
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(dup))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmovq_n_p64(value: p64) -> poly64x2_t {
-    vdupq_n_p64(value)
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(dup))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vmovq_n_f64(value: f64) -> float64x2_t {
-    vdupq_n_f64(value)
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(mov))]
-#[cfg_attr(all(test, target_env = "msvc"), assert_instr(dup))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vget_high_f64(a: float64x2_t) -> float64x1_t {
-    float64x1_t([simd_extract!(a, 1)])
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ext))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vget_high_p64(a: poly64x2_t) -> poly64x1_t {
-    transmute(u64x1::new(simd_extract!(a, 1)))
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vget_low_f64(a: float64x2_t) -> float64x1_t {
-    float64x1_t([simd_extract!(a, 0)])
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vget_low_p64(a: poly64x2_t) -> poly64x1_t {
-    transmute(u64x1::new(simd_extract!(a, 0)))
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, IMM5 = 0)
-)]
-pub unsafe fn vget_lane_f64<const IMM5: i32>(v: float64x1_t) -> f64 {
-    static_assert!(IMM5 == 0);
-    simd_extract!(v, IMM5 as u32)
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, IMM5 = 0)
-)]
-pub unsafe fn vgetq_lane_f64<const IMM5: i32>(v: float64x2_t) -> f64 {
-    static_assert_uimm_bits!(IMM5, 1);
-    simd_extract!(v, IMM5 as u32)
-}
-
-/// Vector combine
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcombine_f64(low: float64x1_t, high: float64x1_t) -> float64x2_t {
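-    // `simd_shuffle` indices address the concatenation of both inputs, so
-    // `[0, 1]` concatenates the two one-element vectors into a pair.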
-    simd_shuffle!(low, high, [0, 1])
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
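-    // AArch64 has no 64-bit TBL, so widen the table to 128 bits with a zeroed
-    // upper half; TBL yields 0 for out-of-range indices, which matches the
-    // vtbl1 semantics exactly.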
-    vqtbl1_s8(vcombine_s8(a, zeroed()), transmute(b))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    vqtbl1_u8(vcombine_u8(a, zeroed()), b)
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t {
-    vqtbl1_p8(vcombine_p8(a, zeroed()), b)
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t {
-    vqtbl1_s8(vcombine_s8(a.0, a.1), transmute(b))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t {
-    vqtbl1_u8(vcombine_u8(a.0, a.1), b)
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t {
-    vqtbl1_p8(vcombine_p8(a.0, a.1), b)
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t {
-    vqtbl2_s8(
-        int8x16x2_t(vcombine_s8(a.0, a.1), vcombine_s8(a.2, zeroed())),
-        transmute(b),
-    )
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t {
-    vqtbl2_u8(
-        uint8x16x2_t(vcombine_u8(a.0, a.1), vcombine_u8(a.2, zeroed())),
-        b,
-    )
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
-    vqtbl2_p8(
-        poly8x16x2_t(vcombine_p8(a.0, a.1), vcombine_p8(a.2, zeroed())),
-        b,
-    )
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t {
-    vqtbl2_s8(
-        int8x16x2_t(vcombine_s8(a.0, a.1), vcombine_s8(a.2, a.3)),
-        transmute(b),
-    )
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t {
-    vqtbl2_u8(
-        uint8x16x2_t(vcombine_u8(a.0, a.1), vcombine_u8(a.2, a.3)),
-        b,
-    )
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
-    vqtbl2_p8(
-        poly8x16x2_t(vcombine_p8(a.0, a.1), vcombine_p8(a.2, a.3)),
-        b,
-    )
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
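-    // With the table zero-padded to 16 bytes, TBX only falls back to `a` for
-    // indices >= 16, while vtbx1 must do so for indices >= 8; select the TBX
-    // result only where the index is in range.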
-    let r = vqtbx1_s8(a, vcombine_s8(b, zeroed()), transmute(c));
-    let m: int8x8_t = simd_lt(c, transmute(i8x8::splat(8)));
-    simd_select(m, r, a)
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
-    let r = vqtbx1_u8(a, vcombine_u8(b, zeroed()), c);
-    let m: int8x8_t = simd_lt(c, transmute(u8x8::splat(8)));
-    simd_select(m, r, a)
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t {
-    let r = vqtbx1_p8(a, vcombine_p8(b, zeroed()), c);
-    let m: int8x8_t = simd_lt(c, transmute(u8x8::splat(8)));
-    simd_select(m, r, a)
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t {
-    vqtbx1_s8(a, vcombine_s8(b.0, b.1), transmute(c))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t {
-    vqtbx1_u8(a, vcombine_u8(b.0, b.1), c)
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t {
-    vqtbx1_p8(a, vcombine_p8(b.0, b.1), c)
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t {
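-    // The three 8-byte tables are packed into two 16-byte registers with the
-    // last 8 bytes zeroed, so TBX only falls back to `a` for indices >= 32;
-    // vtbx3 requires the fallback for indices >= 24, hence the extra select.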
-    let r = vqtbx2_s8(
-        a,
-        int8x16x2_t(vcombine_s8(b.0, b.1), vcombine_s8(b.2, zeroed())),
-        transmute(c),
-    );
-    let m: int8x8_t = simd_lt(c, transmute(i8x8::splat(24)));
-    simd_select(m, r, a)
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t {
-    let r = vqtbx2_u8(
-        a,
-        uint8x16x2_t(vcombine_u8(b.0, b.1), vcombine_u8(b.2, zeroed())),
-        c,
-    );
-    let m: int8x8_t = simd_lt(c, transmute(u8x8::splat(24)));
-    simd_select(m, r, a)
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
-    let r = vqtbx2_p8(
-        a,
-        poly8x16x2_t(vcombine_p8(b.0, b.1), vcombine_p8(b.2, zeroed())),
-        c,
-    );
-    let m: int8x8_t = simd_lt(c, transmute(u8x8::splat(24)));
-    simd_select(m, r, a)
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
-    vqtbx2_s8(
-        a,
-        int8x16x2_t(vcombine_s8(b.0, b.1), vcombine_s8(b.2, b.3)),
-        transmute(c),
-    )
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t {
-    vqtbx2_u8(
-        a,
-        uint8x16x2_t(vcombine_u8(b.0, b.1), vcombine_u8(b.2, b.3)),
-        c,
-    )
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
-    vqtbx2_p8(
-        a,
-        poly8x16x2_t(vcombine_p8(b.0, b.1), vcombine_p8(b.2, b.3)),
-        c,
-    )
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl1_s8(t: int8x16_t, idx: uint8x8_t) -> int8x8_t {
-    vqtbl1(t, idx)
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl1q_s8(t: int8x16_t, idx: uint8x16_t) -> int8x16_t {
-    vqtbl1q(t, idx)
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl1_u8(t: uint8x16_t, idx: uint8x8_t) -> uint8x8_t {
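-    // TBL operates on raw bytes, so the unsigned and polynomial variants can
-    // reuse the signed implementation via bitwise transmutes.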
-    transmute(vqtbl1(transmute(t), idx))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl1q_u8(t: uint8x16_t, idx: uint8x16_t) -> uint8x16_t {
-    transmute(vqtbl1q(transmute(t), idx))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl1_p8(t: poly8x16_t, idx: uint8x8_t) -> poly8x8_t {
-    transmute(vqtbl1(transmute(t), idx))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl1q_p8(t: poly8x16_t, idx: uint8x16_t) -> poly8x16_t {
-    transmute(vqtbl1q(transmute(t), idx))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx1_s8(a: int8x8_t, t: int8x16_t, idx: uint8x8_t) -> int8x8_t {
-    vqtbx1(a, t, idx)
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx1q_s8(a: int8x16_t, t: int8x16_t, idx: uint8x16_t) -> int8x16_t {
-    vqtbx1q(a, t, idx)
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx1_u8(a: uint8x8_t, t: uint8x16_t, idx: uint8x8_t) -> uint8x8_t {
-    transmute(vqtbx1(transmute(a), transmute(t), idx))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx1q_u8(a: uint8x16_t, t: uint8x16_t, idx: uint8x16_t) -> uint8x16_t {
-    transmute(vqtbx1q(transmute(a), transmute(t), idx))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx1_p8(a: poly8x8_t, t: poly8x16_t, idx: uint8x8_t) -> poly8x8_t {
-    transmute(vqtbx1(transmute(a), transmute(t), idx))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx1q_p8(a: poly8x16_t, t: poly8x16_t, idx: uint8x16_t) -> poly8x16_t {
-    transmute(vqtbx1q(transmute(a), transmute(t), idx))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl2_s8(t: int8x16x2_t, idx: uint8x8_t) -> int8x8_t {
-    vqtbl2(t.0, t.1, idx)
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl2q_s8(t: int8x16x2_t, idx: uint8x16_t) -> int8x16_t {
-    vqtbl2q(t.0, t.1, idx)
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl2_u8(t: uint8x16x2_t, idx: uint8x8_t) -> uint8x8_t {
-    transmute(vqtbl2(transmute(t.0), transmute(t.1), idx))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl2q_u8(t: uint8x16x2_t, idx: uint8x16_t) -> uint8x16_t {
-    transmute(vqtbl2q(transmute(t.0), transmute(t.1), idx))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl2_p8(t: poly8x16x2_t, idx: uint8x8_t) -> poly8x8_t {
-    transmute(vqtbl2(transmute(t.0), transmute(t.1), idx))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl2q_p8(t: poly8x16x2_t, idx: uint8x16_t) -> poly8x16_t {
-    transmute(vqtbl2q(transmute(t.0), transmute(t.1), idx))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx2_s8(a: int8x8_t, t: int8x16x2_t, idx: uint8x8_t) -> int8x8_t {
-    vqtbx2(a, t.0, t.1, idx)
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx2q_s8(a: int8x16_t, t: int8x16x2_t, idx: uint8x16_t) -> int8x16_t {
-    vqtbx2q(a, t.0, t.1, idx)
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx2_u8(a: uint8x8_t, t: uint8x16x2_t, idx: uint8x8_t) -> uint8x8_t {
-    transmute(vqtbx2(transmute(a), transmute(t.0), transmute(t.1), idx))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx2q_u8(a: uint8x16_t, t: uint8x16x2_t, idx: uint8x16_t) -> uint8x16_t {
-    transmute(vqtbx2q(transmute(a), transmute(t.0), transmute(t.1), idx))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx2_p8(a: poly8x8_t, t: poly8x16x2_t, idx: uint8x8_t) -> poly8x8_t {
-    transmute(vqtbx2(transmute(a), transmute(t.0), transmute(t.1), idx))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx2q_p8(a: poly8x16_t, t: poly8x16x2_t, idx: uint8x16_t) -> poly8x16_t {
-    transmute(vqtbx2q(transmute(a), transmute(t.0), transmute(t.1), idx))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl3_s8(t: int8x16x3_t, idx: uint8x8_t) -> int8x8_t {
-    vqtbl3(t.0, t.1, t.2, idx)
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl3q_s8(t: int8x16x3_t, idx: uint8x16_t) -> int8x16_t {
-    vqtbl3q(t.0, t.1, t.2, idx)
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl3_u8(t: uint8x16x3_t, idx: uint8x8_t) -> uint8x8_t {
-    transmute(vqtbl3(transmute(t.0), transmute(t.1), transmute(t.2), idx))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl3q_u8(t: uint8x16x3_t, idx: uint8x16_t) -> uint8x16_t {
-    transmute(vqtbl3q(transmute(t.0), transmute(t.1), transmute(t.2), idx))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl3_p8(t: poly8x16x3_t, idx: uint8x8_t) -> poly8x8_t {
-    transmute(vqtbl3(transmute(t.0), transmute(t.1), transmute(t.2), idx))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl3q_p8(t: poly8x16x3_t, idx: uint8x16_t) -> poly8x16_t {
-    transmute(vqtbl3q(transmute(t.0), transmute(t.1), transmute(t.2), idx))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx3_s8(a: int8x8_t, t: int8x16x3_t, idx: uint8x8_t) -> int8x8_t {
-    vqtbx3(a, t.0, t.1, t.2, idx)
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx3q_s8(a: int8x16_t, t: int8x16x3_t, idx: uint8x16_t) -> int8x16_t {
-    vqtbx3q(a, t.0, t.1, t.2, idx)
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx3_u8(a: uint8x8_t, t: uint8x16x3_t, idx: uint8x8_t) -> uint8x8_t {
-    transmute(vqtbx3(
-        transmute(a),
-        transmute(t.0),
-        transmute(t.1),
-        transmute(t.2),
-        idx,
-    ))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx3q_u8(a: uint8x16_t, t: uint8x16x3_t, idx: uint8x16_t) -> uint8x16_t {
-    transmute(vqtbx3q(
-        transmute(a),
-        transmute(t.0),
-        transmute(t.1),
-        transmute(t.2),
-        idx,
-    ))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx3_p8(a: poly8x8_t, t: poly8x16x3_t, idx: uint8x8_t) -> poly8x8_t {
-    transmute(vqtbx3(
-        transmute(a),
-        transmute(t.0),
-        transmute(t.1),
-        transmute(t.2),
-        idx,
-    ))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx3q_p8(a: poly8x16_t, t: poly8x16x3_t, idx: uint8x16_t) -> poly8x16_t {
-    transmute(vqtbx3q(
-        transmute(a),
-        transmute(t.0),
-        transmute(t.1),
-        transmute(t.2),
-        idx,
-    ))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl4_s8(t: int8x16x4_t, idx: uint8x8_t) -> int8x8_t {
-    vqtbl4(t.0, t.1, t.2, t.3, idx)
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl4q_s8(t: int8x16x4_t, idx: uint8x16_t) -> int8x16_t {
-    vqtbl4q(t.0, t.1, t.2, t.3, idx)
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl4_u8(t: uint8x16x4_t, idx: uint8x8_t) -> uint8x8_t {
-    transmute(vqtbl4(
-        transmute(t.0),
-        transmute(t.1),
-        transmute(t.2),
-        transmute(t.3),
-        idx,
-    ))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl4q_u8(t: uint8x16x4_t, idx: uint8x16_t) -> uint8x16_t {
-    transmute(vqtbl4q(
-        transmute(t.0),
-        transmute(t.1),
-        transmute(t.2),
-        transmute(t.3),
-        idx,
-    ))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl4_p8(t: poly8x16x4_t, idx: uint8x8_t) -> poly8x8_t {
-    transmute(vqtbl4(
-        transmute(t.0),
-        transmute(t.1),
-        transmute(t.2),
-        transmute(t.3),
-        idx,
-    ))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbl4q_p8(t: poly8x16x4_t, idx: uint8x16_t) -> poly8x16_t {
-    transmute(vqtbl4q(
-        transmute(t.0),
-        transmute(t.1),
-        transmute(t.2),
-        transmute(t.3),
-        idx,
-    ))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx4_s8(a: int8x8_t, t: int8x16x4_t, idx: uint8x8_t) -> int8x8_t {
-    vqtbx4(a, t.0, t.1, t.2, t.3, idx)
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx4q_s8(a: int8x16_t, t: int8x16x4_t, idx: uint8x16_t) -> int8x16_t {
-    vqtbx4q(a, t.0, t.1, t.2, t.3, idx)
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx4_u8(a: uint8x8_t, t: uint8x16x4_t, idx: uint8x8_t) -> uint8x8_t {
-    transmute(vqtbx4(
-        transmute(a),
-        transmute(t.0),
-        transmute(t.1),
-        transmute(t.2),
-        transmute(t.3),
-        idx,
-    ))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx4q_u8(a: uint8x16_t, t: uint8x16x4_t, idx: uint8x16_t) -> uint8x16_t {
-    transmute(vqtbx4q(
-        transmute(a),
-        transmute(t.0),
-        transmute(t.1),
-        transmute(t.2),
-        transmute(t.3),
-        idx,
-    ))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx4_p8(a: poly8x8_t, t: poly8x16x4_t, idx: uint8x8_t) -> poly8x8_t {
-    transmute(vqtbx4(
-        transmute(a),
-        transmute(t.0),
-        transmute(t.1),
-        transmute(t.2),
-        transmute(t.3),
-        idx,
-    ))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqtbx4q_p8(a: poly8x16_t, t: poly8x16x4_t, idx: uint8x16_t) -> poly8x16_t {
-    transmute(vqtbx4q(
-        transmute(a),
-        transmute(t.0),
-        transmute(t.1),
-        transmute(t.2),
-        transmute(t.3),
-        idx,
-    ))
-}
-
-/// Shift left
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshld_n_s64<const N: i32>(a: i64) -> i64 {
-    static_assert_uimm_bits!(N, 6);
-    a << N
-}
-
-/// Shift left
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshld_n_u64<const N: i32>(a: u64) -> u64 {
-    static_assert_uimm_bits!(N, 6);
-    a << N
-}
-
-/// Signed shift right
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshrd_n_s64<const N: i32>(a: i64) -> i64 {
-    static_assert!(N >= 1 && N <= 64);
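-    // Shifting an i64 by 64 overflows in Rust; an arithmetic shift by 63
-    // yields the same all-sign-bits result, so clamp N to 63.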
-    let n: i32 = if N == 64 { 63 } else { N };
-    a >> n
-}
-
-/// Unsigned shift right
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vshrd_n_u64<const N: i32>(a: u64) -> u64 {
-    static_assert!(N >= 1 && N <= 64);
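-    // Shifting a u64 by 64 overflows in Rust, but the defined result of the
-    // full logical shift is 0, so return it directly.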
-    let n: i32 = if N == 64 {
-        return 0;
-    } else {
-        N
-    };
-    a >> n
-}
-
-/// Signed shift right and accumulate
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
-    static_assert!(N >= 1 && N <= 64);
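-    // Accumulate with wraparound to match the modular addition of the
-    // hardware instruction instead of panicking on overflow in debug builds.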
-    a.wrapping_add(vshrd_n_s64::<N>(b))
-}
-
-/// Unsigned shift right and accumulate
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N = 2))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
-    static_assert!(N >= 1 && N <= 64);
-    a.wrapping_add(vshrd_n_u64::<N>(b))
-}
-
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsli_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
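-    // SLI shifts each lane of `b` left by N and inserts it into `a`, keeping
-    // the low N bits of `a`; for 8-bit lanes the shift ranges over 0..=7.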
-    static_assert_uimm_bits!(N, 3);
-    vsli_n_s8_(a, b, N)
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsliq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    vsliq_n_s8_(a, b, N)
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsli_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    vsli_n_s16_(a, b, N)
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsliq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    vsliq_n_s16_(a, b, N)
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsli_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    static_assert!(N >= 0 && N <= 31);
-    vsli_n_s32_(a, b, N)
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsliq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert!(N >= 0 && N <= 31);
-    vsliq_n_s32_(a, b, N)
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsli_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    static_assert!(N >= 0 && N <= 63);
-    vsli_n_s64_(a, b, N)
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsliq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    static_assert!(N >= 0 && N <= 63);
-    vsliq_n_s64_(a, b, N)
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsli_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    transmute(vsli_n_s8_(transmute(a), transmute(b), N))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsliq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    transmute(vsliq_n_s8_(transmute(a), transmute(b), N))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsli_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    transmute(vsli_n_s16_(transmute(a), transmute(b), N))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsliq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    transmute(vsliq_n_s16_(transmute(a), transmute(b), N))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsli_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    static_assert!(N >= 0 && N <= 31);
-    transmute(vsli_n_s32_(transmute(a), transmute(b), N))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsliq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    static_assert!(N >= 0 && N <= 31);
-    transmute(vsliq_n_s32_(transmute(a), transmute(b), N))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsli_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    static_assert!(N >= 0 && N <= 63);
-    transmute(vsli_n_s64_(transmute(a), transmute(b), N))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsliq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    static_assert!(N >= 0 && N <= 63);
-    transmute(vsliq_n_s64_(transmute(a), transmute(b), N))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsli_n_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    transmute(vsli_n_s8_(transmute(a), transmute(b), N))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsliq_n_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    transmute(vsliq_n_s8_(transmute(a), transmute(b), N))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsli_n_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    transmute(vsli_n_s16_(transmute(a), transmute(b), N))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsliq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    transmute(vsliq_n_s16_(transmute(a), transmute(b), N))
-}
-
-/// Shift Left and Insert (immediate)
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p64)
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsli_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
-    static_assert!(N >= 0 && N <= 63);
-    transmute(vsli_n_s64_(transmute(a), transmute(b), N))
-}
-
-/// Shift Left and Insert (immediate)
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p64)
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(sli, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsliq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
-    static_assert!(N >= 0 && N <= 63);
-    transmute(vsliq_n_s64_(transmute(a), transmute(b), N))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsri_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
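-    // SRI shifts each lane of `b` right by N and inserts it into `a`, keeping
-    // the high N bits of `a`; for 8-bit lanes the shift ranges over 1..=8.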
-    static_assert!(N >= 1 && N <= 8);
-    vsri_n_s8_(a, b, N)
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsriq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    vsriq_n_s8_(a, b, N)
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsri_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    vsri_n_s16_(a, b, N)
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsriq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    vsriq_n_s16_(a, b, N)
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsri_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    vsri_n_s32_(a, b, N)
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsriq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    vsriq_n_s32_(a, b, N)
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsri_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    static_assert!(N >= 1 && N <= 64);
-    vsri_n_s64_(a, b, N)
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsriq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    static_assert!(N >= 1 && N <= 64);
-    vsriq_n_s64_(a, b, N)
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsri_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    transmute(vsri_n_s8_(transmute(a), transmute(b), N))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsriq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    transmute(vsriq_n_s8_(transmute(a), transmute(b), N))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsri_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    transmute(vsri_n_s16_(transmute(a), transmute(b), N))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsriq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    transmute(vsriq_n_s16_(transmute(a), transmute(b), N))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsri_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    transmute(vsri_n_s32_(transmute(a), transmute(b), N))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsriq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    transmute(vsriq_n_s32_(transmute(a), transmute(b), N))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsri_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    static_assert!(N >= 1 && N <= 64);
-    transmute(vsri_n_s64_(transmute(a), transmute(b), N))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsriq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    static_assert!(N >= 1 && N <= 64);
-    transmute(vsriq_n_s64_(transmute(a), transmute(b), N))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsri_n_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    transmute(vsri_n_s8_(transmute(a), transmute(b), N))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsriq_n_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
-    static_assert!(N >= 1 && N <= 8);
-    transmute(vsriq_n_s8_(transmute(a), transmute(b), N))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsri_n_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    transmute(vsri_n_s16_(transmute(a), transmute(b), N))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsriq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
-    static_assert!(N >= 1 && N <= 16);
-    transmute(vsriq_n_s16_(transmute(a), transmute(b), N))
-}
-
-/// Shift Right and Insert (immediate)
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p64)
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsri_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
-    static_assert!(N >= 1 && N <= 64);
-    transmute(vsri_n_s64_(transmute(a), transmute(b), N))
-}
-
-/// Shift Right and Insert (immediate)
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p64)
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(sri, N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vsriq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
-    static_assert!(N >= 1 && N <= 64);
-    transmute(vsriq_n_s64_(transmute(a), transmute(b), N))
-}
-
-/// SM3TT1A
-#[inline]
-#[target_feature(enable = "neon,sm4")]
-#[cfg_attr(test, assert_instr(sm3tt1a, IMM2 = 0))]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
-pub unsafe fn vsm3tt1aq_u32<const IMM2: i32>(
-    a: uint32x4_t,
-    b: uint32x4_t,
-    c: uint32x4_t,
-) -> uint32x4_t {
-    static_assert_uimm_bits!(IMM2, 2);
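-    // The intrinsic is bound directly to its LLVM definition; the
-    // "unadjusted" ABI passes the SIMD arguments through unmodified so they
-    // match the signature LLVM expects.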
-    #[allow(improper_ctypes)]
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sm3tt1a"
-        )]
-        fn vsm3tt1aq_u32_(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, imm2: i64) -> uint32x4_t;
-    }
-    vsm3tt1aq_u32_(a, b, c, IMM2 as i64)
-}
-
-/// SM3TT1B
-#[inline]
-#[target_feature(enable = "neon,sm4")]
-#[cfg_attr(test, assert_instr(sm3tt1b, IMM2 = 0))]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
-pub unsafe fn vsm3tt1bq_u32<const IMM2: i32>(
-    a: uint32x4_t,
-    b: uint32x4_t,
-    c: uint32x4_t,
-) -> uint32x4_t {
-    static_assert_uimm_bits!(IMM2, 2);
-    #[allow(improper_ctypes)]
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sm3tt1b"
-        )]
-        fn vsm3tt1bq_u32_(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, imm2: i64) -> uint32x4_t;
-    }
-    vsm3tt1bq_u32_(a, b, c, IMM2 as i64)
-}
-
-/// SM3TT2A
-#[inline]
-#[target_feature(enable = "neon,sm4")]
-#[cfg_attr(test, assert_instr(sm3tt2a, IMM2 = 0))]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
-pub unsafe fn vsm3tt2aq_u32<const IMM2: i32>(
-    a: uint32x4_t,
-    b: uint32x4_t,
-    c: uint32x4_t,
-) -> uint32x4_t {
-    static_assert_uimm_bits!(IMM2, 2);
-    #[allow(improper_ctypes)]
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sm3tt2a"
-        )]
-        fn vsm3tt2aq_u32_(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, imm2: i64) -> uint32x4_t;
-    }
-    vsm3tt2aq_u32_(a, b, c, IMM2 as i64)
-}
-
-/// SM3TT2B
-#[inline]
-#[target_feature(enable = "neon,sm4")]
-#[cfg_attr(test, assert_instr(sm3tt2b, IMM2 = 0))]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_sm4", issue = "117226")]
-pub unsafe fn vsm3tt2bq_u32<const IMM2: i32>(
-    a: uint32x4_t,
-    b: uint32x4_t,
-    c: uint32x4_t,
-) -> uint32x4_t {
-    static_assert_uimm_bits!(IMM2, 2);
-    #[allow(improper_ctypes)]
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.sm3tt2b"
-        )]
-        fn vsm3tt2bq_u32_(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t, imm2: i64) -> uint32x4_t;
-    }
-    vsm3tt2bq_u32_(a, b, c, IMM2 as i64)
-}
-
-/// Exclusive OR and rotate
-#[inline]
-#[target_feature(enable = "neon,sha3")]
-#[cfg_attr(test, assert_instr(xar, IMM6 = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "stdarch_neon_sha3", since = "1.79.0")]
-pub unsafe fn vxarq_u64<const IMM6: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(IMM6, 6);
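-    // XAR exclusive-ORs the two inputs and rotates each 64-bit lane right by
-    // IMM6 bits, a primitive used by SHA-3 style permutations.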
-    #[allow(improper_ctypes)]
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.crypto.xar"
-        )]
-        fn vxarq_u64_(a: uint64x2_t, b: uint64x2_t, n: i64) -> uint64x2_t;
-    }
-    vxarq_u64_(a, b, IMM6 as i64)
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::core_arch::aarch64::test_support::*;
-    use crate::core_arch::arm_shared::test_support::*;
-    use crate::core_arch::{aarch64::neon::*, aarch64::*, simd::*};
-    use std::mem::transmute;
-    use stdarch_test::simd_test;
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuqadd_s8() {
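-        // -128 + 255 lands exactly on i8::MAX, while the last lane
-        // (127 + 7) saturates to i8::MAX.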
-        let a = i8x8::new(i8::MIN, -3, -2, -1, 0, 1, 2, i8::MAX);
-        let b = u8x8::new(u8::MAX, 1, 2, 3, 4, 5, 6, 7);
-        let e = i8x8::new(i8::MAX, -2, 0, 2, 4, 6, 8, i8::MAX);
-        let r: i8x8 = transmute(vuqadd_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuqaddq_s8() {
-        let a = i8x16::new(
-            i8::MIN,
-            -7,
-            -6,
-            -5,
-            -4,
-            -3,
-            -2,
-            -1,
-            0,
-            1,
-            2,
-            3,
-            4,
-            5,
-            6,
-            i8::MAX,
-        );
-        let b = u8x16::new(u8::MAX, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let e = i8x16::new(
-            i8::MAX,
-            -6,
-            -4,
-            -2,
-            0,
-            2,
-            4,
-            6,
-            8,
-            10,
-            12,
-            14,
-            16,
-            18,
-            20,
-            i8::MAX,
-        );
-        let r: i8x16 = transmute(vuqaddq_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuqadd_s16() {
-        let a = i16x4::new(i16::MIN, -1, 0, i16::MAX);
-        let b = u16x4::new(u16::MAX, 1, 2, 3);
-        let e = i16x4::new(i16::MAX, 0, 2, i16::MAX);
-        let r: i16x4 = transmute(vuqadd_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuqaddq_s16() {
-        let a = i16x8::new(i16::MIN, -3, -2, -1, 0, 1, 2, i16::MAX);
-        let b = u16x8::new(u16::MAX, 1, 2, 3, 4, 5, 6, 7);
-        let e = i16x8::new(i16::MAX, -2, 0, 2, 4, 6, 8, i16::MAX);
-        let r: i16x8 = transmute(vuqaddq_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuqadd_s32() {
-        let a = i32x2::new(i32::MIN, i32::MAX);
-        let b = u32x2::new(u32::MAX, 1);
-        let e = i32x2::new(i32::MAX, i32::MAX);
-        let r: i32x2 = transmute(vuqadd_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuqaddq_s32() {
-        let a = i32x4::new(i32::MIN, -1, 0, i32::MAX);
-        let b = u32x4::new(u32::MAX, 1, 2, 3);
-        let e = i32x4::new(i32::MAX, 0, 2, i32::MAX);
-        let r: i32x4 = transmute(vuqaddq_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuqadd_s64() {
-        let a = i64x1::new(i64::MIN);
-        let b = u64x1::new(u64::MAX);
-        let e = i64x1::new(i64::MAX);
-        let r: i64x1 = transmute(vuqadd_s64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vuqaddq_s64() {
-        let a = i64x2::new(i64::MIN, i64::MAX);
-        let b = u64x2::new(u64::MAX, 1);
-        let e = i64x2::new(i64::MAX, i64::MAX);
-        let r: i64x2 = transmute(vuqaddq_s64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsqadd_u8() {
-        let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, u8::MAX);
-        let b = i8x8::new(i8::MIN, -3, -2, -1, 0, 1, 2, 3);
-        let e = u8x8::new(0, 0, 0, 2, 4, 6, 8, u8::MAX);
-        let r: u8x8 = transmute(vsqadd_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsqaddq_u8() {
-        let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, u8::MAX);
-        let b = i8x16::new(i8::MIN, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7);
-        let e = u8x16::new(0, 0, 0, 0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, u8::MAX);
-        let r: u8x16 = transmute(vsqaddq_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsqadd_u16() {
-        let a = u16x4::new(0, 1, 2, u16::MAX);
-        let b = i16x4::new(i16::MIN, -1, 0, 1);
-        let e = u16x4::new(0, 0, 2, u16::MAX);
-        let r: u16x4 = transmute(vsqadd_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsqaddq_u16() {
-        let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, u16::MAX);
-        let b = i16x8::new(i16::MIN, -3, -2, -1, 0, 1, 2, 3);
-        let e = u16x8::new(0, 0, 0, 2, 4, 6, 8, u16::MAX);
-        let r: u16x8 = transmute(vsqaddq_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsqadd_u32() {
-        let a = u32x2::new(0, u32::MAX);
-        let b = i32x2::new(i32::MIN, 1);
-        let e = u32x2::new(0, u32::MAX);
-        let r: u32x2 = transmute(vsqadd_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsqaddq_u32() {
-        let a = u32x4::new(0, 1, 2, u32::MAX);
-        let b = i32x4::new(i32::MIN, -1, 0, 1);
-        let e = u32x4::new(0, 0, 2, u32::MAX);
-        let r: u32x4 = transmute(vsqaddq_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsqadd_u64() {
-        let a = u64x1::new(0);
-        let b = i64x1::new(i64::MIN);
-        let e = u64x1::new(0);
-        let r: u64x1 = transmute(vsqadd_u64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsqaddq_u64() {
-        let a = u64x2::new(0, u64::MAX);
-        let b = i64x2::new(i64::MIN, 1);
-        let e = u64x2::new(0, u64::MAX);
-        let r: u64x2 = transmute(vsqaddq_u64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddq_s16() {
-        let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b = i16x8::new(0, -1, -2, -3, -4, -5, -6, -7);
-        let r: i16x8 = transmute(vpaddq_s16(transmute(a), transmute(b)));
-        let e = i16x8::new(3, 7, 11, 15, -1, -5, -9, -13);
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddq_s32() {
-        let a = i32x4::new(1, 2, 3, 4);
-        let b = i32x4::new(0, -1, -2, -3);
-        let r: i32x4 = transmute(vpaddq_s32(transmute(a), transmute(b)));
-        let e = i32x4::new(3, 7, -1, -5);
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddq_s64() {
-        let a = i64x2::new(1, 2);
-        let b = i64x2::new(0, -1);
-        let r: i64x2 = transmute(vpaddq_s64(transmute(a), transmute(b)));
-        let e = i64x2::new(3, -1);
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddq_s8() {
-        let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let b = i8x16::new(
-            0, -1, -2, -3, -4, -5, -6, -7, -8, -8, -10, -11, -12, -13, -14, -15,
-        );
-        let r: i8x16 = transmute(vpaddq_s8(transmute(a), transmute(b)));
-        let e = i8x16::new(
-            3, 7, 11, 15, 19, 23, 27, 31, -1, -5, -9, -13, -16, -21, -25, -29,
-        );
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddq_u16() {
-        let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let b = u16x8::new(17, 18, 19, 20, 20, 21, 22, 23);
-        let r: u16x8 = transmute(vpaddq_u16(transmute(a), transmute(b)));
-        let e = u16x8::new(1, 5, 9, 13, 35, 39, 41, 45);
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddq_u32() {
-        let a = u32x4::new(0, 1, 2, 3);
-        let b = u32x4::new(17, 18, 19, 20);
-        let r: u32x4 = transmute(vpaddq_u32(transmute(a), transmute(b)));
-        let e = u32x4::new(1, 5, 35, 39);
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddq_u64() {
-        let a = u64x2::new(0, 1);
-        let b = u64x2::new(17, 18);
-        let r: u64x2 = transmute(vpaddq_u64(transmute(a), transmute(b)));
-        let e = u64x2::new(1, 35);
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddq_u8() {
-        let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let b = i8x16::new(
-            17, 18, 19, 20, 20, 21, 22, 23, 24, 25, 26, 27, 29, 29, 30, 31,
-        );
-        let r = i8x16::new(1, 5, 9, 13, 17, 21, 25, 29, 35, 39, 41, 45, 49, 53, 58, 61);
-        let e: i8x16 = transmute(vpaddq_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddd_s64() {
-        let a = i64x2::new(2, -3);
-        let r: i64 = vpaddd_s64(transmute(a));
-        let e = -1_i64;
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddd_u64() {
-        let a = i64x2::new(2, 3);
-        let r: u64 = vpaddd_u64(transmute(a));
-        let e = 5_u64;
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_f64() {
-        let a = 1.;
-        let b = 8.;
-        let e = 9.;
-        let r: f64 = transmute(vadd_f64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddq_f64() {
-        let a = f64x2::new(1., 2.);
-        let b = f64x2::new(8., 7.);
-        let e = f64x2::new(9., 9.);
-        let r: f64x2 = transmute(vaddq_f64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_s64() {
-        let a = 1_i64;
-        let b = 8_i64;
-        let e = 9_i64;
-        let r: i64 = transmute(vadd_s64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_u64() {
-        let a = 1_u64;
-        let b = 8_u64;
-        let e = 9_u64;
-        let r: u64 = transmute(vadd_u64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddd_s64() {
-        let a = 1_i64;
-        let b = 8_i64;
-        let e = 9_i64;
-        let r: i64 = vaddd_s64(a, b);
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddd_u64() {
-        let a = 1_u64;
-        let b = 8_u64;
-        let e = 9_u64;
-        let r: u64 = vaddd_u64(a, b);
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmaxv_s8() {
-        let r = vmaxv_s8(transmute(i8x8::new(1, 2, 3, 4, -8, 6, 7, 5)));
-        assert_eq!(r, 7_i8);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmaxvq_s8() {
-        #[rustfmt::skip]
-        let r = vmaxvq_s8(transmute(i8x16::new(
-            1, 2, 3, 4,
-            -16, 6, 7, 5,
-            8, 1, 1, 1,
-            1, 1, 1, 1,
-        )));
-        assert_eq!(r, 8_i8);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmaxv_s16() {
-        let r = vmaxv_s16(transmute(i16x4::new(1, 2, -4, 3)));
-        assert_eq!(r, 3_i16);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmaxvq_s16() {
-        let r = vmaxvq_s16(transmute(i16x8::new(1, 2, 7, 4, -16, 6, 7, 5)));
-        assert_eq!(r, 7_i16);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmaxv_s32() {
-        let r = vmaxv_s32(transmute(i32x2::new(1, -4)));
-        assert_eq!(r, 1_i32);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmaxvq_s32() {
-        let r = vmaxvq_s32(transmute(i32x4::new(1, 2, -32, 4)));
-        assert_eq!(r, 4_i32);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmaxv_u8() {
-        let r = vmaxv_u8(transmute(u8x8::new(1, 2, 3, 4, 8, 6, 7, 5)));
-        assert_eq!(r, 8_u8);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmaxvq_u8() {
-        #[rustfmt::skip]
-        let r = vmaxvq_u8(transmute(u8x16::new(
-            1, 2, 3, 4,
-            16, 6, 7, 5,
-            8, 1, 1, 1,
-            1, 1, 1, 1,
-        )));
-        assert_eq!(r, 16_u8);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmaxv_u16() {
-        let r = vmaxv_u16(transmute(u16x4::new(1, 2, 4, 3)));
-        assert_eq!(r, 4_u16);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmaxvq_u16() {
-        let r = vmaxvq_u16(transmute(u16x8::new(1, 2, 7, 4, 16, 6, 7, 5)));
-        assert_eq!(r, 16_u16);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmaxv_u32() {
-        let r = vmaxv_u32(transmute(u32x2::new(1, 4)));
-        assert_eq!(r, 4_u32);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmaxvq_u32() {
-        let r = vmaxvq_u32(transmute(u32x4::new(1, 2, 32, 4)));
-        assert_eq!(r, 32_u32);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmaxv_f32() {
-        let r = vmaxv_f32(transmute(f32x2::new(1., 4.)));
-        assert_eq!(r, 4_f32);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmaxvq_f32() {
-        let r = vmaxvq_f32(transmute(f32x4::new(1., 2., 32., 4.)));
-        assert_eq!(r, 32_f32);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmaxvq_f64() {
-        let r = vmaxvq_f64(transmute(f64x2::new(1., 4.)));
-        assert_eq!(r, 4_f64);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vminv_s8() {
-        let r = vminv_s8(transmute(i8x8::new(1, 2, 3, 4, -8, 6, 7, 5)));
-        assert_eq!(r, -8_i8);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vminvq_s8() {
-        #[rustfmt::skip]
-        let r = vminvq_s8(transmute(i8x16::new(
-            1, 2, 3, 4,
-            -16, 6, 7, 5,
-            8, 1, 1, 1,
-            1, 1, 1, 1,
-        )));
-        assert_eq!(r, -16_i8);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vminv_s16() {
-        let r = vminv_s16(transmute(i16x4::new(1, 2, -4, 3)));
-        assert_eq!(r, -4_i16);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vminvq_s16() {
-        let r = vminvq_s16(transmute(i16x8::new(1, 2, 7, 4, -16, 6, 7, 5)));
-        assert_eq!(r, -16_i16);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vminv_s32() {
-        let r = vminv_s32(transmute(i32x2::new(1, -4)));
-        assert_eq!(r, -4_i32);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vminvq_s32() {
-        let r = vminvq_s32(transmute(i32x4::new(1, 2, -32, 4)));
-        assert_eq!(r, -32_i32);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vminv_u8() {
-        let r = vminv_u8(transmute(u8x8::new(1, 2, 3, 4, 8, 6, 7, 5)));
-        assert_eq!(r, 1_u8);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vminvq_u8() {
-        #[rustfmt::skip]
-        let r = vminvq_u8(transmute(u8x16::new(
-            1, 2, 3, 4,
-            16, 6, 7, 5,
-            8, 1, 1, 1,
-            1, 1, 1, 1,
-        )));
-        assert_eq!(r, 1_u8);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vminv_u16() {
-        let r = vminv_u16(transmute(u16x4::new(1, 2, 4, 3)));
-        assert_eq!(r, 1_u16);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vminvq_u16() {
-        let r = vminvq_u16(transmute(u16x8::new(1, 2, 7, 4, 16, 6, 7, 5)));
-        assert_eq!(r, 1_u16);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vminv_u32() {
-        let r = vminv_u32(transmute(u32x2::new(1, 4)));
-        assert_eq!(r, 1_u32);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vminvq_u32() {
-        let r = vminvq_u32(transmute(u32x4::new(1, 2, 32, 4)));
-        assert_eq!(r, 1_u32);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vminv_f32() {
-        let r = vminv_f32(transmute(f32x2::new(1., 4.)));
-        assert_eq!(r, 1_f32);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vminvq_f32() {
-        let r = vminvq_f32(transmute(f32x4::new(1., 2., 32., 4.)));
-        assert_eq!(r, 1_f32);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vminvq_f64() {
-        let r = vminvq_f64(transmute(f64x2::new(1., 4.)));
-        assert_eq!(r, 1_f64);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpminq_s8() {
-        #[rustfmt::skip]
-        let a = i8x16::new(1, -2, 3, -4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
-        #[rustfmt::skip]
-        let b = i8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9);
-        #[rustfmt::skip]
-        let e = i8x16::new(-2, -4, 5, 7, 1, 3, 5, 7, 0, 2, 4, 6, 0, 2, 4, 6);
-        let r: i8x16 = transmute(vpminq_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpminq_s16() {
-        let a = i16x8::new(1, -2, 3, 4, 5, 6, 7, 8);
-        let b = i16x8::new(0, 3, 2, 5, 4, 7, 6, 9);
-        let e = i16x8::new(-2, 3, 5, 7, 0, 2, 4, 6);
-        let r: i16x8 = transmute(vpminq_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpminq_s32() {
-        let a = i32x4::new(1, -2, 3, 4);
-        let b = i32x4::new(0, 3, 2, 5);
-        let e = i32x4::new(-2, 3, 0, 2);
-        let r: i32x4 = transmute(vpminq_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpminq_u8() {
-        #[rustfmt::skip]
-        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
-        #[rustfmt::skip]
-        let b = u8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9);
-        #[rustfmt::skip]
-        let e = u8x16::new(1, 3, 5, 7, 1, 3, 5, 7, 0, 2, 4, 6, 0, 2, 4, 6);
-        let r: u8x16 = transmute(vpminq_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpminq_u16() {
-        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b = u16x8::new(0, 3, 2, 5, 4, 7, 6, 9);
-        let e = u16x8::new(1, 3, 5, 7, 0, 2, 4, 6);
-        let r: u16x8 = transmute(vpminq_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpminq_u32() {
-        let a = u32x4::new(1, 2, 3, 4);
-        let b = u32x4::new(0, 3, 2, 5);
-        let e = u32x4::new(1, 3, 0, 2);
-        let r: u32x4 = transmute(vpminq_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
 
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmin_f32() {
-        let a = f32x4::new(1., -2., 3., 4.);
-        let b = f32x4::new(0., 3., 2., 5.);
-        let e = f32x4::new(-2., 3., 0., 2.);
-        let r: f32x4 = transmute(vpminq_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
+/// Unsigned shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vshrd_n_u64<const N: i32>(a: u64) -> u64 {
+    static_assert!(N >= 1 && N <= 64);
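+    // An architectural shift by 64 produces 0, but a Rust shift by the full
+    // bit width overflows (panicking in debug builds), so that case is
+    // handled explicitly before shifting.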
+    if N == 64 {
+        return 0;
+    }
+    a >> N
+}
 
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmin_f64() {
-        let a = f64x2::new(1., -2.);
-        let b = f64x2::new(0., 3.);
-        let e = f64x2::new(-2., 0.);
-        let r: f64x2 = transmute(vpminq_f64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
+/// Signed shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
+    static_assert!(N >= 1 && N <= 64);
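+    // The accumulate wraps on overflow, matching the modular addition of the
+    // architectural `ssra` operation.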
+    a.wrapping_add(vshrd_n_s64::<N>(b))
+}
 
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmaxq_s8() {
-        #[rustfmt::skip]
-        let a = i8x16::new(1, -2, 3, -4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
-        #[rustfmt::skip]
-        let b = i8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9);
-        #[rustfmt::skip]
-        let e = i8x16::new(1, 3, 6, 8, 2, 4, 6, 8, 3, 5, 7, 9, 3, 5, 7, 9);
-        let r: i8x16 = transmute(vpmaxq_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
+/// Unsigned shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
+    static_assert!(N >= 1 && N <= 64);
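+    // As in the signed variant, the accumulate wraps rather than saturating.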
+    a.wrapping_add(vshrd_n_u64::<N>(b))
+}
 
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmaxq_s16() {
-        let a = i16x8::new(1, -2, 3, 4, 5, 6, 7, 8);
-        let b = i16x8::new(0, 3, 2, 5, 4, 7, 6, 9);
-        let e = i16x8::new(1, 4, 6, 8, 3, 5, 7, 9);
-        let r: i16x8 = transmute(vpmaxq_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
+#[cfg(test)]
+mod tests {
+    use crate::core_arch::aarch64::test_support::*;
+    use crate::core_arch::arm_shared::test_support::*;
+    use crate::core_arch::{aarch64::neon::*, aarch64::*, simd::*};
+    use std::mem::transmute;
+    use stdarch_test::simd_test;
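+
+    // A minimal illustrative check for the scalar helpers defined above
+    // (example only; this test name is not taken from the existing suite).
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vshrd_vsrad_examples() {
+        // A shift by the full width clears the value instead of overflowing.
+        assert_eq!(vshrd_n_u64::<64>(u64::MAX), 0);
+        // vsrad_n_u64 computes a + (b >> N): 1 + (8 >> 2) == 3.
+        assert_eq!(vsrad_n_u64::<2>(1, 8), 3);
+    }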
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpmaxq_s32() {
-        let a = i32x4::new(1, -2, 3, 4);
-        let b = i32x4::new(0, 3, 2, 5);
-        let e = i32x4::new(1, 4, 3, 5);
-        let r: i32x4 = transmute(vpmaxq_s32(transmute(a), transmute(b)));
+    unsafe fn test_vadd_f64() {
+        let a = 1.;
+        let b = 8.;
+        let e = 9.;
+        let r: f64 = transmute(vadd_f64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpmaxq_u8() {
-        #[rustfmt::skip]
-        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
-        #[rustfmt::skip]
-        let b = u8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9);
-        #[rustfmt::skip]
-        let e = u8x16::new(2, 4, 6, 8, 2, 4, 6, 8, 3, 5, 7, 9, 3, 5, 7, 9);
-        let r: u8x16 = transmute(vpmaxq_u8(transmute(a), transmute(b)));
+    unsafe fn test_vaddq_f64() {
+        let a = f64x2::new(1., 2.);
+        let b = f64x2::new(8., 7.);
+        let e = f64x2::new(9., 9.);
+        let r: f64x2 = transmute(vaddq_f64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpmaxq_u16() {
-        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b = u16x8::new(0, 3, 2, 5, 4, 7, 6, 9);
-        let e = u16x8::new(2, 4, 6, 8, 3, 5, 7, 9);
-        let r: u16x8 = transmute(vpmaxq_u16(transmute(a), transmute(b)));
+    unsafe fn test_vadd_s64() {
+        let a = 1_i64;
+        let b = 8_i64;
+        let e = 9_i64;
+        let r: i64 = transmute(vadd_s64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpmaxq_u32() {
-        let a = u32x4::new(1, 2, 3, 4);
-        let b = u32x4::new(0, 3, 2, 5);
-        let e = u32x4::new(2, 4, 3, 5);
-        let r: u32x4 = transmute(vpmaxq_u32(transmute(a), transmute(b)));
+    unsafe fn test_vadd_u64() {
+        let a = 1_u64;
+        let b = 8_u64;
+        let e = 9_u64;
+        let r: u64 = transmute(vadd_u64(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpmax_f32() {
-        let a = f32x4::new(1., -2., 3., 4.);
-        let b = f32x4::new(0., 3., 2., 5.);
-        let e = f32x4::new(1., 4., 3., 5.);
-        let r: f32x4 = transmute(vpmaxq_f32(transmute(a), transmute(b)));
+    unsafe fn test_vaddd_s64() {
+        let a = 1_i64;
+        let b = 8_i64;
+        let e = 9_i64;
+        let r: i64 = vaddd_s64(a, b);
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpmax_f64() {
-        let a = f64x2::new(1., -2.);
-        let b = f64x2::new(0., 3.);
-        let e = f64x2::new(1., 3.);
-        let r: f64x2 = transmute(vpmaxq_f64(transmute(a), transmute(b)));
+    unsafe fn test_vaddd_u64() {
+        let a = 1_u64;
+        let b = 8_u64;
+        let e = 9_u64;
+        let r: u64 = vaddd_u64(a, b);
         assert_eq!(r, e);
     }
 
@@ -4444,291 +884,6 @@ mod tests {
         assert_eq!(r, e);
     }
 
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vceq_u64() {
-        test_cmp_u64(
-            |i, j| vceq_u64(i, j),
-            |a: u64, b: u64| -> u64 { if a == b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vceqq_u64() {
-        testq_cmp_u64(
-            |i, j| vceqq_u64(i, j),
-            |a: u64, b: u64| -> u64 { if a == b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vceq_s64() {
-        test_cmp_s64(
-            |i, j| vceq_s64(i, j),
-            |a: i64, b: i64| -> u64 { if a == b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vceqq_s64() {
-        testq_cmp_s64(
-            |i, j| vceqq_s64(i, j),
-            |a: i64, b: i64| -> u64 { if a == b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vceq_p64() {
-        test_cmp_p64(
-            |i, j| vceq_p64(i, j),
-            |a: u64, b: u64| -> u64 { if a == b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vceqq_p64() {
-        testq_cmp_p64(
-            |i, j| vceqq_p64(i, j),
-            |a: u64, b: u64| -> u64 { if a == b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vceq_f64() {
-        test_cmp_f64(
-            |i, j| vceq_f64(i, j),
-            |a: f64, b: f64| -> u64 { if a == b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vceqq_f64() {
-        testq_cmp_f64(
-            |i, j| vceqq_f64(i, j),
-            |a: f64, b: f64| -> u64 { if a == b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcgt_s64() {
-        test_cmp_s64(
-            |i, j| vcgt_s64(i, j),
-            |a: i64, b: i64| -> u64 { if a > b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtq_s64() {
-        testq_cmp_s64(
-            |i, j| vcgtq_s64(i, j),
-            |a: i64, b: i64| -> u64 { if a > b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcgt_u64() {
-        test_cmp_u64(
-            |i, j| vcgt_u64(i, j),
-            |a: u64, b: u64| -> u64 { if a > b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtq_u64() {
-        testq_cmp_u64(
-            |i, j| vcgtq_u64(i, j),
-            |a: u64, b: u64| -> u64 { if a > b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcgt_f64() {
-        test_cmp_f64(
-            |i, j| vcgt_f64(i, j),
-            |a: f64, b: f64| -> u64 { if a > b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtq_f64() {
-        testq_cmp_f64(
-            |i, j| vcgtq_f64(i, j),
-            |a: f64, b: f64| -> u64 { if a > b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vclt_s64() {
-        test_cmp_s64(
-            |i, j| vclt_s64(i, j),
-            |a: i64, b: i64| -> u64 { if a < b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcltq_s64() {
-        testq_cmp_s64(
-            |i, j| vcltq_s64(i, j),
-            |a: i64, b: i64| -> u64 { if a < b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vclt_u64() {
-        test_cmp_u64(
-            |i, j| vclt_u64(i, j),
-            |a: u64, b: u64| -> u64 { if a < b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcltq_u64() {
-        testq_cmp_u64(
-            |i, j| vcltq_u64(i, j),
-            |a: u64, b: u64| -> u64 { if a < b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vltq_f64() {
-        test_cmp_f64(
-            |i, j| vclt_f64(i, j),
-            |a: f64, b: f64| -> u64 { if a < b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcltq_f64() {
-        testq_cmp_f64(
-            |i, j| vcltq_f64(i, j),
-            |a: f64, b: f64| -> u64 { if a < b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcle_s64() {
-        test_cmp_s64(
-            |i, j| vcle_s64(i, j),
-            |a: i64, b: i64| -> u64 { if a <= b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcleq_s64() {
-        testq_cmp_s64(
-            |i, j| vcleq_s64(i, j),
-            |a: i64, b: i64| -> u64 { if a <= b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcle_u64() {
-        test_cmp_u64(
-            |i, j| vcle_u64(i, j),
-            |a: u64, b: u64| -> u64 { if a <= b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcleq_u64() {
-        testq_cmp_u64(
-            |i, j| vcleq_u64(i, j),
-            |a: u64, b: u64| -> u64 { if a <= b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vleq_f64() {
-        test_cmp_f64(
-            |i, j| vcle_f64(i, j),
-            |a: f64, b: f64| -> u64 { if a <= b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcleq_f64() {
-        testq_cmp_f64(
-            |i, j| vcleq_f64(i, j),
-            |a: f64, b: f64| -> u64 { if a <= b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcge_s64() {
-        test_cmp_s64(
-            |i, j| vcge_s64(i, j),
-            |a: i64, b: i64| -> u64 { if a >= b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcgeq_s64() {
-        testq_cmp_s64(
-            |i, j| vcgeq_s64(i, j),
-            |a: i64, b: i64| -> u64 { if a >= b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcge_u64() {
-        test_cmp_u64(
-            |i, j| vcge_u64(i, j),
-            |a: u64, b: u64| -> u64 { if a >= b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcgeq_u64() {
-        testq_cmp_u64(
-            |i, j| vcgeq_u64(i, j),
-            |a: u64, b: u64| -> u64 { if a >= b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vgeq_f64() {
-        test_cmp_f64(
-            |i, j| vcge_f64(i, j),
-            |a: f64, b: f64| -> u64 { if a >= b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcgeq_f64() {
-        testq_cmp_f64(
-            |i, j| vcgeq_f64(i, j),
-            |a: f64, b: f64| -> u64 { if a >= b { 0xFFFFFFFFFFFFFFFF } else { 0 } },
-        );
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_f64() {
-        test_ari_f64(|i, j| vmul_f64(i, j), |a: f64, b: f64| -> f64 { a * b });
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_f64() {
-        testq_ari_f64(|i, j| vmulq_f64(i, j), |a: f64, b: f64| -> f64 { a * b });
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsub_f64() {
-        test_ari_f64(|i, j| vsub_f64(i, j), |a: f64, b: f64| -> f64 { a - b });
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vsubq_f64() {
-        testq_ari_f64(|i, j| vsubq_f64(i, j), |a: f64, b: f64| -> f64 { a - b });
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vabsd_s64() {
-        assert_eq!(vabsd_s64(-1), 1);
-        assert_eq!(vabsd_s64(0), 0);
-        assert_eq!(vabsd_s64(1), 1);
-        assert_eq!(vabsd_s64(i64::MIN), i64::MIN);
-        assert_eq!(vabsd_s64(i64::MIN + 1), i64::MAX);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vabs_s64() {
-        let a = i64x1::new(i64::MIN);
-        let r: i64x1 = transmute(vabs_s64(transmute(a)));
-        let e = i64x1::new(i64::MIN);
-        assert_eq!(r, e);
-        let a = i64x1::new(i64::MIN + 1);
-        let r: i64x1 = transmute(vabs_s64(transmute(a)));
-        let e = i64x1::new(i64::MAX);
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vabsq_s64() {
-        let a = i64x2::new(i64::MIN, i64::MIN + 1);
-        let r: i64x2 = transmute(vabsq_s64(transmute(a)));
-        let e = i64x2::new(i64::MIN, i64::MAX);
-        assert_eq!(r, e);
-    }
-
     #[simd_test(enable = "neon")]
     unsafe fn test_vbsl_f64() {
         let a = u64x1::new(0x8000000000000000);
@@ -4766,134 +921,6 @@ mod tests {
         assert_eq!(r, e);
     }
 
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddv_s16() {
-        let a = i16x4::new(1, 2, 3, -4);
-        let r: i16 = vaddv_s16(transmute(a));
-        let e = 2_i16;
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddv_u16() {
-        let a = u16x4::new(1, 2, 3, 4);
-        let r: u16 = vaddv_u16(transmute(a));
-        let e = 10_u16;
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddv_s32() {
-        let a = i32x2::new(1, -2);
-        let r: i32 = vaddv_s32(transmute(a));
-        let e = -1_i32;
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddv_u32() {
-        let a = u32x2::new(1, 2);
-        let r: u32 = vaddv_u32(transmute(a));
-        let e = 3_u32;
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddv_s8() {
-        let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, -8);
-        let r: i8 = vaddv_s8(transmute(a));
-        let e = 20_i8;
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddv_u8() {
-        let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r: u8 = vaddv_u8(transmute(a));
-        let e = 36_u8;
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddvq_s16() {
-        let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, -8);
-        let r: i16 = vaddvq_s16(transmute(a));
-        let e = 20_i16;
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddvq_u16() {
-        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r: u16 = vaddvq_u16(transmute(a));
-        let e = 36_u16;
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddvq_s32() {
-        let a = i32x4::new(1, 2, 3, -4);
-        let r: i32 = vaddvq_s32(transmute(a));
-        let e = 2_i32;
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddvq_u32() {
-        let a = u32x4::new(1, 2, 3, 4);
-        let r: u32 = vaddvq_u32(transmute(a));
-        let e = 10_u32;
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddvq_s8() {
-        let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -16);
-        let r: i8 = vaddvq_s8(transmute(a));
-        let e = 104_i8;
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddvq_u8() {
-        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let r: u8 = vaddvq_u8(transmute(a));
-        let e = 136_u8;
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddvq_s64() {
-        let a = i64x2::new(1, -2);
-        let r: i64 = vaddvq_s64(transmute(a));
-        let e = -1_i64;
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddvq_u64() {
-        let a = u64x2::new(1, 2);
-        let r: u64 = vaddvq_u64(transmute(a));
-        let e = 3_u64;
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddlv_s8() {
-        let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, -8);
-        let r: i16 = vaddlv_s8(transmute(a));
-        let e = 20_i16;
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddlv_u8() {
-        let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r: u16 = vaddlv_u8(transmute(a));
-        let e = 36_u16;
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddlvq_s8() {
-        let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -16);
-        let r: i16 = vaddlvq_s8(transmute(a));
-        let e = 104_i16;
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddlvq_u8() {
-        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let r: u16 = vaddlvq_u8(transmute(a));
-        let e = 136_u16;
-        assert_eq!(r, e);
-    }
-
     #[simd_test(enable = "neon")]
     unsafe fn test_vld1_f64() {
         let a: [f64; 2] = [0., 1.];
@@ -4966,55 +993,6 @@ mod tests {
         assert_eq!(vals[1], 1.);
         assert_eq!(vals[2], 2.);
     }
-
-    #[simd_test(enable = "neon,sm4")]
-    unsafe fn test_vsm3tt1aq_u32() {
-        let a: u32x4 = u32x4::new(1, 2, 3, 4);
-        let b: u32x4 = u32x4::new(1, 2, 3, 4);
-        let c: u32x4 = u32x4::new(1, 2, 3, 4);
-        let e: u32x4 = u32x4::new(2, 1536, 4, 16395);
-        let r: u32x4 = transmute(vsm3tt1aq_u32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,sm4")]
-    unsafe fn test_vsm3tt1bq_u32() {
-        let a: u32x4 = u32x4::new(1, 2, 3, 4);
-        let b: u32x4 = u32x4::new(1, 2, 3, 4);
-        let c: u32x4 = u32x4::new(1, 2, 3, 4);
-        let e: u32x4 = u32x4::new(2, 1536, 4, 16392);
-        let r: u32x4 = transmute(vsm3tt1bq_u32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,sm4")]
-    unsafe fn test_vsm3tt2aq_u32() {
-        let a: u32x4 = u32x4::new(1, 2, 3, 4);
-        let b: u32x4 = u32x4::new(1, 2, 3, 4);
-        let c: u32x4 = u32x4::new(1, 2, 3, 4);
-        let e: u32x4 = u32x4::new(2, 1572864, 4, 1447435);
-        let r: u32x4 = transmute(vsm3tt2aq_u32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,sm4")]
-    unsafe fn test_vsm3tt2bq_u32() {
-        let a: u32x4 = u32x4::new(1, 2, 3, 4);
-        let b: u32x4 = u32x4::new(1, 2, 3, 4);
-        let c: u32x4 = u32x4::new(1, 2, 3, 4);
-        let e: u32x4 = u32x4::new(2, 1572864, 4, 1052680);
-        let r: u32x4 = transmute(vsm3tt2bq_u32::<0>(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,sha3")]
-    unsafe fn test_vxarq_u64() {
-        let a: u64x2 = u64x2::new(1, 2);
-        let b: u64x2 = u64x2::new(3, 4);
-        let e: u64x2 = u64x2::new(2, 6);
-        let r: u64x2 = transmute(vxarq_u64::<0>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
 }
 
 #[cfg(test)]
diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs
index c69661b389..11d6e2df3a 100644
--- a/crates/core_arch/src/arm/mod.rs
+++ b/crates/core_arch/src/arm/mod.rs
@@ -64,12 +64,3 @@ pub use crate::core_arch::arm_shared::*;
 
 #[cfg(test)]
 use stdarch_test::assert_instr;
-
-// NEON intrinsics are currently broken on big-endian, so don't expose them. (#1484)
-#[cfg(target_endian = "little")]
-#[cfg(any(target_feature = "v7", doc))]
-pub(crate) mod neon;
-#[cfg(target_endian = "little")]
-#[cfg(any(target_feature = "v7", doc))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub use neon::*;
diff --git a/crates/core_arch/src/arm/neon.rs b/crates/core_arch/src/arm/neon.rs
index ffeb2c6fe1..90c358b5db 100644
--- a/crates/core_arch/src/arm/neon.rs
+++ b/crates/core_arch/src/arm/neon.rs
@@ -1,5 +1,4 @@
 use crate::core_arch::arm_shared::neon::*;
-use crate::mem::{align_of, transmute};
 
 #[cfg(test)]
 use stdarch_test::assert_instr;
@@ -10,1101 +9,18 @@ unsafe extern "unadjusted" {
     fn vbsl_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
     #[link_name = "llvm.arm.neon.vbsl.v16i8"]
     fn vbslq_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
-    #[link_name = "llvm.arm.neon.vpadals.v4i16.v8i8"]
-    pub(crate) fn vpadal_s8_(a: int16x4_t, b: int8x8_t) -> int16x4_t;
-    #[link_name = "llvm.arm.neon.vpadals.v2i32.v4i16"]
-    pub(crate) fn vpadal_s16_(a: int32x2_t, b: int16x4_t) -> int32x2_t;
-    #[link_name = "llvm.arm.neon.vpadals.v1i64.v2i32"]
-    pub(crate) fn vpadal_s32_(a: int64x1_t, b: int32x2_t) -> int64x1_t;
-    #[link_name = "llvm.arm.neon.vpadals.v8i16.v16i8"]
-    pub(crate) fn vpadalq_s8_(a: int16x8_t, b: int8x16_t) -> int16x8_t;
-    #[link_name = "llvm.arm.neon.vpadals.v4i32.v8i16"]
-    pub(crate) fn vpadalq_s16_(a: int32x4_t, b: int16x8_t) -> int32x4_t;
-    #[link_name = "llvm.arm.neon.vpadals.v2i64.v4i32"]
-    pub(crate) fn vpadalq_s32_(a: int64x2_t, b: int32x4_t) -> int64x2_t;
-
-    #[link_name = "llvm.arm.neon.vpadalu.v4i16.v8i8"]
-    pub(crate) fn vpadal_u8_(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t;
-    #[link_name = "llvm.arm.neon.vpadalu.v2i32.v4i16"]
-    pub(crate) fn vpadal_u16_(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t;
-    #[link_name = "llvm.arm.neon.vpadalu.v1i64.v2i32"]
-    pub(crate) fn vpadal_u32_(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t;
-    #[link_name = "llvm.arm.neon.vpadalu.v8i16.v16i8"]
-    pub(crate) fn vpadalq_u8_(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t;
-    #[link_name = "llvm.arm.neon.vpadalu.v4i32.v8i16"]
-    pub(crate) fn vpadalq_u16_(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t;
-    #[link_name = "llvm.arm.neon.vpadalu.v2i64.v4i32"]
-    pub(crate) fn vpadalq_u32_(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t;
-
-    #[link_name = "llvm.arm.neon.vtbl1"]
-    fn vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    #[link_name = "llvm.arm.neon.vtbl2"]
-    fn vtbl2(a: int8x8_t, b: int8x8_t, b: int8x8_t) -> int8x8_t;
-    #[link_name = "llvm.arm.neon.vtbl3"]
-    fn vtbl3(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
-    #[link_name = "llvm.arm.neon.vtbl4"]
-    fn vtbl4(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t;
-
-    #[link_name = "llvm.arm.neon.vtbx1"]
-    fn vtbx1(a: int8x8_t, b: int8x8_t, b: int8x8_t) -> int8x8_t;
-    #[link_name = "llvm.arm.neon.vtbx2"]
-    fn vtbx2(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
-    #[link_name = "llvm.arm.neon.vtbx3"]
-    fn vtbx3(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t;
-    #[link_name = "llvm.arm.neon.vtbx4"]
-    fn vtbx4(
-        a: int8x8_t,
-        b: int8x8_t,
-        b: int8x8_t,
-        c: int8x8_t,
-        d: int8x8_t,
-        e: int8x8_t,
-    ) -> int8x8_t;
-
-    #[link_name = "llvm.arm.neon.vshiftins.v8i8"]
-    fn vshiftins_v8i8(a: int8x8_t, b: int8x8_t, shift: int8x8_t) -> int8x8_t;
-    #[link_name = "llvm.arm.neon.vshiftins.v16i8"]
-    fn vshiftins_v16i8(a: int8x16_t, b: int8x16_t, shift: int8x16_t) -> int8x16_t;
-    #[link_name = "llvm.arm.neon.vshiftins.v4i16"]
-    fn vshiftins_v4i16(a: int16x4_t, b: int16x4_t, shift: int16x4_t) -> int16x4_t;
-    #[link_name = "llvm.arm.neon.vshiftins.v8i16"]
-    fn vshiftins_v8i16(a: int16x8_t, b: int16x8_t, shift: int16x8_t) -> int16x8_t;
-    #[link_name = "llvm.arm.neon.vshiftins.v2i32"]
-    fn vshiftins_v2i32(a: int32x2_t, b: int32x2_t, shift: int32x2_t) -> int32x2_t;
-    #[link_name = "llvm.arm.neon.vshiftins.v4i32"]
-    fn vshiftins_v4i32(a: int32x4_t, b: int32x4_t, shift: int32x4_t) -> int32x4_t;
-    #[link_name = "llvm.arm.neon.vshiftins.v1i64"]
-    fn vshiftins_v1i64(a: int64x1_t, b: int64x1_t, shift: int64x1_t) -> int64x1_t;
-    #[link_name = "llvm.arm.neon.vshiftins.v2i64"]
-    fn vshiftins_v2i64(a: int64x2_t, b: int64x2_t, shift: int64x2_t) -> int64x2_t;
-
-    #[link_name = "llvm.arm.neon.vld1.v8i8.p0i8"]
-    fn vld1_v8i8(addr: *const i8, align: i32) -> int8x8_t;
-    #[link_name = "llvm.arm.neon.vld1.v16i8.p0i8"]
-    fn vld1q_v16i8(addr: *const i8, align: i32) -> int8x16_t;
-    #[link_name = "llvm.arm.neon.vld1.v4i16.p0i8"]
-    fn vld1_v4i16(addr: *const i8, align: i32) -> int16x4_t;
-    #[link_name = "llvm.arm.neon.vld1.v8i16.p0i8"]
-    fn vld1q_v8i16(addr: *const i8, align: i32) -> int16x8_t;
-    #[link_name = "llvm.arm.neon.vld1.v2i32.p0i8"]
-    fn vld1_v2i32(addr: *const i8, align: i32) -> int32x2_t;
-    #[link_name = "llvm.arm.neon.vld1.v4i32.p0i8"]
-    fn vld1q_v4i32(addr: *const i8, align: i32) -> int32x4_t;
-    #[link_name = "llvm.arm.neon.vld1.v1i64.p0i8"]
-    fn vld1_v1i64(addr: *const i8, align: i32) -> int64x1_t;
-    #[link_name = "llvm.arm.neon.vld1.v2i64.p0i8"]
-    fn vld1q_v2i64(addr: *const i8, align: i32) -> int64x2_t;
-    #[link_name = "llvm.arm.neon.vld1.v2f32.p0i8"]
-    fn vld1_v2f32(addr: *const i8, align: i32) -> float32x2_t;
-    #[link_name = "llvm.arm.neon.vld1.v4f32.p0i8"]
-    fn vld1q_v4f32(addr: *const i8, align: i32) -> float32x4_t;
-
-    #[link_name = "llvm.arm.neon.vst1.p0i8.v8i8"]
-    fn vst1_v8i8(addr: *const i8, val: int8x8_t, align: i32);
-    #[link_name = "llvm.arm.neon.vst1.p0i8.v16i8"]
-    fn vst1q_v16i8(addr: *const i8, val: int8x16_t, align: i32);
-    #[link_name = "llvm.arm.neon.vst1.p0i8.v4i16"]
-    fn vst1_v4i16(addr: *const i8, val: int16x4_t, align: i32);
-    #[link_name = "llvm.arm.neon.vst1.p0i8.v8i16"]
-    fn vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32);
-    #[link_name = "llvm.arm.neon.vst1.p0i8.v2i32"]
-    fn vst1_v2i32(addr: *const i8, val: int32x2_t, align: i32);
-    #[link_name = "llvm.arm.neon.vst1.p0i8.v4i32"]
-    fn vst1q_v4i32(addr: *const i8, val: int32x4_t, align: i32);
-    #[link_name = "llvm.arm.neon.vst1.p0i8.v1i64"]
-    fn vst1_v1i64(addr: *const i8, val: int64x1_t, align: i32);
-    #[link_name = "llvm.arm.neon.vst1.p0i8.v2i64"]
-    fn vst1q_v2i64(addr: *const i8, val: int64x2_t, align: i32);
-    #[link_name = "llvm.arm.neon.vst1.p0i8.v2f32"]
-    fn vst1_v2f32(addr: *const i8, val: float32x2_t, align: i32);
-    #[link_name = "llvm.arm.neon.vst1.p0i8.v4f32"]
-    fn vst1q_v4f32(addr: *const i8, val: float32x4_t, align: i32);
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vld1.8"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t {
-    vld1_v8i8(ptr as *const i8, align_of::<i8>() as i32)
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vld1.8"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t {
-    vld1q_v16i8(ptr as *const i8, align_of::<i8>() as i32)
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vld1.16"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t {
-    vld1_v4i16(ptr as *const i8, align_of::<i16>() as i32)
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vld1.16"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t {
-    vld1q_v8i16(ptr as *const i8, align_of::<i16>() as i32)
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vldr))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t {
-    vld1_v2i32(ptr as *const i8, align_of::<i32>() as i32)
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vld1.32"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t {
-    vld1q_v4i32(ptr as *const i8, align_of::<i32>() as i32)
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vldr))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t {
-    vld1_v1i64(ptr as *const i8, align_of::<i64>() as i32)
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vld1.64"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t {
-    vld1q_v2i64(ptr as *const i8, align_of::<i64>() as i32)
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vld1.8"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t {
-    transmute(vld1_v8i8(ptr as *const i8, align_of::<u8>() as i32))
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vld1.8"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t {
-    transmute(vld1q_v16i8(ptr as *const i8, align_of::<u8>() as i32))
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vld1.16"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t {
-    transmute(vld1_v4i16(ptr as *const i8, align_of::<u16>() as i32))
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vld1.16"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t {
-    transmute(vld1q_v8i16(ptr as *const i8, align_of::<u16>() as i32))
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vldr))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t {
-    transmute(vld1_v2i32(ptr as *const i8, align_of::<u32>() as i32))
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vld1.32"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t {
-    transmute(vld1q_v4i32(ptr as *const i8, align_of::<u32>() as i32))
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vldr))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t {
-    transmute(vld1_v1i64(ptr as *const i8, align_of::<u64>() as i32))
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vld1.64"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t {
-    transmute(vld1q_v2i64(ptr as *const i8, align_of::<u64>() as i32))
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vld1.8"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t {
-    transmute(vld1_v8i8(ptr as *const i8, align_of::<p8>() as i32))
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vld1.8"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t {
-    transmute(vld1q_v16i8(ptr as *const i8, align_of::<p8>() as i32))
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vld1.16"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t {
-    transmute(vld1_v4i16(ptr as *const i8, align_of::<p16>() as i32))
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vld1.16"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
-    transmute(vld1q_v8i16(ptr as *const i8, align_of::<p16>() as i32))
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64)
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(vldr))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t {
-    transmute(vld1_v1i64(ptr as *const i8, align_of::<p64>() as i32))
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64)
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr("vld1.64"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t {
-    transmute(vld1q_v2i64(ptr as *const i8, align_of::<p64>() as i32))
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vldr))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t {
-    vld1_v2f32(ptr as *const i8, align_of::<f32>() as i32)
-}
-
-/// Load multiple single-element structures to one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vld1.32"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t {
-    vld1q_v4f32(ptr as *const i8, align_of::<f32>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.8"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) {
-    vst1_v8i8(ptr as *const i8, a, align_of::<i8>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.8"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) {
-    vst1q_v16i8(ptr as *const i8, a, align_of::<i8>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.16"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) {
-    vst1_v4i16(ptr as *const i8, a, align_of::<i16>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.16"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) {
-    vst1q_v8i16(ptr as *const i8, a, align_of::<i16>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.32"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) {
-    vst1_v2i32(ptr as *const i8, a, align_of::<i32>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.32"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) {
-    vst1q_v4i32(ptr as *const i8, a, align_of::<i32>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.64"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) {
-    vst1_v1i64(ptr as *const i8, a, align_of::<i64>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.64"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) {
-    vst1q_v2i64(ptr as *const i8, a, align_of::<i64>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.8"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) {
-    vst1_v8i8(ptr as *const i8, transmute(a), align_of::<u8>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.8"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) {
-    vst1q_v16i8(ptr as *const i8, transmute(a), align_of::<u8>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.16"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) {
-    vst1_v4i16(ptr as *const i8, transmute(a), align_of::<u16>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.16"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) {
-    vst1q_v8i16(ptr as *const i8, transmute(a), align_of::<u16>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.32"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) {
-    vst1_v2i32(ptr as *const i8, transmute(a), align_of::<u32>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.32"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) {
-    vst1q_v4i32(ptr as *const i8, transmute(a), align_of::<u32>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.64"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) {
-    vst1_v1i64(ptr as *const i8, transmute(a), align_of::<u64>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.64"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) {
-    vst1q_v2i64(ptr as *const i8, transmute(a), align_of::<u64>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.8"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) {
-    vst1_v8i8(ptr as *const i8, transmute(a), align_of::<p8>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.8"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) {
-    vst1q_v16i8(ptr as *const i8, transmute(a), align_of::<p8>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.16"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) {
-    vst1_v4i16(ptr as *const i8, transmute(a), align_of::<p16>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.16"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
-    vst1q_v8i16(ptr as *const i8, transmute(a), align_of::<p8>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64)
-#[inline]
-#[target_feature(enable = "neon,aes,v8")]
-#[cfg_attr(test, assert_instr("vst1.64"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
-    vst1_v1i64(ptr as *const i8, transmute(a), align_of::<p64>() as i32)
-}
-
-/// Store multiple single-element structures from one, two, three, or four registers.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64)
-#[inline]
-#[target_feature(enable = "neon,aes,v8")]
-#[cfg_attr(test, assert_instr("vst1.64"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) {
-    vst1q_v2i64(ptr as *const i8, transmute(a), align_of::<p64>() as i32)
-}
-
-// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.32"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) {
-    vst1_v2f32(ptr as *const i8, a, align_of::<f32>() as i32)
-}
-
-// Store multiple single-element structures from one, two, three, or four registers.
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vst1.32"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) {
-    vst1q_v4f32(ptr as *const i8, a, align_of::<f32>() as i32)
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbl))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    vtbl1(a, b)
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbl))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    transmute(vtbl1(transmute(a), transmute(b)))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbl))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t {
-    transmute(vtbl1(transmute(a), transmute(b)))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbl))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t {
-    vtbl2(a.0, a.1, b)
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbl))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t {
-    transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b)))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbl))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t {
-    transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b)))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbl))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t {
-    vtbl3(a.0, a.1, a.2, b)
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbl))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t {
-    transmute(vtbl3(
-        transmute(a.0),
-        transmute(a.1),
-        transmute(a.2),
-        transmute(b),
-    ))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbl))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
-    transmute(vtbl3(
-        transmute(a.0),
-        transmute(a.1),
-        transmute(a.2),
-        transmute(b),
-    ))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbl))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t {
-    vtbl4(a.0, a.1, a.2, a.3, b)
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbl))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t {
-    transmute(vtbl4(
-        transmute(a.0),
-        transmute(a.1),
-        transmute(a.2),
-        transmute(a.3),
-        transmute(b),
-    ))
-}
-
-/// Table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbl))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
-    transmute(vtbl4(
-        transmute(a.0),
-        transmute(a.1),
-        transmute(a.2),
-        transmute(a.3),
-        transmute(b),
-    ))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbx))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
-    vtbx1(a, b, c)
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbx))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
-    transmute(vtbx1(transmute(a), transmute(b), transmute(c)))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbx))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t {
-    transmute(vtbx1(transmute(a), transmute(b), transmute(c)))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbx))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t {
-    vtbx2(a, b.0, b.1, c)
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbx))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t {
-    transmute(vtbx2(
-        transmute(a),
-        transmute(b.0),
-        transmute(b.1),
-        transmute(c),
-    ))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbx))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t {
-    transmute(vtbx2(
-        transmute(a),
-        transmute(b.0),
-        transmute(b.1),
-        transmute(c),
-    ))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbx))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t {
-    vtbx3(a, b.0, b.1, b.2, c)
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbx))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t {
-    transmute(vtbx3(
-        transmute(a),
-        transmute(b.0),
-        transmute(b.1),
-        transmute(b.2),
-        transmute(c),
-    ))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbx))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
-    transmute(vtbx3(
-        transmute(a),
-        transmute(b.0),
-        transmute(b.1),
-        transmute(b.2),
-        transmute(c),
-    ))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbx))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
-    vtbx4(a, b.0, b.1, b.2, b.3, c)
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbx))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t {
-    transmute(vtbx4(
-        transmute(a),
-        transmute(b.0),
-        transmute(b.1),
-        transmute(b.2),
-        transmute(b.3),
-        transmute(c),
-    ))
-}
-
-/// Extended table look-up
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vtbx))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
-    transmute(vtbx4(
-        transmute(a),
-        transmute(b.0),
-        transmute(b.1),
-        transmute(b.2),
-        transmute(b.3),
-        transmute(c),
-    ))
-}
-
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsli.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsli_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    let n = N as i8;
-    vshiftins_v8i8(a, b, int8x8_t::splat(n))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsli.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsliq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    let n = N as i8;
-    vshiftins_v16i8(a, b, int8x16_t::splat(n))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsli.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsli_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    let n = N as i16;
-    vshiftins_v4i16(a, b, int16x4_t::splat(n))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsli.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsliq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    let n = N as i16;
-    vshiftins_v8i16(a, b, int16x8_t::splat(n))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsli.32", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsli_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    static_assert!(N >= 0 && N <= 31);
-    vshiftins_v2i32(a, b, int32x2_t::splat(N))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsli.32", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsliq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert!(N >= 0 && N <= 31);
-    vshiftins_v4i32(a, b, int32x4_t::splat(N))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsli.64", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsli_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    static_assert!(0 <= N && N <= 63);
-    vshiftins_v1i64(a, b, int64x1_t::splat(N as i64))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsli.64", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsliq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    static_assert!(0 <= N && N <= 63);
-    vshiftins_v2i64(a, b, int64x2_t::splat(N as i64))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsli.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsli_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    let n = N as i8;
-    transmute(vshiftins_v8i8(
-        transmute(a),
-        transmute(b),
-        int8x8_t::splat(n),
-    ))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsli.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsliq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    let n = N as i8;
-    transmute(vshiftins_v16i8(
-        transmute(a),
-        transmute(b),
-        int8x16_t::splat(n),
-    ))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsli.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsli_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    let n = N as i16;
-    transmute(vshiftins_v4i16(
-        transmute(a),
-        transmute(b),
-        int16x4_t::splat(n),
-    ))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsli.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsliq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    let n = N as i16;
-    transmute(vshiftins_v8i16(
-        transmute(a),
-        transmute(b),
-        int16x8_t::splat(n),
-    ))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsli.32", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsli_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    static_assert!(N >= 0 && N <= 31);
-    transmute(vshiftins_v2i32(
-        transmute(a),
-        transmute(b),
-        int32x2_t::splat(N),
-    ))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsli.32", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsliq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    static_assert!(N >= 0 && N <= 31);
-    transmute(vshiftins_v4i32(
-        transmute(a),
-        transmute(b),
-        int32x4_t::splat(N),
-    ))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsli.64", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsli_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    static_assert!(0 <= N && N <= 63);
-    transmute(vshiftins_v1i64(
-        transmute(a),
-        transmute(b),
-        int64x1_t::splat(N as i64),
-    ))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsli.64", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsliq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    static_assert!(0 <= N && N <= 63);
-    transmute(vshiftins_v2i64(
-        transmute(a),
-        transmute(b),
-        int64x2_t::splat(N as i64),
-    ))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsli.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsli_n_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    let n = N as i8;
-    transmute(vshiftins_v8i8(
-        transmute(a),
-        transmute(b),
-        int8x8_t::splat(n),
-    ))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsli.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsliq_n_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    let n = N as i8;
-    transmute(vshiftins_v16i8(
-        transmute(a),
-        transmute(b),
-        int8x16_t::splat(n),
-    ))
-}
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsli.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsli_n_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    let n = N as i16;
-    transmute(vshiftins_v4i16(
-        transmute(a),
-        transmute(b),
-        int16x4_t::splat(n),
-    ))
-}
-
-/// Shift Left and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsli.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsliq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    let n = N as i16;
-    transmute(vshiftins_v8i16(
-        transmute(a),
-        transmute(b),
-        int16x8_t::splat(n),
-    ))
 }
 
-/// Shift Left and Insert (immediate)
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p64)
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7,aes")]
-#[cfg_attr(test, assert_instr("vsli.64", N = 1))]
-#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
 pub unsafe fn vsli_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
     static_assert!(0 <= N && N <= 63);
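+    // A non-negative shift amount selects the "shift left and insert" form of
+    // the underlying vshiftins intrinsic; the vsri intrinsics pass -N to
+    // select the "shift right and insert" form instead.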
     transmute(vshiftins_v1i64(
@@ -1114,14 +30,17 @@ pub unsafe fn vsli_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1
     ))
 }
 
-/// Shift Left and Insert (immediate)
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p64)
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
+#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7,aes")]
-#[cfg_attr(test, assert_instr("vsli.64", N = 1))]
-#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
 pub unsafe fn vsliq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     static_assert!(0 <= N && N <= 63);
     transmute(vshiftins_v2i64(
@@ -1130,185 +49,41 @@ pub unsafe fn vsliq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x
         int64x2_t::splat(N as i64),
     ))
 }
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsri.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsri_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    static_assert!(1 <= N && N <= 8);
-    let n = -N as i8;
-    vshiftins_v8i8(a, b, int8x8_t::splat(n))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsri.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsriq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    static_assert!(1 <= N && N <= 8);
-    let n = -N as i8;
-    vshiftins_v16i8(a, b, int8x16_t::splat(n))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsri.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsri_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    static_assert!(1 <= N && N <= 16);
-    let n = -N as i16;
-    vshiftins_v4i16(a, b, int16x4_t::splat(n))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsri.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsriq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert!(1 <= N && N <= 16);
-    let n = -N as i16;
-    vshiftins_v8i16(a, b, int16x8_t::splat(n))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsri.32", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsri_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    static_assert!(1 <= N && N <= 32);
-    vshiftins_v2i32(a, b, int32x2_t::splat(-N))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsri.32", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsriq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert!(1 <= N && N <= 32);
-    vshiftins_v4i32(a, b, int32x4_t::splat(-N))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsri.64", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsri_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    static_assert!(1 <= N && N <= 64);
-    vshiftins_v1i64(a, b, int64x1_t::splat(-N as i64))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsri.64", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsriq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    static_assert!(1 <= N && N <= 64);
-    vshiftins_v2i64(a, b, int64x2_t::splat(-N as i64))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsri.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsri_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    static_assert!(1 <= N && N <= 8);
-    let n = -N as i8;
-    transmute(vshiftins_v8i8(
-        transmute(a),
-        transmute(b),
-        int8x8_t::splat(n),
-    ))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsri.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsriq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    static_assert!(1 <= N && N <= 8);
-    let n = -N as i8;
-    transmute(vshiftins_v16i8(
-        transmute(a),
-        transmute(b),
-        int8x16_t::splat(n),
-    ))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsri.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsri_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    static_assert!(1 <= N && N <= 16);
-    let n = -N as i16;
-    transmute(vshiftins_v4i16(
-        transmute(a),
-        transmute(b),
-        int16x4_t::splat(n),
-    ))
-}
-/// Shift Right and Insert (immediate)
+
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsri.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
+#[cfg(target_endian = "big")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7,aes")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsriq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    static_assert!(1 <= N && N <= 16);
-    let n = -N as i16;
-    transmute(vshiftins_v8i16(
-        transmute(a),
-        transmute(b),
-        int16x8_t::splat(n),
-    ))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsri.32", N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsri_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    static_assert!(1 <= N && N <= 32);
-    transmute(vshiftins_v2i32(
+pub unsafe fn vsliq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
+    static_assert!(0 <= N && N <= 63);
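+    // Big-endian targets store vector lanes in reverse order, so swap to the
+    // little-endian lane order the intrinsic expects, then swap the result back.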
+    let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]);
+    let ret_val: poly64x2_t = transmute(vshiftins_v2i64(
         transmute(a),
         transmute(b),
-        int32x2_t::splat(-N),
-    ))
+        int64x2_t::splat(N as i64),
+    ));
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-/// Shift Right and Insert (immediate)
+
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsri.32", N = 1))]
-#[rustc_legacy_const_generics(2)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7,aes")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsriq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    static_assert!(1 <= N && N <= 32);
-    transmute(vshiftins_v4i32(
-        transmute(a),
-        transmute(b),
-        int32x4_t::splat(-N),
-    ))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsri.64", N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsri_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+pub unsafe fn vsri_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
     static_assert!(1 <= N && N <= 64);
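+    // SRI accepts shift amounts of 1..=64 (SLI takes 0..=63); the amount is
+    // negated below to select the "shift right and insert" form of vshiftins.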
     transmute(vshiftins_v1i64(
         transmute(a),
@@ -1316,13 +91,19 @@ pub unsafe fn vsri_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1
         int64x1_t::splat(-N as i64),
     ))
 }
-/// Shift Right and Insert (immediate)
+
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsri.64", N = 1))]
-#[rustc_legacy_const_generics(2)]
+#[cfg(target_endian = "little")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7,aes")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsriq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsriq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     static_assert!(1 <= N && N <= 64);
     transmute(vshiftins_v2i64(
         transmute(a),
@@ -1330,121 +111,26 @@ pub unsafe fn vsriq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x
         int64x2_t::splat(-N as i64),
     ))
 }
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsri.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsri_n_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    static_assert!(1 <= N && N <= 8);
-    let n = -N as i8;
-    transmute(vshiftins_v8i8(
-        transmute(a),
-        transmute(b),
-        int8x8_t::splat(n),
-    ))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsri.8", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsriq_n_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
-    static_assert!(1 <= N && N <= 8);
-    let n = -N as i8;
-    transmute(vshiftins_v16i8(
-        transmute(a),
-        transmute(b),
-        int8x16_t::splat(n),
-    ))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsri.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsri_n_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
-    static_assert!(1 <= N && N <= 16);
-    let n = -N as i16;
-    transmute(vshiftins_v4i16(
-        transmute(a),
-        transmute(b),
-        int16x4_t::splat(n),
-    ))
-}
-/// Shift Right and Insert (immediate)
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr("vsri.16", N = 1))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsriq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
-    static_assert!(1 <= N && N <= 16);
-    let n = -N as i16;
-    transmute(vshiftins_v8i16(
-        transmute(a),
-        transmute(b),
-        int16x8_t::splat(n),
-    ))
-}
 
-/// Shift Right and Insert (immediate)
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p64)
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
+#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7,aes")]
-#[cfg_attr(test, assert_instr("vsri.64", N = 1))]
-#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vsri_n_p64<const N: i32>(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
-    static_assert!(1 <= N && N <= 64);
-    transmute(vshiftins_v1i64(
-        transmute(a),
-        transmute(b),
-        int64x1_t::splat(-N as i64),
-    ))
-}
-
-/// Shift Right and Insert (immediate)
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p64)
-#[inline]
-#[target_feature(enable = "neon,v7,aes")]
-#[cfg_attr(test, assert_instr("vsri.64", N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 pub unsafe fn vsriq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     static_assert!(1 <= N && N <= 64);
-    transmute(vshiftins_v2i64(
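+    // Big-endian: reverse the lane order before calling the little-endian
+    // intrinsic, then restore it on the result.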
+    let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let b: poly64x2_t = simd_shuffle!(b, b, [1, 0]);
+    let ret_val: poly64x2_t = transmute(vshiftins_v2i64(
         transmute(a),
         transmute(b),
         int64x2_t::splat(-N as i64),
-    ))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::core_arch::{arm::*, simd::*};
-    use crate::mem::transmute;
-    use stdarch_test::simd_test;
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcvtq_s32_f32() {
-        let f = f32x4::new(-1., 2., 3., 4.);
-        let e = i32x4::new(-1, 2, 3, 4);
-        let r: i32x4 = transmute(vcvtq_s32_f32(transmute(f)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcvtq_u32_f32() {
-        let f = f32x4::new(1., 2., 3., 4.);
-        let e = u32x4::new(1, 2, 3, 4);
-        let r: u32x4 = transmute(vcvtq_u32_f32(transmute(f)));
-        assert_eq!(r, e);
-    }
+    ));
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
diff --git a/crates/core_arch/src/arm_shared/crc.rs b/crates/core_arch/src/arm_shared/crc.rs
deleted file mode 100644
index 1c10af05f6..0000000000
--- a/crates/core_arch/src/arm_shared/crc.rs
+++ /dev/null
@@ -1,279 +0,0 @@
-unsafe extern "unadjusted" {
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crc32b"
-    )]
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32b")]
-    fn crc32b_(crc: u32, data: u32) -> u32;
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crc32h"
-    )]
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32h")]
-    fn crc32h_(crc: u32, data: u32) -> u32;
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crc32w"
-    )]
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32w")]
-    fn crc32w_(crc: u32, data: u32) -> u32;
-
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crc32cb"
-    )]
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cb")]
-    fn crc32cb_(crc: u32, data: u32) -> u32;
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crc32ch"
-    )]
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32ch")]
-    fn crc32ch_(crc: u32, data: u32) -> u32;
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crc32cw"
-    )]
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cw")]
-    fn crc32cw_(crc: u32, data: u32) -> u32;
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crc32x"
-    )]
-    fn crc32x_(crc: u32, data: u64) -> u32;
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crc32cx"
-    )]
-    fn crc32cx_(crc: u32, data: u64) -> u32;
-}
-
-#[cfg(test)]
-use stdarch_test::assert_instr;
-
-/// CRC32 single round checksum for bytes (8 bits).
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32b)
-#[inline]
-#[target_feature(enable = "crc")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(crc32b))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_aarch32_crc32", issue = "125085")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_aarch64_crc32", since = "1.80.0")
-)]
-pub unsafe fn __crc32b(crc: u32, data: u8) -> u32 {
-    crc32b_(crc, data as u32)
-}
-
-/// CRC32 single round checksum for half words (16 bits).
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32h)
-#[inline]
-#[target_feature(enable = "crc")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(crc32h))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_aarch32_crc32", issue = "125085")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_aarch64_crc32", since = "1.80.0")
-)]
-pub unsafe fn __crc32h(crc: u32, data: u16) -> u32 {
-    crc32h_(crc, data as u32)
-}
-
-/// CRC32 single round checksum for words (32 bits).
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32w)
-#[inline]
-#[target_feature(enable = "crc")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(crc32w))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_aarch32_crc32", issue = "125085")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_aarch64_crc32", since = "1.80.0")
-)]
-pub unsafe fn __crc32w(crc: u32, data: u32) -> u32 {
-    crc32w_(crc, data)
-}
-
-/// CRC32-C single round checksum for bytes (8 bits).
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32cb)
-#[inline]
-#[target_feature(enable = "crc")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(crc32cb))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_aarch32_crc32", issue = "125085")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_aarch64_crc32", since = "1.80.0")
-)]
-pub unsafe fn __crc32cb(crc: u32, data: u8) -> u32 {
-    crc32cb_(crc, data as u32)
-}
-
-/// CRC32-C single round checksum for half words (16 bits).
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32ch)
-#[inline]
-#[target_feature(enable = "crc")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(crc32ch))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_aarch32_crc32", issue = "125085")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_aarch64_crc32", since = "1.80.0")
-)]
-pub unsafe fn __crc32ch(crc: u32, data: u16) -> u32 {
-    crc32ch_(crc, data as u32)
-}
-
-/// CRC32-C single round checksum for words (32 bits).
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32cw)
-#[inline]
-#[target_feature(enable = "crc")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(crc32cw))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_aarch32_crc32", issue = "125085")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_aarch64_crc32", since = "1.80.0")
-)]
-pub unsafe fn __crc32cw(crc: u32, data: u32) -> u32 {
-    crc32cw_(crc, data)
-}
-
-/// CRC32 single round checksum for quad words (64 bits).
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32d)
-#[inline]
-#[target_feature(enable = "crc")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(crc32x))]
-#[stable(feature = "stdarch_aarch64_crc32", since = "1.80.0")]
-pub unsafe fn __crc32d(crc: u32, data: u64) -> u32 {
-    crc32x_(crc, data)
-}
-
-/// CRC32 single round checksum for quad words (64 bits).
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32d)
-#[inline]
-#[target_feature(enable = "crc")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(crc32w))]
-#[unstable(feature = "stdarch_aarch32_crc32", issue = "125085")]
-pub unsafe fn __crc32d(crc: u32, data: u64) -> u32 {
-    // On 32-bit ARM this intrinsic emits a chain of two `crc32_w` instructions
-    // and truncates the data to 32 bits in both clang and gcc
-    crc32w_(
-        crc32w_(crc, (data & 0xffffffff) as u32),
-        (data >> 32) as u32,
-    )
-}
-
-/// CRC32 single round checksum for quad words (64 bits).
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32cd)
-#[inline]
-#[target_feature(enable = "crc")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(crc32cx))]
-#[stable(feature = "stdarch_aarch64_crc32", since = "1.80.0")]
-pub unsafe fn __crc32cd(crc: u32, data: u64) -> u32 {
-    crc32cx_(crc, data)
-}
-
-/// CRC32 single round checksum for quad words (64 bits).
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32cd)
-#[inline]
-#[target_feature(enable = "crc")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(crc32cw))]
-#[unstable(feature = "stdarch_aarch32_crc32", issue = "125085")]
-pub unsafe fn __crc32cd(crc: u32, data: u64) -> u32 {
-    // On 32-bit ARM this intrinsic emits a chain of two `crc32_cw` instructions
-    // and truncates the data to 32 bits in both clang and gcc
-    crc32cw_(
-        crc32cw_(crc, (data & 0xffffffff) as u32),
-        (data >> 32) as u32,
-    )
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::core_arch::{arm_shared::*, simd::*};
-    use std::mem;
-    use stdarch_test::simd_test;
-
-    #[simd_test(enable = "crc")]
-    unsafe fn test_crc32d() {
-        assert_eq!(__crc32d(0, 0), 0);
-        assert_eq!(__crc32d(0, 18446744073709551615), 1147535477);
-    }
-
-    #[simd_test(enable = "crc")]
-    unsafe fn test_crc32cd() {
-        assert_eq!(__crc32cd(0, 0), 0);
-        assert_eq!(__crc32cd(0, 18446744073709551615), 3293575501);
-    }
-
-    #[simd_test(enable = "crc")]
-    unsafe fn test_crc32b() {
-        assert_eq!(__crc32b(0, 0), 0);
-        assert_eq!(__crc32b(0, 255), 755167117);
-    }
-
-    #[simd_test(enable = "crc")]
-    unsafe fn test_crc32h() {
-        assert_eq!(__crc32h(0, 0), 0);
-        assert_eq!(__crc32h(0, 16384), 1994146192);
-    }
-
-    #[simd_test(enable = "crc")]
-    unsafe fn test_crc32w() {
-        assert_eq!(__crc32w(0, 0), 0);
-        assert_eq!(__crc32w(0, 4294967295), 3736805603);
-    }
-
-    #[simd_test(enable = "crc")]
-    unsafe fn test_crc32cb() {
-        assert_eq!(__crc32cb(0, 0), 0);
-        assert_eq!(__crc32cb(0, 255), 2910671697);
-    }
-
-    #[simd_test(enable = "crc")]
-    unsafe fn test_crc32ch() {
-        assert_eq!(__crc32ch(0, 0), 0);
-        assert_eq!(__crc32ch(0, 16384), 1098587580);
-    }
-
-    #[simd_test(enable = "crc")]
-    unsafe fn test_crc32cw() {
-        assert_eq!(__crc32cw(0, 0), 0);
-        assert_eq!(__crc32cw(0, 4294967295), 3080238136);
-    }
-}
diff --git a/crates/core_arch/src/arm_shared/crypto.rs b/crates/core_arch/src/arm_shared/crypto.rs
deleted file mode 100644
index 07c96008d9..0000000000
--- a/crates/core_arch/src/arm_shared/crypto.rs
+++ /dev/null
@@ -1,544 +0,0 @@
-use crate::core_arch::arm_shared::{uint8x16_t, uint32x4_t};
-
-#[allow(improper_ctypes)]
-unsafe extern "unadjusted" {
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crypto.aese"
-    )]
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aese")]
-    fn vaeseq_u8_(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t;
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crypto.aesd"
-    )]
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aesd")]
-    fn vaesdq_u8_(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t;
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crypto.aesmc"
-    )]
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aesmc")]
-    fn vaesmcq_u8_(data: uint8x16_t) -> uint8x16_t;
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crypto.aesimc"
-    )]
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aesimc")]
-    fn vaesimcq_u8_(data: uint8x16_t) -> uint8x16_t;
-
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crypto.sha1h"
-    )]
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1h")]
-    fn vsha1h_u32_(hash_e: u32) -> u32;
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crypto.sha1su0"
-    )]
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su0")]
-    fn vsha1su0q_u32_(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t;
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crypto.sha1su1"
-    )]
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su1")]
-    fn vsha1su1q_u32_(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t;
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crypto.sha1c"
-    )]
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1c")]
-    fn vsha1cq_u32_(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t;
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crypto.sha1p"
-    )]
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1p")]
-    fn vsha1pq_u32_(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t;
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crypto.sha1m"
-    )]
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1m")]
-    fn vsha1mq_u32_(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t;
-
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crypto.sha256h"
-    )]
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h")]
-    fn vsha256hq_u32_(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t;
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crypto.sha256h2"
-    )]
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h2")]
-    fn vsha256h2q_u32_(hash_efgh: uint32x4_t, hash_abcd: uint32x4_t, wk: uint32x4_t) -> uint32x4_t;
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crypto.sha256su0"
-    )]
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su0")]
-    fn vsha256su0q_u32_(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t;
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.crypto.sha256su1"
-    )]
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su1")]
-    fn vsha256su1q_u32_(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t;
-}
-
-#[cfg(test)]
-use stdarch_test::assert_instr;
-
-/// AES single round encryption.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaeseq_u8)
-#[inline]
-#[target_feature(enable = "aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(aese))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
-)]
-pub unsafe fn vaeseq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t {
-    vaeseq_u8_(data, key)
-}
-
-/// AES single round decryption.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesdq_u8)
-#[inline]
-#[target_feature(enable = "aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(aesd))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
-)]
-pub unsafe fn vaesdq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t {
-    vaesdq_u8_(data, key)
-}
-
-/// AES mix columns.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesmcq_u8)
-#[inline]
-#[target_feature(enable = "aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(aesmc))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
-)]
-pub unsafe fn vaesmcq_u8(data: uint8x16_t) -> uint8x16_t {
-    vaesmcq_u8_(data)
-}
-
-/// AES inverse mix columns.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesimcq_u8)
-#[inline]
-#[target_feature(enable = "aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(aesimc))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
-)]
-pub unsafe fn vaesimcq_u8(data: uint8x16_t) -> uint8x16_t {
-    vaesimcq_u8_(data)
-}
-
-/// SHA1 fixed rotate.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1h_u32)
-#[inline]
-#[target_feature(enable = "sha2")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(sha1h))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
-)]
-pub unsafe fn vsha1h_u32(hash_e: u32) -> u32 {
-    vsha1h_u32_(hash_e)
-}
-
-/// SHA1 hash update accelerator, choose.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1cq_u32)
-#[inline]
-#[target_feature(enable = "sha2")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(sha1c))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
-)]
-pub unsafe fn vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t {
-    vsha1cq_u32_(hash_abcd, hash_e, wk)
-}
-
-/// SHA1 hash update accelerator, majority.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1mq_u32)
-#[inline]
-#[target_feature(enable = "sha2")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(sha1m))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
-)]
-pub unsafe fn vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t {
-    vsha1mq_u32_(hash_abcd, hash_e, wk)
-}
-
-/// SHA1 hash update accelerator, parity.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1pq_u32)
-#[inline]
-#[target_feature(enable = "sha2")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(sha1p))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
-)]
-pub unsafe fn vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t {
-    vsha1pq_u32_(hash_abcd, hash_e, wk)
-}
-
-/// SHA1 schedule update accelerator, first part.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su0q_u32)
-#[inline]
-#[target_feature(enable = "sha2")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(sha1su0))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
-)]
-pub unsafe fn vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t {
-    vsha1su0q_u32_(w0_3, w4_7, w8_11)
-}
-
-/// SHA1 schedule update accelerator, second part.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su1q_u32)
-#[inline]
-#[target_feature(enable = "sha2")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(sha1su1))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
-)]
-pub unsafe fn vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t {
-    vsha1su1q_u32_(tw0_3, w12_15)
-}
-
-/// SHA256 hash update accelerator.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256hq_u32)
-#[inline]
-#[target_feature(enable = "sha2")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(sha256h))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
-)]
-pub unsafe fn vsha256hq_u32(
-    hash_abcd: uint32x4_t,
-    hash_efgh: uint32x4_t,
-    wk: uint32x4_t,
-) -> uint32x4_t {
-    vsha256hq_u32_(hash_abcd, hash_efgh, wk)
-}
-
-/// SHA256 hash update accelerator, upper part.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256h2q_u32)
-#[inline]
-#[target_feature(enable = "sha2")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(sha256h2))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
-)]
-pub unsafe fn vsha256h2q_u32(
-    hash_efgh: uint32x4_t,
-    hash_abcd: uint32x4_t,
-    wk: uint32x4_t,
-) -> uint32x4_t {
-    vsha256h2q_u32_(hash_efgh, hash_abcd, wk)
-}
-
-/// SHA256 schedule update accelerator, first part.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su0q_u32)
-#[inline]
-#[target_feature(enable = "sha2")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(sha256su0))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
-)]
-pub unsafe fn vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t {
-    vsha256su0q_u32_(w0_3, w4_7)
-}
-
-/// SHA256 schedule update accelerator, second part.
-///
-/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su1q_u32)
-#[inline]
-#[target_feature(enable = "sha2")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(test, assert_instr(sha256su1))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
-)]
-pub unsafe fn vsha256su1q_u32(
-    tw0_3: uint32x4_t,
-    w8_11: uint32x4_t,
-    w12_15: uint32x4_t,
-) -> uint32x4_t {
-    vsha256su1q_u32_(tw0_3, w8_11, w12_15)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::core_arch::{arm_shared::*, simd::*};
-    use std::mem;
-    use stdarch_test::simd_test;
-
-    #[simd_test(enable = "aes")]
-    unsafe fn test_vaeseq_u8() {
-        let data = mem::transmute(u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8));
-        let key = mem::transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7));
-        let r: u8x16 = mem::transmute(vaeseq_u8(data, key));
-        assert_eq!(
-            r,
-            u8x16::new(
-                124, 123, 124, 118, 124, 123, 124, 197, 124, 123, 124, 118, 124, 123, 124, 197
-            )
-        );
-    }
-
-    #[simd_test(enable = "aes")]
-    unsafe fn test_vaesdq_u8() {
-        let data = mem::transmute(u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8));
-        let key = mem::transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7));
-        let r: u8x16 = mem::transmute(vaesdq_u8(data, key));
-        assert_eq!(
-            r,
-            u8x16::new(9, 213, 9, 251, 9, 213, 9, 56, 9, 213, 9, 251, 9, 213, 9, 56)
-        );
-    }
-
-    #[simd_test(enable = "aes")]
-    unsafe fn test_vaesmcq_u8() {
-        let data = mem::transmute(u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8));
-        let r: u8x16 = mem::transmute(vaesmcq_u8(data));
-        assert_eq!(
-            r,
-            u8x16::new(3, 4, 9, 10, 15, 8, 21, 30, 3, 4, 9, 10, 15, 8, 21, 30)
-        );
-    }
-
-    #[simd_test(enable = "aes")]
-    unsafe fn test_vaesimcq_u8() {
-        let data = mem::transmute(u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8));
-        let r: u8x16 = mem::transmute(vaesimcq_u8(data));
-        assert_eq!(
-            r,
-            u8x16::new(
-                43, 60, 33, 50, 103, 80, 125, 70, 43, 60, 33, 50, 103, 80, 125, 70
-            )
-        );
-    }
-
-    #[simd_test(enable = "sha2")]
-    unsafe fn test_vsha1h_u32() {
-        assert_eq!(vsha1h_u32(0x1234), 0x048d);
-        assert_eq!(vsha1h_u32(0x5678), 0x159e);
-    }
-
-    #[simd_test(enable = "sha2")]
-    unsafe fn test_vsha1su0q_u32() {
-        let r: u32x4 = mem::transmute(vsha1su0q_u32(
-            mem::transmute(u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32)),
-            mem::transmute(u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32)),
-            mem::transmute(u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32)),
-        ));
-        assert_eq!(r, u32x4::new(0x9abc, 0xdef0, 0x1234, 0x5678));
-    }
-
-    #[simd_test(enable = "sha2")]
-    unsafe fn test_vsha1su1q_u32() {
-        let r: u32x4 = mem::transmute(vsha1su1q_u32(
-            mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
-            mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
-        ));
-        assert_eq!(
-            r,
-            u32x4::new(0x00008898, 0x00019988, 0x00008898, 0x0000acd0)
-        );
-    }
-
-    #[simd_test(enable = "sha2")]
-    unsafe fn test_vsha1cq_u32() {
-        let r: u32x4 = mem::transmute(vsha1cq_u32(
-            mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
-            0x1234,
-            mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
-        ));
-        assert_eq!(
-            r,
-            u32x4::new(0x8a32cbd8, 0x0c518a96, 0x0018a081, 0x0000c168)
-        );
-    }
-
-    #[simd_test(enable = "sha2")]
-    unsafe fn test_vsha1pq_u32() {
-        let r: u32x4 = mem::transmute(vsha1pq_u32(
-            mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
-            0x1234,
-            mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
-        ));
-        assert_eq!(
-            r,
-            u32x4::new(0x469f0ba3, 0x0a326147, 0x80145d7f, 0x00009f47)
-        );
-    }
-
-    #[simd_test(enable = "sha2")]
-    unsafe fn test_vsha1mq_u32() {
-        let r: u32x4 = mem::transmute(vsha1mq_u32(
-            mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
-            0x1234,
-            mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
-        ));
-        assert_eq!(
-            r,
-            u32x4::new(0xaa39693b, 0x0d51bf84, 0x001aa109, 0x0000d278)
-        );
-    }
-
-    #[simd_test(enable = "sha2")]
-    unsafe fn test_vsha256hq_u32() {
-        let r: u32x4 = mem::transmute(vsha256hq_u32(
-            mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
-            mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
-            mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
-        ));
-        assert_eq!(
-            r,
-            u32x4::new(0x05e9aaa8, 0xec5f4c02, 0x20a1ea61, 0x28738cef)
-        );
-    }
-
-    #[simd_test(enable = "sha2")]
-    unsafe fn test_vsha256h2q_u32() {
-        let r: u32x4 = mem::transmute(vsha256h2q_u32(
-            mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
-            mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
-            mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
-        ));
-        assert_eq!(
-            r,
-            u32x4::new(0x3745362e, 0x2fb51d00, 0xbd4c529b, 0x968b8516)
-        );
-    }
-
-    #[simd_test(enable = "sha2")]
-    unsafe fn test_vsha256su0q_u32() {
-        let r: u32x4 = mem::transmute(vsha256su0q_u32(
-            mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
-            mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
-        ));
-        assert_eq!(
-            r,
-            u32x4::new(0xe59e1c97, 0x5eaf68da, 0xd7bcb51f, 0x6c8de152)
-        );
-    }
-
-    #[simd_test(enable = "sha2")]
-    unsafe fn test_vsha256su1q_u32() {
-        let r: u32x4 = mem::transmute(vsha256su1q_u32(
-            mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
-            mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
-            mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)),
-        ));
-        assert_eq!(
-            r,
-            u32x4::new(0x5e09e8d2, 0x74a6f16b, 0xc966606b, 0xa686ee9f)
-        );
-    }
-}
diff --git a/crates/core_arch/src/arm_shared/mod.rs b/crates/core_arch/src/arm_shared/mod.rs
index 9dee6aed3b..dcfa500085 100644
--- a/crates/core_arch/src/arm_shared/mod.rs
+++ b/crates/core_arch/src/arm_shared/mod.rs
@@ -60,46 +60,6 @@ mod hints;
 #[unstable(feature = "stdarch_arm_hints", issue = "117218")]
 pub use self::hints::*;
 
-mod crc;
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_aarch32_crc32", issue = "125085")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "stdarch_aarch64_crc32", since = "1.80.0")
-)]
-pub use crc::*;
-
-// NEON intrinsics are currently broken on big-endian, so don't expose them. (#1484)
-#[cfg(target_endian = "little")]
-#[cfg(any(
-    target_arch = "aarch64",
-    target_arch = "arm64ec",
-    target_feature = "v7",
-    doc
-))]
-mod crypto;
-// NEON intrinsics are currently broken on big-endian, so don't expose them. (#1484)
-#[cfg(target_endian = "little")]
-#[cfg(any(
-    target_arch = "aarch64",
-    target_arch = "arm64ec",
-    target_feature = "v7",
-    doc
-))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
-)]
-pub use self::crypto::*;
-
-// NEON intrinsics are currently broken on big-endian, so don't expose them. (#1484)
-#[cfg(target_endian = "little")]
 #[cfg(any(
     target_arch = "aarch64",
     target_arch = "arm64ec",
@@ -107,7 +67,7 @@ pub use self::crypto::*;
     doc
 ))]
 pub(crate) mod neon;
-#[cfg(target_endian = "little")]
+
 #[cfg(any(
     target_arch = "aarch64",
     target_arch = "arm64ec",
diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs
index 48c162de4b..868cb1937b 100644
--- a/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -12,6 +12,440 @@ use stdarch_test::assert_instr;
 
 use super::*;
 
+#[doc = "CRC32 single round checksum for bytes (8 bits)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32b)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "crc")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(crc32b))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_aarch32_crc32", issue = "125085")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_aarch64_crc32", since = "1.80.0")
+)]
+pub unsafe fn __crc32b(crc: u32, data: u8) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crc32b"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32b")]
+        fn ___crc32b(crc: i32, data: i32) -> i32;
+    }
+    ___crc32b(crc.as_signed(), data.as_signed() as i32).as_unsigned()
+}
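+// Editorial usage sketch (not part of the generated bindings): folding a byte
+// slice into a CRC32 one byte at a time. The all-ones seed and the final
+// inversion are assumptions taken from the common CRC-32 convention, and
+// `crc32_bytes_sketch` is a hypothetical helper name.
+#[allow(dead_code)]
+#[target_feature(enable = "crc")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+unsafe fn crc32_bytes_sketch(bytes: &[u8]) -> u32 {
+    let mut crc = 0xFFFF_FFFF_u32; // conventional CRC-32 seed (assumption)
+    for &b in bytes {
+        crc = __crc32b(crc, b); // one CRC32B round per input byte
+    }
+    !crc // conventional final bit inversion (assumption)
+}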
+#[doc = "CRC32-C single round checksum for bytes (8 bits)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32cb)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "crc")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(crc32cb))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_aarch32_crc32", issue = "125085")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_aarch64_crc32", since = "1.80.0")
+)]
+pub unsafe fn __crc32cb(crc: u32, data: u8) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crc32cb"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cb")]
+        fn ___crc32cb(crc: i32, data: i32) -> i32;
+    }
+    ___crc32cb(crc.as_signed(), data.as_signed() as i32).as_unsigned()
+}
+#[doc = "CRC32-C single round checksum for quad words (64 bits)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32cd)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "crc")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(crc32cw))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_aarch32_crc32", issue = "125085")
+)]
+pub unsafe fn __crc32cd(crc: u32, data: u64) -> u32 {
+    let a: i32 = crc as i32;
+    let b: i32 = (data & 0xFFFFFFFF).as_signed() as i32;
+    let c: i32 = (data >> 32).as_signed() as i32;
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cw")]
+        fn ___crc32cw(crc: i32, data: i32) -> i32;
+    }
+    ___crc32cw(___crc32cw(a, b), c).as_unsigned() as u32
+}
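+// Editorial sketch of the identity the body above encodes: on 32-bit Arm a
+// 64-bit CRC32-C update is exactly two word-sized rounds, low word first.
+// `crc32cd_matches_two_word_rounds` is a hypothetical helper name.
+#[cfg(target_arch = "arm")]
+#[allow(dead_code)]
+#[target_feature(enable = "crc")]
+unsafe fn crc32cd_matches_two_word_rounds(crc: u32, data: u64) -> bool {
+    let lo = data as u32; // the low 32 bits are folded first
+    let hi = (data >> 32) as u32; // then the high 32 bits
+    __crc32cd(crc, data) == __crc32cw(__crc32cw(crc, lo), hi)
+}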
+#[doc = "CRC32-C single round checksum for bytes (16 bits)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32ch)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "crc")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(crc32ch))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_aarch32_crc32", issue = "125085")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_aarch64_crc32", since = "1.80.0")
+)]
+pub unsafe fn __crc32ch(crc: u32, data: u16) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crc32ch"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32ch")]
+        fn ___crc32ch(crc: i32, data: i32) -> i32;
+    }
+    ___crc32ch(crc.as_signed(), data.as_signed() as i32).as_unsigned()
+}
+#[doc = "CRC32-C single round checksum for bytes (32 bits)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32cw)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "crc")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(crc32cw))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_aarch32_crc32", issue = "125085")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_aarch64_crc32", since = "1.80.0")
+)]
+pub unsafe fn __crc32cw(crc: u32, data: u32) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crc32cw"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cw")]
+        fn ___crc32cw(crc: i32, data: i32) -> i32;
+    }
+    ___crc32cw(crc.as_signed(), data.as_signed()).as_unsigned()
+}
+#[doc = "CRC32 single round checksum for quad words (64 bits)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32d)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "crc")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(crc32w))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_aarch32_crc32", issue = "125085")
+)]
+pub unsafe fn __crc32d(crc: u32, data: u64) -> u32 {
+    let a: i32 = crc as i32;
+    let b: i32 = (data & 0xFFFFFFFF).as_signed() as i32;
+    let c: i32 = (data >> 32).as_signed() as i32;
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32w")]
+        fn ___crc32w(crc: i32, data: i32) -> i32;
+    }
+    ___crc32w(___crc32w(a, b), c).as_unsigned()
+}
+#[doc = "CRC32 single round checksum for bytes (16 bits)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32h)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "crc")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(crc32h))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_aarch32_crc32", issue = "125085")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_aarch64_crc32", since = "1.80.0")
+)]
+pub unsafe fn __crc32h(crc: u32, data: u16) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crc32h"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32h")]
+        fn ___crc32h(crc: i32, data: i32) -> i32;
+    }
+    ___crc32h(crc.as_signed(), data.as_signed() as i32).as_unsigned()
+}
+#[doc = "CRC32 single round checksum for bytes (32 bits)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32w)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "crc")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(crc32w))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_aarch32_crc32", issue = "125085")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_aarch64_crc32", since = "1.80.0")
+)]
+pub unsafe fn __crc32w(crc: u32, data: u32) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crc32w"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32w")]
+        fn ___crc32w(crc: i32, data: i32) -> i32;
+    }
+    ___crc32w(crc.as_signed(), data.as_signed()).as_unsigned()
+}
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+unsafe fn priv_vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadals.v4i16.v8i8")]
+        fn _priv_vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t;
+    }
+    _priv_vpadal_s8(a, b)
+}
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+unsafe fn priv_vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadals.v8i16.v16i8")]
+        fn _priv_vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t;
+    }
+    _priv_vpadalq_s8(a, b)
+}
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+unsafe fn priv_vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadals.v2i32.v4i16")]
+        fn _priv_vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t;
+    }
+    _priv_vpadal_s16(a, b)
+}
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+unsafe fn priv_vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadals.v4i32.v8i16")]
+        fn _priv_vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t;
+    }
+    _priv_vpadalq_s16(a, b)
+}
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+unsafe fn priv_vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadals.v1i64.v2i32")]
+        fn _priv_vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t;
+    }
+    _priv_vpadal_s32(a, b)
+}
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+unsafe fn priv_vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadals.v2i64.v4i32")]
+        fn _priv_vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t;
+    }
+    _priv_vpadalq_s32(a, b)
+}
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+unsafe fn priv_vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadalu.v4i16.v8i8")]
+        fn _priv_vpadal_u8(a: int16x4_t, b: int8x8_t) -> int16x4_t;
+    }
+    _priv_vpadal_u8(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+unsafe fn priv_vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadalu.v8i16.v16i8")]
+        fn _priv_vpadalq_u8(a: int16x8_t, b: int8x16_t) -> int16x8_t;
+    }
+    _priv_vpadalq_u8(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+unsafe fn priv_vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadalu.v2i32.v4i16")]
+        fn _priv_vpadal_u16(a: int32x2_t, b: int16x4_t) -> int32x2_t;
+    }
+    _priv_vpadal_u16(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+unsafe fn priv_vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadalu.v4i32.v8i16")]
+        fn _priv_vpadalq_u16(a: int32x4_t, b: int16x8_t) -> int32x4_t;
+    }
+    _priv_vpadalq_u16(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+unsafe fn priv_vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadalu.v1i64.v2i32")]
+        fn _priv_vpadal_u32(a: int64x1_t, b: int32x2_t) -> int64x1_t;
+    }
+    _priv_vpadal_u32(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+unsafe fn priv_vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadalu.v2i64.v4i32")]
+        fn _priv_vpadalq_u32(a: int64x2_t, b: int32x4_t) -> int64x2_t;
+    }
+    _priv_vpadalq_u32(a.as_signed(), b.as_signed()).as_unsigned()
+}
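+// Editorial reference model for the `priv_vpadal*` helpers above, in plain
+// Rust: each output lane is the accumulator lane plus the widened sum of the
+// two corresponding narrow lanes (shown here for the u8 -> u16 case;
+// `vpadal_u8_reference` is a hypothetical name, not generated code).
+#[allow(dead_code)]
+fn vpadal_u8_reference(a: [u16; 4], b: [u8; 8]) -> [u16; 4] {
+    let mut out = a;
+    for i in 0..4 {
+        // widen b[2i] and b[2i+1], sum the pair, accumulate into a[i]
+        out[i] = out[i]
+            .wrapping_add(u16::from(b[2 * i]))
+            .wrapping_add(u16::from(b[2 * i + 1]));
+    }
+    out
+}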
 #[doc = "Signed Absolute difference and Accumulate Long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_s8)"]
 #[doc = "## Safety"]
@@ -780,6 +1214,192 @@ pub unsafe fn vabs_f32(a: float32x2_t) -> float32x2_t {
 pub unsafe fn vabsq_f32(a: float32x4_t) -> float32x4_t {
     simd_fabs(a)
 }
+#[doc = "Absolute value (wrapping)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(abs)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vabs_s8(a: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.abs.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v8i8")]
+        fn _vabs_s8(a: int8x8_t) -> int8x8_t;
+    }
+    _vabs_s8(a)
+}
+#[doc = "Absolute value (wrapping)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(abs)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vabsq_s8(a: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.abs.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v16i8")]
+        fn _vabsq_s8(a: int8x16_t) -> int8x16_t;
+    }
+    _vabsq_s8(a)
+}
+#[doc = "Absolute value (wrapping)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(abs)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vabs_s16(a: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.abs.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v4i16")]
+        fn _vabs_s16(a: int16x4_t) -> int16x4_t;
+    }
+    _vabs_s16(a)
+}
+#[doc = "Absolute value (wrapping)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(abs)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vabsq_s16(a: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.abs.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v8i16")]
+        fn _vabsq_s16(a: int16x8_t) -> int16x8_t;
+    }
+    _vabsq_s16(a)
+}
+#[doc = "Absolute value (wrapping)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(abs)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vabs_s32(a: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.abs.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v2i32")]
+        fn _vabs_s32(a: int32x2_t) -> int32x2_t;
+    }
+    _vabs_s32(a)
+}
+#[doc = "Absolute value (wrapping)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabsq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(abs)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vabsq_s32(a: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.abs.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v4i32")]
+        fn _vabsq_s32(a: int32x4_t) -> int32x4_t;
+    }
+    _vabsq_s32(a)
+}
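+// Editorial note on the "(wrapping)" wording above: these absolute values do
+// not saturate, so the most negative lane maps to itself, exactly like the
+// scalar sketch below (`wrapping_abs_lane` is a hypothetical name).
+#[allow(dead_code)]
+fn wrapping_abs_lane(x: i8) -> i8 {
+    // i8::MIN.wrapping_abs() == i8::MIN, matching the vabs_s8 lane behaviour
+    x.wrapping_abs()
+}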
 #[doc = "Bitwise exclusive OR"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_p8)"]
 #[doc = "## Safety"]
@@ -941,6 +1561,114 @@ pub unsafe fn vaddq_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
 pub unsafe fn vaddq_p128(a: p128, b: p128) -> p128 {
     a ^ b
 }
+#[doc = "AES single round encryption."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesdq_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(aesd))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+)]
+pub unsafe fn vaesdq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.aesd"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aesd")]
+        fn _vaesdq_u8(data: int8x16_t, key: int8x16_t) -> int8x16_t;
+    }
+    _vaesdq_u8(data.as_signed(), key.as_signed()).as_unsigned()
+}
+#[doc = "AES single round encryption."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaeseq_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(aese))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+)]
+pub unsafe fn vaeseq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.aese"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aese")]
+        fn _vaeseq_u8(data: int8x16_t, key: int8x16_t) -> int8x16_t;
+    }
+    _vaeseq_u8(data.as_signed(), key.as_signed()).as_unsigned()
+}
+#[doc = "AES inverse mix columns."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesimcq_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(aesimc))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+)]
+pub unsafe fn vaesimcq_u8(data: uint8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.aesimc"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aesimc")]
+        fn _vaesimcq_u8(data: int8x16_t) -> int8x16_t;
+    }
+    _vaesimcq_u8(data.as_signed()).as_unsigned()
+}
+#[doc = "AES mix columns."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaesmcq_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(aesmc))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+)]
+pub unsafe fn vaesmcq_u8(data: uint8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.aesmc"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aesmc")]
+        fn _vaesmcq_u8(data: int8x16_t) -> int8x16_t;
+    }
+    _vaesmcq_u8(data.as_signed()).as_unsigned()
+}
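+// Editorial usage sketch for the AES primitives above, assuming an AES-128
+// key schedule supplied by the caller and `veorq_u8` for the final
+// AddRoundKey; `aes128_encrypt_block_sketch` is a hypothetical name. AESE
+// performs AddRoundKey + SubBytes + ShiftRows and AESMC adds MixColumns; the
+// last round omits MixColumns.
+#[allow(dead_code)]
+#[target_feature(enable = "aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+unsafe fn aes128_encrypt_block_sketch(
+    mut block: uint8x16_t,
+    round_keys: &[uint8x16_t; 11],
+) -> uint8x16_t {
+    for rk in &round_keys[..9] {
+        block = vaesmcq_u8(vaeseq_u8(block, *rk)); // rounds 1..=9
+    }
+    block = vaeseq_u8(block, round_keys[9]); // round 10, no MixColumns
+    veorq_u8(block, round_keys[10]) // final AddRoundKey
+}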
 #[doc = "Vector bitwise and"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vand_s8)"]
 #[doc = "## Safety"]
@@ -3506,6 +4234,68 @@ pub unsafe fn vcltq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     simd_lt(a, b)
 }
 #[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(clz)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vclz_s8(a: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.ctlz.v8i8"
+        )]
+        fn _vclz_s8(a: int8x8_t) -> int8x8_t;
+    }
+    _vclz_s8(a)
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(clz)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vclzq_s8(a: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.ctlz.v16i8"
+        )]
+        fn _vclzq_s8(a: int8x16_t) -> int8x16_t;
+    }
+    _vclzq_s8(a)
+}
+#[doc = "Count leading zero bits"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
@@ -3526,7 +4316,15 @@ pub unsafe fn vcltq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vclz_s16(a: int16x4_t) -> int16x4_t {
-    vclz_s16_(a)
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.ctlz.v4i16"
+        )]
+        fn _vclz_s16(a: int16x4_t) -> int16x4_t;
+    }
+    _vclz_s16(a)
 }
 #[doc = "Count leading zero bits"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_s16)"]
@@ -3549,7 +4347,15 @@ pub unsafe fn vclz_s16(a: int16x4_t) -> int16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vclzq_s16(a: int16x8_t) -> int16x8_t {
-    vclzq_s16_(a)
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.ctlz.v8i16"
+        )]
+        fn _vclzq_s16(a: int16x8_t) -> int16x8_t;
+    }
+    _vclzq_s16(a)
 }
 #[doc = "Count leading zero bits"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_s32)"]
@@ -3572,7 +4378,15 @@ pub unsafe fn vclzq_s16(a: int16x8_t) -> int16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vclz_s32(a: int32x2_t) -> int32x2_t {
-    vclz_s32_(a)
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.ctlz.v2i32"
+        )]
+        fn _vclz_s32(a: int32x2_t) -> int32x2_t;
+    }
+    _vclz_s32(a)
 }
 #[doc = "Count leading zero bits"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_s32)"]
@@ -3595,16 +4409,25 @@ pub unsafe fn vclz_s32(a: int32x2_t) -> int32x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vclzq_s32(a: int32x4_t) -> int32x4_t {
-    vclzq_s32_(a)
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.ctlz.v4i32"
+        )]
+        fn _vclzq_s32(a: int32x4_t) -> int32x4_t;
+    }
+    _vclzq_s32(a)
 }
 #[doc = "Count leading zero bits"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(clz)
@@ -3617,17 +4440,18 @@ pub unsafe fn vclzq_s32(a: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vclz_s8(a: int8x8_t) -> int8x8_t {
-    vclz_s8_(a)
+pub unsafe fn vclz_u16(a: uint16x4_t) -> uint16x4_t {
+    transmute(vclz_s16(transmute(a)))
 }
 #[doc = "Count leading zero bits"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(clz)
@@ -3640,14 +4464,17 @@ pub unsafe fn vclz_s8(a: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vclzq_s8(a: int8x16_t) -> int8x16_t {
-    vclzq_s8_(a)
+pub unsafe fn vclz_u16(a: uint16x4_t) -> uint16x4_t {
+    let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: uint16x4_t = transmute(vclz_s16(transmute(a)));
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Count leading zero bits"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))]
@@ -3663,14 +4490,15 @@ pub unsafe fn vclzq_s8(a: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vclz_u16(a: uint16x4_t) -> uint16x4_t {
-    transmute(vclz_s16_(transmute(a)))
+pub unsafe fn vclzq_u16(a: uint16x8_t) -> uint16x8_t {
+    transmute(vclzq_s16(transmute(a)))
 }
 #[doc = "Count leading zero bits"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))]
@@ -3687,13 +4515,16 @@ pub unsafe fn vclz_u16(a: uint16x4_t) -> uint16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vclzq_u16(a: uint16x8_t) -> uint16x8_t {
-    transmute(vclzq_s16_(transmute(a)))
+    let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint16x8_t = transmute(vclzq_s16(transmute(a)));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Count leading zero bits"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))]
@@ -3710,13 +4541,14 @@ pub unsafe fn vclzq_u16(a: uint16x8_t) -> uint16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vclz_u32(a: uint32x2_t) -> uint32x2_t {
-    transmute(vclz_s32_(transmute(a)))
+    transmute(vclz_s32(transmute(a)))
 }
 #[doc = "Count leading zero bits"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))]
@@ -3732,17 +4564,20 @@ pub unsafe fn vclz_u32(a: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vclzq_u32(a: uint32x4_t) -> uint32x4_t {
-    transmute(vclzq_s32_(transmute(a)))
+pub unsafe fn vclz_u32(a: uint32x2_t) -> uint32x2_t {
+    let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint32x2_t = transmute(vclz_s32(transmute(a)));
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Count leading zero bits"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(clz)
@@ -3755,17 +4590,18 @@ pub unsafe fn vclzq_u32(a: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vclz_u8(a: uint8x8_t) -> uint8x8_t {
-    transmute(vclz_s8_(transmute(a)))
+pub unsafe fn vclzq_u32(a: uint32x4_t) -> uint32x4_t {
+    transmute(vclzq_s32(transmute(a)))
 }
 #[doc = "Count leading zero bits"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(clz)
@@ -3778,20 +4614,23 @@ pub unsafe fn vclz_u8(a: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vclzq_u8(a: uint8x16_t) -> uint8x16_t {
-    transmute(vclzq_s8_(transmute(a)))
+pub unsafe fn vclzq_u32(a: uint32x4_t) -> uint32x4_t {
+    let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: uint32x4_t = transmute(vclzq_s32(transmute(a)));
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f32)"]
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(clz)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -3801,20 +4640,21 @@ pub unsafe fn vclzq_u8(a: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vcreate_f32(a: u64) -> float32x2_t {
-    transmute(a)
+pub unsafe fn vclz_u8(a: uint8x8_t) -> uint8x8_t {
+    transmute(vclz_s8(transmute(a)))
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s8)"]
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(clz)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -3824,20 +4664,23 @@ pub unsafe fn vcreate_f32(a: u64) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vcreate_s8(a: u64) -> int8x8_t {
-    transmute(a)
+pub unsafe fn vclz_u8(a: uint8x8_t) -> uint8x8_t {
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vclz_s8(transmute(a)));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s16)"]
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(clz)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -3847,20 +4690,21 @@ pub unsafe fn vcreate_s8(a: u64) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vcreate_s16(a: u64) -> int16x4_t {
-    transmute(a)
+pub unsafe fn vclzq_u8(a: uint8x16_t) -> uint8x16_t {
+    transmute(vclzq_s8(transmute(a)))
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s32)"]
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(clz)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -3870,20 +4714,26 @@ pub unsafe fn vcreate_s16(a: u64) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vcreate_s32(a: u64) -> int32x2_t {
-    transmute(a)
+pub unsafe fn vclzq_u8(a: uint8x16_t) -> uint8x16_t {
+    let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x16_t = transmute(vclzq_s8(transmute(a)));
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s64)"]
+#[doc = "Population count per byte."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(cnt)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -3893,20 +4743,28 @@ pub unsafe fn vcreate_s32(a: u64) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vcreate_s64(a: u64) -> int64x1_t {
-    transmute(a)
+pub unsafe fn vcnt_s8(a: int8x8_t) -> int8x8_t {
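+    // Population count lowers to LLVM's generic ctpop intrinsic; the same
+    // link_name applies on arm, aarch64 and arm64ec.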
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.ctpop.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ctpop.v8i8")]
+        fn _vcnt_s8(a: int8x8_t) -> int8x8_t;
+    }
+    _vcnt_s8(a)
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u8)"]
+#[doc = "Population count per byte."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(cnt)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -3916,20 +4774,29 @@ pub unsafe fn vcreate_s64(a: u64) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vcreate_u8(a: u64) -> uint8x8_t {
-    transmute(a)
+pub unsafe fn vcntq_s8(a: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.ctpop.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ctpop.v16i8")]
+        fn _vcntq_s8(a: int8x16_t) -> int8x16_t;
+    }
+    _vcntq_s8(a)
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u16)"]
+#[doc = "Population count per byte."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(cnt)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -3939,20 +4806,21 @@ pub unsafe fn vcreate_u8(a: u64) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vcreate_u16(a: u64) -> uint16x4_t {
-    transmute(a)
+pub unsafe fn vcnt_u8(a: uint8x8_t) -> uint8x8_t {
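+    // Popcount is identical for signed and unsigned bit patterns, so the
+    // unsigned form simply transmutes through vcnt_s8.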
+    transmute(vcnt_s8(transmute(a)))
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u32)"]
+#[doc = "Population count per byte."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(cnt)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -3962,20 +4830,23 @@ pub unsafe fn vcreate_u16(a: u64) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vcreate_u32(a: u64) -> uint32x2_t {
-    transmute(a)
+pub unsafe fn vcnt_u8(a: uint8x8_t) -> uint8x8_t {
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vcnt_s8(transmute(a)));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u64)"]
+#[doc = "Population count per byte."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(cnt)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -3985,20 +4856,21 @@ pub unsafe fn vcreate_u32(a: u64) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vcreate_u64(a: u64) -> uint64x1_t {
-    transmute(a)
+pub unsafe fn vcntq_u8(a: uint8x16_t) -> uint8x16_t {
+    transmute(vcntq_s8(transmute(a)))
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p8)"]
+#[doc = "Population count per byte."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(cnt)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4008,20 +4880,27 @@ pub unsafe fn vcreate_u64(a: u64) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vcreate_p8(a: u64) -> poly8x8_t {
-    transmute(a)
+pub unsafe fn vcntq_u8(a: uint8x16_t) -> uint8x16_t {
+    let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x16_t = transmute(vcntq_s8(transmute(a)));
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p16)"]
+#[doc = "Population count per byte."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(cnt)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4031,20 +4910,21 @@ pub unsafe fn vcreate_p8(a: u64) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vcreate_p16(a: u64) -> poly16x4_t {
-    transmute(a)
+pub unsafe fn vcnt_p8(a: poly8x8_t) -> poly8x8_t {
+    transmute(vcnt_s8(transmute(a)))
 }
-#[doc = "Insert vector element from another vector element"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p64)"]
+#[doc = "Population count per byte."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(cnt)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4054,20 +4934,23 @@ pub unsafe fn vcreate_p16(a: u64) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vcreate_p64(a: u64) -> poly64x1_t {
-    transmute(a)
+pub unsafe fn vcnt_p8(a: poly8x8_t) -> poly8x8_t {
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(vcnt_s8(transmute(a)));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_s32)"]
+#[doc = "Population count per byte."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(scvtf)
+    assert_instr(cnt)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4077,20 +4960,21 @@ pub unsafe fn vcreate_p64(a: u64) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vcvt_f32_s32(a: int32x2_t) -> float32x2_t {
-    simd_cast(a)
+pub unsafe fn vcntq_p8(a: poly8x16_t) -> poly8x16_t {
+    transmute(vcntq_s8(transmute(a)))
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f32_s32)"]
+#[doc = "Population count per byte."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(scvtf)
+    assert_instr(cnt)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4100,21 +4984,23 @@ pub unsafe fn vcvt_f32_s32(a: int32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vcvtq_f32_s32(a: int32x4_t) -> float32x4_t {
-    simd_cast(a)
+pub unsafe fn vcntq_p8(a: poly8x16_t) -> poly8x16_t {
+    let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x16_t = transmute(vcntq_s8(transmute(a)));
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_u32)"]
+#[doc = "Vector combine"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ucvtf)
-)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -4123,21 +5009,17 @@ pub unsafe fn vcvtq_f32_s32(a: int32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vcvt_f32_u32(a: uint32x2_t) -> float32x2_t {
-    simd_cast(a)
+pub unsafe fn vcombine_f32(a: float32x2_t, b: float32x2_t) -> float32x4_t {
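+    // Concatenating shuffle: indices 0 and 1 select from `a`, 2 and 3 from `b`.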
+    simd_shuffle!(a, b, [0, 1, 2, 3])
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f32_u32)"]
+#[doc = "Vector combine"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ucvtf)
-)]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -4146,356 +5028,235 @@ pub unsafe fn vcvt_f32_u32(a: uint32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vcvtq_f32_u32(a: uint32x4_t) -> float32x4_t {
-    simd_cast(a)
+pub unsafe fn vcombine_s8(a: int8x8_t, b: int8x8_t) -> int8x16_t {
+    simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_s32)"]
+#[doc = "Vector combine"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vcvt, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vcvt_n_f32_s32<const N: i32>(a: int32x2_t) -> float32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            target_arch = "arm",
-            link_name = "llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32"
-        )]
-        fn _vcvt_n_f32_s32(a: int32x2_t, n: i32) -> float32x2_t;
-    }
-    _vcvt_n_f32_s32(a, N)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vcombine_s16(a: int16x4_t, b: int16x4_t) -> int16x8_t {
+    simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7])
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f32_s32)"]
+#[doc = "Vector combine"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vcvt, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vcvtq_n_f32_s32<const N: i32>(a: int32x4_t) -> float32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            target_arch = "arm",
-            link_name = "llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32"
-        )]
-        fn _vcvtq_n_f32_s32(a: int32x4_t, n: i32) -> float32x4_t;
-    }
-    _vcvtq_n_f32_s32(a, N)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vcombine_s32(a: int32x2_t, b: int32x2_t) -> int32x4_t {
+    simd_shuffle!(a, b, [0, 1, 2, 3])
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_s32)"]
+#[doc = "Vector combine"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(scvtf, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvt_n_f32_s32<const N: i32>(a: int32x2_t) -> float32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32"
-        )]
-        fn _vcvt_n_f32_s32(a: int32x2_t, n: i32) -> float32x2_t;
-    }
-    _vcvt_n_f32_s32(a, N)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vcombine_s64(a: int64x1_t, b: int64x1_t) -> int64x2_t {
+    simd_shuffle!(a, b, [0, 1])
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f32_s32)"]
+#[doc = "Vector combine"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(scvtf, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtq_n_f32_s32<const N: i32>(a: int32x4_t) -> float32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32"
-        )]
-        fn _vcvtq_n_f32_s32(a: int32x4_t, n: i32) -> float32x4_t;
-    }
-    _vcvtq_n_f32_s32(a, N)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vcombine_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x16_t {
+    simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_u32)"]
+#[doc = "Vector combine"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vcvt, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vcvt_n_f32_u32<const N: i32>(a: uint32x2_t) -> float32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            target_arch = "arm",
-            link_name = "llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32"
-        )]
-        fn _vcvt_n_f32_u32(a: int32x2_t, n: i32) -> float32x2_t;
-    }
-    _vcvt_n_f32_u32(a.as_signed(), N)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vcombine_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x8_t {
+    simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7])
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f32_u32)"]
+#[doc = "Vector combine"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vcvt, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vcvtq_n_f32_u32<const N: i32>(a: uint32x4_t) -> float32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            target_arch = "arm",
-            link_name = "llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32"
-        )]
-        fn _vcvtq_n_f32_u32(a: int32x4_t, n: i32) -> float32x4_t;
-    }
-    _vcvtq_n_f32_u32(a.as_signed(), N)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vcombine_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x4_t {
+    simd_shuffle!(a, b, [0, 1, 2, 3])
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_u32)"]
+#[doc = "Vector combine"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ucvtf, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvt_n_f32_u32<const N: i32>(a: uint32x2_t) -> float32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32"
-        )]
-        fn _vcvt_n_f32_u32(a: int32x2_t, n: i32) -> float32x2_t;
-    }
-    _vcvt_n_f32_u32(a.as_signed(), N)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vcombine_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x2_t {
+    simd_shuffle!(a, b, [0, 1])
 }
-#[doc = "Fixed-point convert to floating-point"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f32_u32)"]
+#[doc = "Vector combine"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ucvtf, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtq_n_f32_u32<const N: i32>(a: uint32x4_t) -> float32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32"
-        )]
-        fn _vcvtq_n_f32_u32(a: int32x4_t, n: i32) -> float32x4_t;
-    }
-    _vcvtq_n_f32_u32(a.as_signed(), N)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vcombine_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x16_t {
+    simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }
-#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_s32_f32)"]
+#[doc = "Vector combine"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vcvt, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vcvt_n_s32_f32<const N: i32>(a: float32x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            target_arch = "arm",
-            link_name = "llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32"
-        )]
-        fn _vcvt_n_s32_f32(a: float32x2_t, n: i32) -> int32x2_t;
-    }
-    _vcvt_n_s32_f32(a, N)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vcombine_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x8_t {
+    simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7])
 }
-#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_s32_f32)"]
+#[doc = "Vector combine"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vcvt, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vcvtq_n_s32_f32<const N: i32>(a: float32x4_t) -> int32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            target_arch = "arm",
-            link_name = "llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32"
-        )]
-        fn _vcvtq_n_s32_f32(a: float32x4_t, n: i32) -> int32x4_t;
-    }
-    _vcvtq_n_s32_f32(a, N)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vcombine_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x2_t {
+    simd_shuffle!(a, b, [0, 1])
 }
-#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_s32_f32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(fcvtzs, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvt_n_s32_f32<const N: i32>(a: float32x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32"
-        )]
-        fn _vcvt_n_s32_f32(a: float32x2_t, n: i32) -> int32x2_t;
-    }
-    _vcvt_n_s32_f32(a, N)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vcreate_f32(a: u64) -> float32x2_t {
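+    // Reinterpret the 64-bit value as two f32 lanes; on little-endian the
+    // bit layout already matches, so a plain transmute suffices.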
+    transmute(a)
 }
-#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_s32_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(fcvtzs, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtq_n_s32_f32<const N: i32>(a: float32x4_t) -> int32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32"
-        )]
-        fn _vcvtq_n_s32_f32(a: float32x4_t, n: i32) -> int32x4_t;
-    }
-    _vcvtq_n_s32_f32(a, N)
-}
-#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_u32_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vcvt, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vcvt_n_u32_f32<const N: i32>(a: float32x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            target_arch = "arm",
-            link_name = "llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32"
-        )]
-        fn _vcvt_n_u32_f32(a: float32x2_t, n: i32) -> int32x2_t;
-    }
-    _vcvt_n_u32_f32(a, N).as_unsigned()
-}
-#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_u32_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vcvt, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vcvtq_n_u32_f32<const N: i32>(a: float32x4_t) -> uint32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            target_arch = "arm",
-            link_name = "llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32"
-        )]
-        fn _vcvtq_n_u32_f32(a: float32x4_t, n: i32) -> int32x4_t;
-    }
-    _vcvtq_n_u32_f32(a, N).as_unsigned()
-}
-#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_u32_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(fcvtzu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvt_n_u32_f32<const N: i32>(a: float32x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32"
-        )]
-        fn _vcvt_n_u32_f32(a: float32x2_t, n: i32) -> int32x2_t;
-    }
-    _vcvt_n_u32_f32(a, N).as_unsigned()
-}
-#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_u32_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon intrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(fcvtzu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vcvtq_n_u32_f32<const N: i32>(a: float32x4_t) -> uint32x4_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32"
-        )]
-        fn _vcvtq_n_u32_f32(a: float32x4_t, n: i32) -> int32x4_t;
-    }
-    _vcvtq_n_u32_f32(a, N).as_unsigned()
-}
-#[doc = "Floating-point convert to signed fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_s32_f32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fcvtzs)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4505,28 +5266,22 @@ pub unsafe fn vcvtq_n_u32_f32<const N: i32>(a: float32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.fptosi.sat.v2i32.v2f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.fptosi.sat.v2i32.v2f32"
-        )]
-        fn _vcvt_s32_f32(a: float32x2_t) -> int32x2_t;
-    }
-    _vcvt_s32_f32(a)
+pub unsafe fn vcreate_f32(a: u64) -> float32x2_t {
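+    // Big-endian variant: after the transmute, swap the two lanes so lane
+    // numbering matches the little-endian layout.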
+    let ret_val: float32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Floating-point convert to signed fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_s32_f32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fcvtzs)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4536,28 +5291,21 @@ pub unsafe fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.fptosi.sat.v4i32.v4f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.fptosi.sat.v4i32.v4f32"
-        )]
-        fn _vcvtq_s32_f32(a: float32x4_t) -> int32x4_t;
-    }
-    _vcvtq_s32_f32(a)
+pub unsafe fn vcreate_s8(a: u64) -> int8x8_t {
+    transmute(a)
 }
-#[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_u32_f32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fcvtzu)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4567,28 +5315,22 @@ pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.fptoui.sat.v2i32.v2f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.fptoui.sat.v2i32.v2f32"
-        )]
-        fn _vcvt_u32_f32(a: float32x2_t) -> int32x2_t;
-    }
-    _vcvt_u32_f32(a).as_unsigned()
+pub unsafe fn vcreate_s8(a: u64) -> int8x8_t {
+    let ret_val: int8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_u32_f32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fcvtzu)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4598,274 +5340,217 @@ pub unsafe fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.fptoui.sat.v4i32.v4f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.fptoui.sat.v4i32.v4f32"
-        )]
-        fn _vcvtq_u32_f32(a: float32x4_t) -> int32x4_t;
-    }
-    _vcvtq_u32_f32(a).as_unsigned()
+pub unsafe fn vcreate_s16(a: u64) -> int16x4_t {
+    transmute(a)
 }
-#[doc = "Dot product arithmetic (indexed)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_s32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sdot, LANE = 0)
+    assert_instr(nop)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    let c: int32x2_t = transmute(c);
-    let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-    vdot_s32(a, b, transmute(c))
+pub unsafe fn vcreate_s16(a: u64) -> int16x4_t {
+    let ret_val: int16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
-#[doc = "Dot product arithmetic (indexed)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sdot, LANE = 0)
+    assert_instr(nop)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdotq_lane_s32<const LANE: i32>(
-    a: int32x4_t,
-    b: int8x16_t,
-    c: int8x8_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    let c: int32x2_t = transmute(c);
-    let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vdotq_s32(a, b, transmute(c))
+pub unsafe fn vcreate_s32(a: u64) -> int32x2_t {
+    transmute(a)
 }
-#[doc = "Dot product arithmetic (indexed)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(udot, LANE = 0)
+    assert_instr(nop)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdot_lane_u32<const LANE: i32>(
-    a: uint32x2_t,
-    b: uint8x8_t,
-    c: uint8x8_t,
-) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    let c: uint32x2_t = transmute(c);
-    let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-    vdot_u32(a, b, transmute(c))
+pub unsafe fn vcreate_s32(a: u64) -> int32x2_t {
+    let ret_val: int32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Dot product arithmetic (indexed)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(udot, LANE = 0)
+    assert_instr(nop)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdotq_lane_u32<const LANE: i32>(
-    a: uint32x4_t,
-    b: uint8x16_t,
-    c: uint8x8_t,
-) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    let c: uint32x2_t = transmute(c);
-    let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vdotq_u32(a, b, transmute(c))
+pub unsafe fn vcreate_s64(a: u64) -> int64x1_t {
+    transmute(a)
 }
-#[doc = "Dot product arithmetic (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_s32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sdot)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdot_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sdot.v2i32.v8i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sdot.v2i32.v8i8"
-        )]
-        fn _vdot_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t;
-    }
-    _vdot_s32(a, b, c)
+pub unsafe fn vcreate_u8(a: u64) -> uint8x8_t {
+    transmute(a)
 }
-#[doc = "Dot product arithmetic (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_s32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sdot)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdotq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sdot.v4i32.v16i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sdot.v4i32.v16i8"
-        )]
-        fn _vdotq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t;
-    }
-    _vdotq_s32(a, b, c)
+pub unsafe fn vcreate_u8(a: u64) -> uint8x8_t {
+    let ret_val: uint8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Dot product arithmetic (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_u32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(udot)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdot_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.udot.v2i32.v8i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.udot.v2i32.v8i8"
-        )]
-        fn _vdot_u32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t;
-    }
-    _vdot_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+pub unsafe fn vcreate_u16(a: u64) -> uint16x4_t {
+    transmute(a)
 }
-#[doc = "Dot product arithmetic (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_u32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(udot)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.udot.v4i32.v16i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.udot.v4i32.v16i8"
-        )]
-        fn _vdotq_u32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t;
-    }
-    _vdotq_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+pub unsafe fn vcreate_u16(a: u64) -> uint16x4_t {
+    let ret_val: uint16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 1)
+    assert_instr(nop)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -4874,23 +5559,22 @@ pub unsafe fn vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_lane_f32<const N: i32>(a: float32x2_t) -> float32x2_t {
-    static_assert_uimm_bits!(N, 1);
-    simd_shuffle!(a, a, [N as u32, N as u32])
+pub unsafe fn vcreate_u32(a: u64) -> uint32x2_t {
+    transmute(a)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 1)
+    assert_instr(nop)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -4899,23 +5583,22 @@ pub unsafe fn vdup_lane_f32<const N: i32>(a: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_lane_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
-    static_assert_uimm_bits!(N, 1);
-    simd_shuffle!(a, a, [N as u32, N as u32])
+pub unsafe fn vcreate_u32(a: u64) -> uint32x2_t {
+    let ret_val: uint32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 1)
+    assert_instr(nop)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -4924,23 +5607,22 @@ pub unsafe fn vdup_lane_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(N, 1);
-    simd_shuffle!(a, a, [N as u32, N as u32])
+pub unsafe fn vcreate_u64(a: u64) -> uint64x1_t {
+    transmute(a)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 1)
+    assert_instr(nop)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -4949,23 +5631,22 @@ pub unsafe fn vdup_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_lane_f32<const N: i32>(a: float32x2_t) -> float32x4_t {
-    static_assert_uimm_bits!(N, 1);
-    simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
+pub unsafe fn vcreate_p8(a: u64) -> poly8x8_t {
+    transmute(a)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 1)
+    assert_instr(nop)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -4974,23 +5655,23 @@ pub unsafe fn vdupq_lane_f32<const N: i32>(a: float32x2_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_lane_s32<const N: i32>(a: int32x2_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 1);
-    simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
+pub unsafe fn vcreate_p8(a: u64) -> poly8x8_t {
+    let ret_val: poly8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u32)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 1)
+    assert_instr(nop)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -4999,23 +5680,22 @@ pub unsafe fn vdupq_lane_s32<const N: i32>(a: int32x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x4_t {
-    static_assert_uimm_bits!(N, 1);
-    simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
+pub unsafe fn vcreate_p16(a: u64) -> poly16x4_t {
+    transmute(a)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p16)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 2)
+    assert_instr(nop)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -5024,23 +5704,22 @@ pub unsafe fn vdupq_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x4_t {
-    static_assert_uimm_bits!(N, 2);
-    simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
+pub unsafe fn vcreate_p16(a: u64) -> poly16x4_t {
+    let ret_val: poly16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s16)"]
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 2)
+    assert_instr(nop)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -5049,23 +5728,21 @@ pub unsafe fn vdup_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_lane_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(N, 2);
-    simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
+pub unsafe fn vcreate_p64(a: u64) -> poly64x1_t {
+    transmute(a)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u16)"]
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 2)
+    assert_instr(scvtf)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -5074,23 +5751,21 @@ pub unsafe fn vdup_lane_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(N, 2);
-    simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
+pub unsafe fn vcvt_f32_s32(a: int32x2_t) -> float32x2_t {
+    simd_cast(a)
 }
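`vcvt_f32_s32` is just a lane-wise numeric cast (`simd_cast`), so each `i32` lane becomes the nearest representable `f32`. A small usage sketch, assuming aarch64:

use core::arch::aarch64::*;

unsafe fn int_to_float() {
    // Exactly representable, so the round-trip value is exact.
    let floats = vcvt_f32_s32(vdup_n_s32(-3));
    assert_eq!(vget_lane_f32::<0>(floats), -3.0);
}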
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p16)"]
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f32_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 2)
+    assert_instr(scvtf)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -5099,27 +5774,21 @@ pub unsafe fn vdup_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x8_t {
-    static_assert_uimm_bits!(N, 2);
-    simd_shuffle!(
-        a,
-        a,
-        [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-    )
-}
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s16)"]
+pub unsafe fn vcvtq_f32_s32(a: int32x4_t) -> float32x4_t {
+    simd_cast(a)
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 2)
+    assert_instr(ucvtf)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -5128,27 +5797,21 @@ pub unsafe fn vdupq_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_lane_s16<const N: i32>(a: int16x4_t) -> int16x8_t {
-    static_assert_uimm_bits!(N, 2);
-    simd_shuffle!(
-        a,
-        a,
-        [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-    )
+pub unsafe fn vcvt_f32_u32(a: uint32x2_t) -> float32x2_t {
+    simd_cast(a)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u16)"]
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_f32_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 2)
+    assert_instr(ucvtf)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -5157,27 +5820,357 @@ pub unsafe fn vdupq_lane_s16<const N: i32>(a: int16x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 2);
-    simd_shuffle!(
-        a,
-        a,
-        [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-    )
+pub unsafe fn vcvtq_f32_u32(a: uint32x4_t) -> float32x4_t {
+    simd_cast(a)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p8)"]
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vcvt, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vcvt_n_f32_s32<const N: i32>(a: int32x2_t) -> float32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "arm",
+            link_name = "llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32"
+        )]
+        fn _vcvt_n_f32_s32(a: int32x2_t, n: i32) -> float32x2_t;
+    }
+    _vcvt_n_f32_s32(a, N)
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f32_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vcvt, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vcvtq_n_f32_s32<const N: i32>(a: int32x4_t) -> float32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "arm",
+            link_name = "llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32"
+        )]
+        fn _vcvtq_n_f32_s32(a: int32x4_t, n: i32) -> float32x4_t;
+    }
+    _vcvtq_n_f32_s32(a, N)
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(scvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_n_f32_s32<const N: i32>(a: int32x2_t) -> float32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32"
+        )]
+        fn _vcvt_n_f32_s32(a: int32x2_t, n: i32) -> float32x2_t;
+    }
+    _vcvt_n_f32_s32(a, N)
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f32_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(scvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_n_f32_s32<const N: i32>(a: int32x4_t) -> float32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32"
+        )]
+        fn _vcvtq_n_f32_s32(a: int32x4_t, n: i32) -> float32x4_t;
+    }
+    _vcvtq_n_f32_s32(a, N)
+}
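In the `_n_` conversions the const parameter `N` is the number of fractional bits: each lane is read as a fixed-point value and scaled by 2^-N, which is why `N` is asserted to lie in 1..=32. A sketch of the arithmetic, assuming aarch64 (the names are local to the example):

use core::arch::aarch64::*;

unsafe fn fixed_to_float() {
    let q4 = vdupq_n_s32(3 << 4); // 48, read as Q-format with 4 fractional bits
    let f = vcvtq_n_f32_s32::<4>(q4); // 48 / 2^4
    assert_eq!(vgetq_lane_f32::<0>(f), 3.0);
}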
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vcvt, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vcvt_n_f32_u32<const N: i32>(a: uint32x2_t) -> float32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "arm",
+            link_name = "llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32"
+        )]
+        fn _vcvt_n_f32_u32(a: int32x2_t, n: i32) -> float32x2_t;
+    }
+    _vcvt_n_f32_u32(a.as_signed(), N)
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f32_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vcvt, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vcvtq_n_f32_u32<const N: i32>(a: uint32x4_t) -> float32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "arm",
+            link_name = "llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32"
+        )]
+        fn _vcvtq_n_f32_u32(a: int32x4_t, n: i32) -> float32x4_t;
+    }
+    _vcvtq_n_f32_u32(a.as_signed(), N)
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f32_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ucvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_n_f32_u32<const N: i32>(a: uint32x2_t) -> float32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32"
+        )]
+        fn _vcvt_n_f32_u32(a: int32x2_t, n: i32) -> float32x2_t;
+    }
+    _vcvt_n_f32_u32(a.as_signed(), N)
+}
+#[doc = "Fixed-point convert to floating-point"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_f32_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ucvtf, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_n_f32_u32<const N: i32>(a: uint32x4_t) -> float32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32"
+        )]
+        fn _vcvtq_n_f32_u32(a: int32x4_t, n: i32) -> float32x4_t;
+    }
+    _vcvtq_n_f32_u32(a.as_signed(), N)
+}
+#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vcvt, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vcvt_n_s32_f32<const N: i32>(a: float32x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "arm",
+            link_name = "llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32"
+        )]
+        fn _vcvt_n_s32_f32(a: float32x2_t, n: i32) -> int32x2_t;
+    }
+    _vcvt_n_s32_f32(a, N)
+}
+#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vcvt, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vcvtq_n_s32_f32<const N: i32>(a: float32x4_t) -> int32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "arm",
+            link_name = "llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32"
+        )]
+        fn _vcvtq_n_s32_f32(a: float32x4_t, n: i32) -> int32x4_t;
+    }
+    _vcvtq_n_s32_f32(a, N)
+}
+#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(fcvtzs, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_n_s32_f32<const N: i32>(a: float32x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32"
+        )]
+        fn _vcvt_n_s32_f32(a: float32x2_t, n: i32) -> int32x2_t;
+    }
+    _vcvt_n_s32_f32(a, N)
+}
+#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(fcvtzs, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_n_s32_f32<const N: i32>(a: float32x4_t) -> int32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32"
+        )]
+        fn _vcvtq_n_s32_f32(a: float32x4_t, n: i32) -> int32x4_t;
+    }
+    _vcvtq_n_s32_f32(a, N)
+}
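The float-to-fixed direction multiplies by 2^N and truncates toward zero, matching the `fcvtzs` assertion above:

use core::arch::aarch64::*;

unsafe fn float_to_fixed() {
    let f = vdupq_n_f32(1.75);
    let q4 = vcvtq_n_s32_f32::<4>(f); // trunc(1.75 * 2^4)
    assert_eq!(vgetq_lane_s32::<0>(q4), 28);
}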
+#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vcvt, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vcvt_n_u32_f32<const N: i32>(a: float32x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "arm",
+            link_name = "llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32"
+        )]
+        fn _vcvt_n_u32_f32(a: float32x2_t, n: i32) -> int32x2_t;
+    }
+    _vcvt_n_u32_f32(a, N).as_unsigned()
+}
+#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vcvt, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vcvtq_n_u32_f32<const N: i32>(a: float32x4_t) -> uint32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "arm",
+            link_name = "llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32"
+        )]
+        fn _vcvtq_n_u32_f32(a: float32x4_t, n: i32) -> int32x4_t;
+    }
+    _vcvtq_n_u32_f32(a, N).as_unsigned()
+}
+#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(fcvtzu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvt_n_u32_f32<const N: i32>(a: float32x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32"
+        )]
+        fn _vcvt_n_u32_f32(a: float32x2_t, n: i32) -> int32x2_t;
+    }
+    _vcvt_n_u32_f32(a, N).as_unsigned()
+}
+#[doc = "Floating-point convert to fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_n_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(fcvtzu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vcvtq_n_u32_f32<const N: i32>(a: float32x4_t) -> uint32x4_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32"
+        )]
+        fn _vcvtq_n_u32_f32(a: float32x4_t, n: i32) -> int32x4_t;
+    }
+    _vcvtq_n_u32_f32(a, N).as_unsigned()
+}
+#[doc = "Floating-point convert to signed fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_s32_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 4)
+    assert_instr(fcvtzs)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -5186,27 +6179,29 @@ pub unsafe fn vdupq_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    simd_shuffle!(
-        a,
-        a,
-        [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-    )
+pub unsafe fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.fptosi.sat.v2i32.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.fptosi.sat.v2i32.v2f32"
+        )]
+        fn _vcvt_s32_f32(a: float32x2_t) -> int32x2_t;
+    }
+    _vcvt_s32_f32(a)
 }
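Routing `vcvt_s32_f32` through `llvm.fptosi.sat` gives saturating semantics: out-of-range lanes clamp to `i32::MIN`/`i32::MAX` and NaN becomes 0, rather than producing poison. A sketch, assuming aarch64:

use core::arch::aarch64::*;

unsafe fn saturating_cvt() {
    let too_big = vdup_n_f32(3.0e9); // above i32::MAX
    let s = vcvt_s32_f32(too_big);
    assert_eq!(vget_lane_s32::<0>(s), i32::MAX);
}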
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s8)"]
+#[doc = "Floating-point convert to signed fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_s32_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 4)
+    assert_instr(fcvtzs)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -5215,27 +6210,29 @@ pub unsafe fn vdup_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_lane_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    simd_shuffle!(
-        a,
-        a,
-        [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-    )
+pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.fptosi.sat.v4i32.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.fptosi.sat.v4i32.v4f32"
+        )]
+        fn _vcvtq_s32_f32(a: float32x4_t) -> int32x4_t;
+    }
+    _vcvtq_s32_f32(a)
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u8)"]
+#[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_u32_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 4)
+    assert_instr(fcvtzu)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -5244,27 +6241,29 @@ pub unsafe fn vdup_lane_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    simd_shuffle!(
-        a,
-        a,
-        [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-    )
+pub unsafe fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.fptoui.sat.v2i32.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.fptoui.sat.v2i32.v2f32"
+        )]
+        fn _vcvt_u32_f32(a: float32x2_t) -> int32x2_t;
+    }
+    _vcvt_u32_f32(a).as_unsigned()
 }
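The unsigned variant saturates at both ends as well; in particular, negative inputs clamp to 0:

use core::arch::aarch64::*;

unsafe fn saturating_cvt_u() {
    let neg = vdup_n_f32(-7.5);
    assert_eq!(vget_lane_u32::<0>(vcvt_u32_f32(neg)), 0);
}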
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p8)"]
+#[doc = "Floating-point convert to unsigned fixed-point, rounding toward zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtq_u32_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 4)
+    assert_instr(fcvtzu)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -5273,142 +6272,272 @@ pub unsafe fn vdup_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    simd_shuffle!(
-        a,
-        a,
-        [
-            N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32,
-            N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32
-        ]
-    )
+pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.fptoui.sat.v4i32.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.fptoui.sat.v4i32.v4f32"
+        )]
+        fn _vcvtq_u32_f32(a: float32x4_t) -> int32x4_t;
+    }
+    _vcvtq_u32_f32(a).as_unsigned()
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s8)"]
+#[doc = "Dot product arithmetic (indexed)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 4)
+    assert_instr(sdot, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_lane_s8<const N: i32>(a: int8x8_t) -> int8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    simd_shuffle!(
-        a,
-        a,
-        [
-            N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32,
-            N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32
-        ]
-    )
-}
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u8)"]
-#[doc = "## Safety"]
+pub unsafe fn vdot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c: int32x2_t = transmute(c);
+    let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
+    vdot_s32(a, b, transmute(c))
+}
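The transmute/shuffle pair above views `c` as two `i32`-sized groups of four `i8` lanes and broadcasts the group selected by `LANE` before delegating to the plain dot product, so every output lane accumulates against the same four bytes. A usage sketch; note that these dotprod intrinsics are unstable (`stdarch_neon_dotprod`) at the time of this patch and need the `dotprod` target feature at runtime:

use core::arch::aarch64::*;

#[target_feature(enable = "neon,dotprod")]
unsafe fn dot_lane(acc: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t {
    // LANE = 1 selects bytes 4..8 of `c` for both output lanes.
    vdot_lane_s32::<1>(acc, b, c)
}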
+#[doc = "Dot product arithmetic (indexed)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32)"]
+#[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 4)
+    assert_instr(sdot, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    simd_shuffle!(
-        a,
-        a,
-        [
-            N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32,
-            N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32
-        ]
-    )
+pub unsafe fn vdotq_lane_s32<const LANE: i32>(
+    a: int32x4_t,
+    b: int8x16_t,
+    c: int8x8_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c: int32x2_t = transmute(c);
+    let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vdotq_s32(a, b, transmute(c))
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s64)"]
+#[doc = "Dot product arithmetic (indexed)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, N = 0)
+    assert_instr(udot, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_lane_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
-    static_assert!(N == 0);
-    a
+pub unsafe fn vdot_lane_u32<const LANE: i32>(
+    a: uint32x2_t,
+    b: uint8x8_t,
+    c: uint8x8_t,
+) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c: uint32x2_t = transmute(c);
+    let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
+    vdot_u32(a, b, transmute(c))
 }
-#[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u64)"]
+#[doc = "Dot product arithmetic (indexed)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, N = 0)
+    assert_instr(udot, LANE = 0)
 )]
-#[rustc_legacy_const_generics(1)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_lane_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
-    static_assert!(N == 0);
-    a
+pub unsafe fn vdotq_lane_u32<const LANE: i32>(
+    a: uint32x4_t,
+    b: uint8x16_t,
+    c: uint8x8_t,
+) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let c: uint32x2_t = transmute(c);
+    let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vdotq_u32(a, b, transmute(c))
+}
+#[doc = "Dot product arithmetic (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sdot)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vdot_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sdot.v2i32.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sdot.v2i32.v8i8"
+        )]
+        fn _vdot_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t;
+    }
+    _vdot_s32(a, b, c)
+}
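Numerically, each `i32` output lane of `vdot_s32` is the accumulator lane plus the dot product of the corresponding groups of four bytes from `b` and `c`. A worked sketch (same feature caveats as above):

use core::arch::aarch64::*;

#[target_feature(enable = "neon,dotprod")]
unsafe fn dot_demo() {
    let acc = vdup_n_s32(10);
    let b = vcreate_s8(0x0102_0304_0506_0708); // lanes 0..8 = 8,7,6,5,4,3,2,1
    let ones = vdup_n_s8(1);
    let d = vdot_s32(acc, b, ones);
    assert_eq!(vget_lane_s32::<0>(d), 10 + 8 + 7 + 6 + 5); // 36
    assert_eq!(vget_lane_s32::<1>(d), 10 + 4 + 3 + 2 + 1); // 20
}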
+#[doc = "Dot product arithmetic (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sdot)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vdotq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sdot.v4i32.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sdot.v4i32.v16i8"
+        )]
+        fn _vdotq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t;
+    }
+    _vdotq_s32(a, b, c)
+}
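+// A minimal sketch of the signed semantics (inputs assumed): each i8 * i8
+// product is widened to i32 before the four-way accumulation, so negative
+// bytes cannot overflow the per-lane sum:
+//
+//     let acc = vdupq_n_s32(10);
+//     let r = vdotq_s32(acc, vdupq_n_s8(-2), vdupq_n_s8(3));
+//     // every lane of r is 10 + 4 * (-2 * 3) = -14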
+#[doc = "Dot product arithmetic (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(udot)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vdot_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.udot.v2i32.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.udot.v2i32.v8i8"
+        )]
+        fn _vdot_u32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t;
+    }
+    _vdot_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
+}
+#[doc = "Dot product arithmetic (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(udot)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    unstable(feature = "stdarch_neon_dotprod", issue = "117224")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.udot.v4i32.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.udot.v4i32.v16i8"
+        )]
+        fn _vdotq_u32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t;
+    }
+    _vdotq_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
 }
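+// Design note, with a small assumed-input sketch: the extern declaration
+// above is typed with signed vectors for both polarities, so the unsigned
+// wrapper only reinterprets bits via `as_signed`/`as_unsigned`; the udot
+// instruction itself still treats the bytes as unsigned:
+//
+//     let r = vdotq_u32(vdupq_n_u32(0), vdupq_n_u8(200), vdupq_n_u8(1));
+//     // every lane of r is 4 * (200 * 1) = 800, with no sign-extension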
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 2)
+    assert_instr(dup, N = 1)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5419,21 +6548,21 @@ pub unsafe fn vdup_lane_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_laneq_f32<const N: i32>(a: float32x4_t) -> float32x2_t {
-    static_assert_uimm_bits!(N, 2);
+pub unsafe fn vdup_lane_f32<const N: i32>(a: float32x2_t) -> float32x2_t {
+    static_assert_uimm_bits!(N, 1);
     simd_shuffle!(a, a, [N as u32, N as u32])
 }
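+// Lane-duplication sketch (input values assumed):
+//
+//     let data = [1.0f32, 2.0];
+//     let v = vld1_f32(data.as_ptr()); // float32x2_t [1.0, 2.0]
+//     let r = vdup_lane_f32::<1>(v);   // float32x2_t [2.0, 2.0]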
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 2)
+    assert_instr(dup, N = 1)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5444,21 +6573,21 @@ pub unsafe fn vdup_laneq_f32<const N: i32>(a: float32x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_laneq_s32<const N: i32>(a: int32x4_t) -> int32x2_t {
-    static_assert_uimm_bits!(N, 2);
+pub unsafe fn vdup_lane_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(N, 1);
     simd_shuffle!(a, a, [N as u32, N as u32])
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 2)
+    assert_instr(dup, N = 1)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5469,21 +6598,21 @@ pub unsafe fn vdup_laneq_s32<const N: i32>(a: int32x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_laneq_u32<const N: i32>(a: uint32x4_t) -> uint32x2_t {
-    static_assert_uimm_bits!(N, 2);
+pub unsafe fn vdup_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 1);
     simd_shuffle!(a, a, [N as u32, N as u32])
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 2)
+    assert_instr(dup, N = 1)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5494,21 +6623,21 @@ pub unsafe fn vdup_laneq_u32<const N: i32>(a: uint32x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_laneq_f32<const N: i32>(a: float32x4_t) -> float32x4_t {
-    static_assert_uimm_bits!(N, 2);
+pub unsafe fn vdupq_lane_f32<const N: i32>(a: float32x2_t) -> float32x4_t {
+    static_assert_uimm_bits!(N, 1);
     simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 2)
+    assert_instr(dup, N = 1)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5519,21 +6648,21 @@ pub unsafe fn vdupq_laneq_f32<const N: i32>(a: float32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_laneq_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
+pub unsafe fn vdupq_lane_s32<const N: i32>(a: int32x2_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 1);
     simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 2)
+    assert_instr(dup, N = 1)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5544,21 +6673,21 @@ pub unsafe fn vdupq_laneq_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_laneq_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(N, 2);
+pub unsafe fn vdupq_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 1);
     simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
 }
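+// The `q` forms broadcast a lane of a 64-bit vector into all lanes of a
+// 128-bit vector; sketch with assumed inputs:
+//
+//     let data = [7u32, 9];
+//     let v = vld1_u32(data.as_ptr()); // uint32x2_t [7, 9]
+//     let r = vdupq_lane_u32::<0>(v);  // uint32x4_t [7, 7, 7, 7]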
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 4)
+    assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5569,21 +6698,21 @@ pub unsafe fn vdupq_laneq_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x4_t {
-    static_assert_uimm_bits!(N, 3);
+pub unsafe fn vdup_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x4_t {
+    static_assert_uimm_bits!(N, 2);
     simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 4)
+    assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5594,21 +6723,21 @@ pub unsafe fn vdup_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_laneq_s16<const N: i32>(a: int16x8_t) -> int16x4_t {
-    static_assert_uimm_bits!(N, 3);
+pub unsafe fn vdup_lane_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(N, 2);
     simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 4)
+    assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5619,21 +6748,21 @@ pub unsafe fn vdup_laneq_s16<const N: i32>(a: int16x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x4_t {
-    static_assert_uimm_bits!(N, 3);
+pub unsafe fn vdup_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 2);
     simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 4)
+    assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5644,8 +6773,8 @@ pub unsafe fn vdup_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x8_t {
-    static_assert_uimm_bits!(N, 3);
+pub unsafe fn vdupq_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x8_t {
+    static_assert_uimm_bits!(N, 2);
     simd_shuffle!(
         a,
         a,
@@ -5653,16 +6782,16 @@ pub unsafe fn vdupq_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x8_t {
     )
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 4)
+    assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5673,8 +6802,8 @@ pub unsafe fn vdupq_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_laneq_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(N, 3);
+pub unsafe fn vdupq_lane_s16<const N: i32>(a: int16x4_t) -> int16x8_t {
+    static_assert_uimm_bits!(N, 2);
     simd_shuffle!(
         a,
         a,
@@ -5682,16 +6811,16 @@ pub unsafe fn vdupq_laneq_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
     )
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 4)
+    assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5702,8 +6831,8 @@ pub unsafe fn vdupq_laneq_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 3);
+pub unsafe fn vdupq_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 2);
     simd_shuffle!(
         a,
         a,
@@ -5711,16 +6840,16 @@ pub unsafe fn vdupq_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
     )
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 8)
+    assert_instr(dup, N = 4)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5731,8 +6860,8 @@ pub unsafe fn vdupq_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x8_t {
-    static_assert_uimm_bits!(N, 4);
+pub unsafe fn vdup_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x8_t {
+    static_assert_uimm_bits!(N, 3);
     simd_shuffle!(
         a,
         a,
@@ -5740,16 +6869,16 @@ pub unsafe fn vdup_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x8_t {
     )
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 8)
+    assert_instr(dup, N = 4)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5760,8 +6889,8 @@ pub unsafe fn vdup_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_laneq_s8<const N: i32>(a: int8x16_t) -> int8x8_t {
-    static_assert_uimm_bits!(N, 4);
+pub unsafe fn vdup_lane_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
+    static_assert_uimm_bits!(N, 3);
     simd_shuffle!(
         a,
         a,
@@ -5769,16 +6898,16 @@ pub unsafe fn vdup_laneq_s8<const N: i32>(a: int8x16_t) -> int8x8_t {
     )
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 8)
+    assert_instr(dup, N = 4)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5789,8 +6918,8 @@ pub unsafe fn vdup_laneq_s8<const N: i32>(a: int8x16_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 4);
+pub unsafe fn vdup_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 3);
     simd_shuffle!(
         a,
         a,
@@ -5798,16 +6927,16 @@ pub unsafe fn vdup_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x8_t {
     )
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 8)
+    assert_instr(dup, N = 4)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5818,8 +6947,8 @@ pub unsafe fn vdup_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x16_t {
-    static_assert_uimm_bits!(N, 4);
+pub unsafe fn vdupq_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x16_t {
+    static_assert_uimm_bits!(N, 3);
     simd_shuffle!(
         a,
         a,
@@ -5830,16 +6959,16 @@ pub unsafe fn vdupq_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x16_t {
     )
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 8)
+    assert_instr(dup, N = 4)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5850,8 +6979,8 @@ pub unsafe fn vdupq_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_laneq_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
-    static_assert_uimm_bits!(N, 4);
+pub unsafe fn vdupq_lane_s8<const N: i32>(a: int8x8_t) -> int8x16_t {
+    static_assert_uimm_bits!(N, 3);
     simd_shuffle!(
         a,
         a,
@@ -5862,16 +6991,16 @@ pub unsafe fn vdupq_laneq_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
     )
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 8)
+    assert_instr(dup, N = 4)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5882,8 +7011,8 @@ pub unsafe fn vdupq_laneq_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 4);
+pub unsafe fn vdupq_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 3);
     simd_shuffle!(
         a,
         a,
@@ -5894,16 +7023,16 @@ pub unsafe fn vdupq_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
     )
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, N = 1)
+    assert_instr(nop, N = 0)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5914,21 +7043,21 @@ pub unsafe fn vdupq_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_laneq_s64<const N: i32>(a: int64x2_t) -> int64x1_t {
-    static_assert_uimm_bits!(N, 1);
-    transmute::<i64, _>(simd_extract!(a, N as u32))
+pub unsafe fn vdup_lane_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
+    static_assert!(N == 0);
+    a
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, N = 1)
+    assert_instr(nop, N = 0)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5939,21 +7068,21 @@ pub unsafe fn vdup_laneq_s64<const N: i32>(a: int64x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_laneq_u64<const N: i32>(a: uint64x2_t) -> uint64x1_t {
-    static_assert_uimm_bits!(N, 1);
-    transmute::<u64, _>(simd_extract!(a, N as u32))
+pub unsafe fn vdup_lane_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
+    static_assert!(N == 0);
+    a
 }
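+// With a single 64-bit lane the only valid index is 0 and the duplication is
+// the identity, which is why the attributes above expect `nop`; sketch with
+// an assumed input:
+//
+//     let v = vdup_n_u64(42);
+//     let r = vdup_lane_u64::<0>(v); // r == v, no instruction emitted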
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 0)
+    assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5964,21 +7093,21 @@ pub unsafe fn vdup_laneq_u64<const N: i32>(a: uint64x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_lane_s64<const N: i32>(a: int64x1_t) -> int64x2_t {
-    static_assert!(N == 0);
+pub unsafe fn vdup_laneq_f32<const N: i32>(a: float32x4_t) -> float32x2_t {
+    static_assert_uimm_bits!(N, 2);
     simd_shuffle!(a, a, [N as u32, N as u32])
 }
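+// The `laneq` forms read the selected lane from a 128-bit source; sketch with
+// assumed inputs:
+//
+//     let data = [1.0f32, 2.0, 3.0, 4.0];
+//     let v = vld1q_f32(data.as_ptr()); // float32x4_t [1.0, 2.0, 3.0, 4.0]
+//     let r = vdup_laneq_f32::<3>(v);   // float32x2_t [4.0, 4.0]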
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 0)
+    assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -5989,21 +7118,21 @@ pub unsafe fn vdupq_lane_s64<const N: i32>(a: int64x1_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_lane_u64<const N: i32>(a: uint64x1_t) -> uint64x2_t {
-    static_assert!(N == 0);
+pub unsafe fn vdup_laneq_s32<const N: i32>(a: int32x4_t) -> int32x2_t {
+    static_assert_uimm_bits!(N, 2);
     simd_shuffle!(a, a, [N as u32, N as u32])
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 1)
+    assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -6014,21 +7143,21 @@ pub unsafe fn vdupq_lane_u64<const N: i32>(a: uint64x1_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_laneq_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
+pub unsafe fn vdup_laneq_u32<const N: i32>(a: uint32x4_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 2);
     simd_shuffle!(a, a, [N as u32, N as u32])
 }
 #[doc = "Set all vector lanes to the same value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup, N = 1)
+    assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
 #[cfg_attr(
@@ -6039,22 +7168,23 @@ pub unsafe fn vdupq_laneq_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_laneq_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    simd_shuffle!(a, a, [N as u32, N as u32])
+pub unsafe fn vdupq_laneq_f32<const N: i32>(a: float32x4_t) -> float32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s8)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6063,21 +7193,23 @@ pub unsafe fn vdupq_laneq_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn veor_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    simd_xor(a, b)
+pub unsafe fn vdupq_laneq_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s8)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6086,21 +7218,23 @@ pub unsafe fn veor_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn veorq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    simd_xor(a, b)
+pub unsafe fn vdupq_laneq_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s16)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 4)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6109,21 +7243,23 @@ pub unsafe fn veorq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn veor_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    simd_xor(a, b)
+pub unsafe fn vdup_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x4_t {
+    static_assert_uimm_bits!(N, 3);
+    simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s16)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 4)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6132,21 +7268,23 @@ pub unsafe fn veor_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn veorq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    simd_xor(a, b)
+pub unsafe fn vdup_laneq_s16<const N: i32>(a: int16x8_t) -> int16x4_t {
+    static_assert_uimm_bits!(N, 3);
+    simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s32)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 4)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6155,21 +7293,23 @@ pub unsafe fn veorq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn veor_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    simd_xor(a, b)
+pub unsafe fn vdup_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 3);
+    simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32])
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s32)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 4)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6178,21 +7318,27 @@ pub unsafe fn veor_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn veorq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    simd_xor(a, b)
+pub unsafe fn vdupq_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    simd_shuffle!(
+        a,
+        a,
+        [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
+    )
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s64)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 4)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6201,21 +7347,27 @@ pub unsafe fn veorq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn veor_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    simd_xor(a, b)
+pub unsafe fn vdupq_laneq_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    simd_shuffle!(
+        a,
+        a,
+        [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
+    )
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s64)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 4)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6224,21 +7376,27 @@ pub unsafe fn veor_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn veorq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    simd_xor(a, b)
+pub unsafe fn vdupq_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    simd_shuffle!(
+        a,
+        a,
+        [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
+    )
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u8)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 8)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6247,21 +7405,27 @@ pub unsafe fn veorq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn veor_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    simd_xor(a, b)
+pub unsafe fn vdup_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x8_t {
+    static_assert_uimm_bits!(N, 4);
+    simd_shuffle!(
+        a,
+        a,
+        [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
+    )
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u8)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 8)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6270,21 +7434,27 @@ pub unsafe fn veor_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn veorq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    simd_xor(a, b)
+pub unsafe fn vdup_laneq_s8<const N: i32>(a: int8x16_t) -> int8x8_t {
+    static_assert_uimm_bits!(N, 4);
+    simd_shuffle!(
+        a,
+        a,
+        [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
+    )
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u16)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 8)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6293,21 +7463,27 @@ pub unsafe fn veorq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn veor_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    simd_xor(a, b)
+pub unsafe fn vdup_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 4);
+    simd_shuffle!(
+        a,
+        a,
+        [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
+    )
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u16)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 8)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6316,21 +7492,30 @@ pub unsafe fn veor_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn veorq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    simd_xor(a, b)
+pub unsafe fn vdupq_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x16_t {
+    static_assert_uimm_bits!(N, 4);
+    simd_shuffle!(
+        a,
+        a,
+        [
+            N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32,
+            N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32
+        ]
+    )
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u32)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 8)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6339,21 +7524,30 @@ pub unsafe fn veorq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn veor_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    simd_xor(a, b)
+pub unsafe fn vdupq_laneq_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
+    static_assert_uimm_bits!(N, 4);
+    simd_shuffle!(
+        a,
+        a,
+        [
+            N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32,
+            N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32
+        ]
+    )
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u32)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(dup, N = 8)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6362,21 +7556,30 @@ pub unsafe fn veor_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn veorq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    simd_xor(a, b)
+pub unsafe fn vdupq_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 4);
+    simd_shuffle!(
+        a,
+        a,
+        [
+            N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32,
+            N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32
+        ]
+    )
 }
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u64)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(nop, N = 1)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6385,21 +7588,23 @@ pub unsafe fn veorq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn veor_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    simd_xor(a, b)
+pub unsafe fn vdup_laneq_s64<const N: i32>(a: int64x2_t) -> int64x1_t {
+    static_assert_uimm_bits!(N, 1);
+    transmute::<i64, _>(simd_extract!(a, N as u32))
 }
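+// Because the destination has a single lane, the 64-bit `laneq` variant is a
+// plain extract (the `simd_extract!` plus `transmute` above) rather than a
+// shuffle; sketch with assumed inputs:
+//
+//     let v = vcombine_s64(vdup_n_s64(1), vdup_n_s64(2)); // int64x2_t [1, 2]
+//     let r = vdup_laneq_s64::<1>(v);                     // int64x1_t [2]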
-#[doc = "Vector bitwise exclusive or (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u64)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(eor)
+    assert_instr(nop, N = 1)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6408,22 +7613,23 @@ pub unsafe fn veor_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn veorq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    simd_xor(a, b)
+pub unsafe fn vdup_laneq_u64<const N: i32>(a: uint64x2_t) -> uint64x1_t {
+    static_assert_uimm_bits!(N, 1);
+    transmute::<u64, _>(simd_extract!(a, N as u32))
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f32)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 1)
+    assert_instr(dup, N = 0)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6432,27 +7638,23 @@ pub unsafe fn veorq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vext_f32<const N: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    static_assert_uimm_bits!(N, 1);
-    match N & 0b1 {
-        0 => simd_shuffle!(a, b, [0, 1]),
-        1 => simd_shuffle!(a, b, [1, 2]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn vdupq_lane_s64<const N: i32>(a: int64x1_t) -> int64x2_t {
+    static_assert!(N == 0);
+    simd_shuffle!(a, a, [N as u32, N as u32])
 }
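// Editor's sketch (not part of the patch): the source of `vdupq_lane_s64` has
// only one lane, so `static_assert!(N == 0)` rejects any other index at
// compile time and the shuffle simply repeats that lane.
#[cfg(all(test, target_arch = "aarch64"))]
fn dupq_lane_s64_example() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_s64(7); // int64x1_t
        let r = vdupq_lane_s64::<0>(a); // int64x2_t == [7, 7]
        assert_eq!(vgetq_lane_s64::<1>(r), 7);
        // vdupq_lane_s64::<1>(a) would fail to compile.
    }
}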
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s32)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 1)
+    assert_instr(dup, N = 0)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6461,27 +7663,23 @@ pub unsafe fn vext_f32<const N: i32>(a: float32x2_t, b: float32x2_t) -> float32x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vext_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    static_assert_uimm_bits!(N, 1);
-    match N & 0b1 {
-        0 => simd_shuffle!(a, b, [0, 1]),
-        1 => simd_shuffle!(a, b, [1, 2]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn vdupq_lane_u64<const N: i32>(a: uint64x1_t) -> uint64x2_t {
+    static_assert!(N == 0);
+    simd_shuffle!(a, a, [N as u32, N as u32])
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u32)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 1)
+    assert_instr(dup, N = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6490,27 +7688,23 @@ pub unsafe fn vext_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vext_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+pub unsafe fn vdupq_laneq_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
     static_assert_uimm_bits!(N, 1);
-    match N & 0b1 {
-        0 => simd_shuffle!(a, b, [0, 1]),
-        1 => simd_shuffle!(a, b, [1, 2]),
-        _ => unreachable_unchecked(),
-    }
+    simd_shuffle!(a, a, [N as u32, N as u32])
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s8)"]
+#[doc = "Set all vector lanes to the same value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 7)
+    assert_instr(dup, N = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6519,33 +7713,22 @@ pub unsafe fn vext_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vext_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    match N & 0b111 {
-        0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
-        1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
-        2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
-        3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
-        4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
-        5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
-        6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
-        7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn vdupq_laneq_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    simd_shuffle!(a, a, [N as u32, N as u32])
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s16)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 7)
+    assert_instr(eor)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6554,33 +7737,21 @@ pub unsafe fn vext_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vextq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(N, 3);
-    match N & 0b111 {
-        0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
-        1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
-        2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
-        3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
-        4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
-        5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
-        6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
-        7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn veor_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    simd_xor(a, b)
 }
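// Editor's sketch (not part of the patch): `veor_s8` lowers to a plain
// lane-wise XOR, so x ^ x is the idiomatic way to zero a register and XOR
// with all-ones flips every bit.
#[cfg(all(test, target_arch = "aarch64"))]
fn veor_s8_example() {
    use core::arch::aarch64::*;
    unsafe {
        let x = vdup_n_s8(0b0101_0101);
        let zero = veor_s8(x, x);
        let flipped = veor_s8(x, vdup_n_s8(-1));
        assert_eq!(vget_lane_s8::<0>(zero), 0);
        assert_eq!(vget_lane_s8::<0>(flipped), !0b0101_0101i8);
    }
}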
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u8)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 7)
+    assert_instr(eor)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6589,33 +7760,21 @@ pub unsafe fn vextq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vext_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    match N & 0b111 {
-        0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
-        1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
-        2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
-        3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
-        4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
-        5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
-        6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
-        7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn veorq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    simd_xor(a, b)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u16)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 7)
+    assert_instr(eor)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6624,33 +7783,21 @@ pub unsafe fn vext_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vextq_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 3);
-    match N & 0b111 {
-        0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
-        1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
-        2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
-        3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
-        4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
-        5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
-        6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
-        7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn veor_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    simd_xor(a, b)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p8)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 7)
+    assert_instr(eor)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6659,33 +7806,21 @@ pub unsafe fn vextq_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vext_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    match N & 0b111 {
-        0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
-        1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
-        2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
-        3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
-        4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
-        5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
-        6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
-        7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn veorq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    simd_xor(a, b)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p16)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 7)
+    assert_instr(eor)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6694,33 +7829,21 @@ pub unsafe fn vext_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vextq_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
-    static_assert_uimm_bits!(N, 3);
-    match N & 0b111 {
-        0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
-        1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
-        2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
-        3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
-        4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
-        5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
-        6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
-        7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn veor_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    simd_xor(a, b)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f32)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 3)
+    assert_instr(eor)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6729,29 +7852,21 @@ pub unsafe fn vextq_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vextq_f32<const N: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    match N & 0b11 {
-        0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
-        1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
-        2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
-        3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn veorq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    simd_xor(a, b)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s16)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 3)
+    assert_instr(eor)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6760,29 +7875,21 @@ pub unsafe fn vextq_f32<const N: i32>(a: float32x4_t, b: float32x4_t) -> float32
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vext_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(N, 2);
-    match N & 0b11 {
-        0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
-        1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
-        2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
-        3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn veor_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    simd_xor(a, b)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s32)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 3)
+    assert_instr(eor)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6791,29 +7898,21 @@ pub unsafe fn vext_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vextq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    match N & 0b11 {
-        0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
-        1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
-        2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
-        3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn veorq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    simd_xor(a, b)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u16)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 3)
+    assert_instr(eor)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6822,29 +7921,21 @@ pub unsafe fn vextq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vext_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(N, 2);
-    match N & 0b11 {
-        0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
-        1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
-        2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
-        3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn veor_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    simd_xor(a, b)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u32)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 3)
+    assert_instr(eor)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6853,29 +7944,21 @@ pub unsafe fn vext_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vextq_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    match N & 0b11 {
-        0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
-        1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
-        2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
-        3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn veorq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    simd_xor(a, b)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p16)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 3)
+    assert_instr(eor)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6884,29 +7967,21 @@ pub unsafe fn vextq_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vext_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
-    static_assert_uimm_bits!(N, 2);
-    match N & 0b11 {
-        0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
-        1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
-        2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
-        3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn veor_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    simd_xor(a, b)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s64)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 1)
+    assert_instr(eor)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6915,27 +7990,21 @@ pub unsafe fn vext_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vextq_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    match N & 0b1 {
-        0 => simd_shuffle!(a, b, [0, 1]),
-        1 => simd_shuffle!(a, b, [1, 2]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn veorq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    simd_xor(a, b)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u64)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 1)
+    assert_instr(eor)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6944,27 +8013,21 @@ pub unsafe fn vextq_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vextq_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    match N & 0b1 {
-        0 => simd_shuffle!(a, b, [0, 1]),
-        1 => simd_shuffle!(a, b, [1, 2]),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn veor_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    simd_xor(a, b)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s8)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 15)
+    assert_instr(eor)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6973,101 +8036,21 @@ pub unsafe fn vextq_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vextq_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    static_assert_uimm_bits!(N, 4);
-    match N & 0b1111 {
-        0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
-        1 => simd_shuffle!(
-            a,
-            b,
-            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
-        ),
-        2 => simd_shuffle!(
-            a,
-            b,
-            [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
-        ),
-        3 => simd_shuffle!(
-            a,
-            b,
-            [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
-        ),
-        4 => simd_shuffle!(
-            a,
-            b,
-            [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
-        ),
-        5 => simd_shuffle!(
-            a,
-            b,
-            [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
-        ),
-        6 => simd_shuffle!(
-            a,
-            b,
-            [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
-        ),
-        7 => simd_shuffle!(
-            a,
-            b,
-            [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
-        ),
-        8 => simd_shuffle!(
-            a,
-            b,
-            [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
-        ),
-        9 => simd_shuffle!(
-            a,
-            b,
-            [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
-        ),
-        10 => simd_shuffle!(
-            a,
-            b,
-            [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
-        ),
-        11 => simd_shuffle!(
-            a,
-            b,
-            [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
-        ),
-        12 => simd_shuffle!(
-            a,
-            b,
-            [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]
-        ),
-        13 => simd_shuffle!(
-            a,
-            b,
-            [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]
-        ),
-        14 => simd_shuffle!(
-            a,
-            b,
-            [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
-        ),
-        15 => simd_shuffle!(
-            a,
-            b,
-            [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
-        ),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn veorq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    simd_xor(a, b)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u8)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 15)
+    assert_instr(eor)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7076,101 +8059,21 @@ pub unsafe fn vextq_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vextq_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 4);
-    match N & 0b1111 {
-        0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
-        1 => simd_shuffle!(
-            a,
-            b,
-            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
-        ),
-        2 => simd_shuffle!(
-            a,
-            b,
-            [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
-        ),
-        3 => simd_shuffle!(
-            a,
-            b,
-            [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
-        ),
-        4 => simd_shuffle!(
-            a,
-            b,
-            [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
-        ),
-        5 => simd_shuffle!(
-            a,
-            b,
-            [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
-        ),
-        6 => simd_shuffle!(
-            a,
-            b,
-            [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
-        ),
-        7 => simd_shuffle!(
-            a,
-            b,
-            [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
-        ),
-        8 => simd_shuffle!(
-            a,
-            b,
-            [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
-        ),
-        9 => simd_shuffle!(
-            a,
-            b,
-            [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
-        ),
-        10 => simd_shuffle!(
-            a,
-            b,
-            [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
-        ),
-        11 => simd_shuffle!(
-            a,
-            b,
-            [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
-        ),
-        12 => simd_shuffle!(
-            a,
-            b,
-            [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]
-        ),
-        13 => simd_shuffle!(
-            a,
-            b,
-            [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]
-        ),
-        14 => simd_shuffle!(
-            a,
-            b,
-            [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
-        ),
-        15 => simd_shuffle!(
-            a,
-            b,
-            [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
-        ),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn veor_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    simd_xor(a, b)
 }
-#[doc = "Extract vector from pair of vectors"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p8)"]
+#[doc = "Vector bitwise exclusive or (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veorq_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext, N = 15)
+    assert_instr(eor)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7179,100 +8082,22 @@ pub unsafe fn vextq_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vextq_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
-    static_assert_uimm_bits!(N, 4);
-    match N & 0b1111 {
-        0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
-        1 => simd_shuffle!(
-            a,
-            b,
-            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
-        ),
-        2 => simd_shuffle!(
-            a,
-            b,
-            [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
-        ),
-        3 => simd_shuffle!(
-            a,
-            b,
-            [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
-        ),
-        4 => simd_shuffle!(
-            a,
-            b,
-            [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
-        ),
-        5 => simd_shuffle!(
-            a,
-            b,
-            [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
-        ),
-        6 => simd_shuffle!(
-            a,
-            b,
-            [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
-        ),
-        7 => simd_shuffle!(
-            a,
-            b,
-            [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
-        ),
-        8 => simd_shuffle!(
-            a,
-            b,
-            [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
-        ),
-        9 => simd_shuffle!(
-            a,
-            b,
-            [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
-        ),
-        10 => simd_shuffle!(
-            a,
-            b,
-            [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
-        ),
-        11 => simd_shuffle!(
-            a,
-            b,
-            [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
-        ),
-        12 => simd_shuffle!(
-            a,
-            b,
-            [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]
-        ),
-        13 => simd_shuffle!(
-            a,
-            b,
-            [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]
-        ),
-        14 => simd_shuffle!(
-            a,
-            b,
-            [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
-        ),
-        15 => simd_shuffle!(
-            a,
-            b,
-            [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
-        ),
-        _ => unreachable_unchecked(),
-    }
+pub unsafe fn veorq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    simd_xor(a, b)
 }
-#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f32)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmla)
+    assert_instr(ext, N = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7281,26 +8106,27 @@ pub unsafe fn vextq_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v2f32")]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v2f32")]
-        fn _vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t;
+pub unsafe fn vext_f32<const N: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    static_assert_uimm_bits!(N, 1);
+    match N & 0b1 {
+        0 => simd_shuffle!(a, b, [0, 1]),
+        1 => simd_shuffle!(a, b, [1, 2]),
+        _ => unreachable_unchecked(),
     }
-    _vfma_f32(b, c, a)
 }
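// Editor's sketch (not part of the patch): `vext` conceptually concatenates
// (a, b) and extracts a full-width window starting at lane N, which is why
// the N = 1 arm shuffles indices [1, 2] across the pair. With
// `rustc_legacy_const_generics(2)` it can also be called as vext_f32(a, b, 1).
// Assumes little-endian aarch64 lane ordering.
#[cfg(all(test, target_arch = "aarch64"))]
fn vext_f32_example() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vld1_f32([1.0f32, 2.0].as_ptr());
        let b = vld1_f32([3.0f32, 4.0].as_ptr());
        let r = vext_f32::<1>(a, b); // [a[1], b[0]] == [2.0, 3.0]
        assert_eq!(vget_lane_f32::<0>(r), 2.0);
        assert_eq!(vget_lane_f32::<1>(r), 3.0);
    }
}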
-#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f32)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmla)
+    assert_instr(ext, N = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7309,26 +8135,27 @@ pub unsafe fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float3
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v4f32")]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v4f32")]
-        fn _vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t;
+pub unsafe fn vext_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(N, 1);
+    match N & 0b1 {
+        0 => simd_shuffle!(a, b, [0, 1]),
+        1 => simd_shuffle!(a, b, [1, 2]),
+        _ => unreachable_unchecked(),
     }
-    _vfmaq_f32(b, c, a)
 }
-#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f32)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmla)
+    assert_instr(ext, N = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7337,21 +8164,27 @@ pub unsafe fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
-    vfma_f32(a, b, vdup_n_f32_vfp4(c))
+pub unsafe fn vext_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 1);
+    match N & 0b1 {
+        0 => simd_shuffle!(a, b, [0, 1]),
+        1 => simd_shuffle!(a, b, [1, 2]),
+        _ => unreachable_unchecked(),
+    }
 }
-#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_n_f32)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmla)
+    assert_instr(ext, N = 7)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7360,21 +8193,33 @@ pub unsafe fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
-    vfmaq_f32(a, b, vdupq_n_f32_vfp4(c))
+pub unsafe fn vext_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    match N & 0b111 {
+        0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
+        1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
+        2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
+        3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
+        4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
+        5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
+        6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
+        7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
+        _ => unreachable_unchecked(),
+    }
 }
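// Editor's sketch (not part of the patch): with eight lanes the immediate has
// three significant bits, and each match arm above spells out one window into
// the concatenated 16-lane (a, b) sequence. Assumes little-endian aarch64.
#[cfg(all(test, target_arch = "aarch64"))]
fn vext_s8_example() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vld1_s8([0i8, 1, 2, 3, 4, 5, 6, 7].as_ptr());
        let b = vld1_s8([8i8, 9, 10, 11, 12, 13, 14, 15].as_ptr());
        let r = vext_s8::<3>(a, b); // [3, 4, 5, 6, 7, 8, 9, 10]
        assert_eq!(vget_lane_s8::<0>(r), 3);
        assert_eq!(vget_lane_s8::<7>(r), 10);
    }
}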
-#[doc = "Floating-point fused multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f32)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmls)
+    assert_instr(ext, N = 7)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7383,22 +8228,33 @@ pub unsafe fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vfms_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
-    let b: float32x2_t = simd_neg(b);
-    vfma_f32(a, b, c)
+pub unsafe fn vextq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    match N & 0b111 {
+        0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
+        1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
+        2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
+        3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
+        4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
+        5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
+        6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
+        7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
+        _ => unreachable_unchecked(),
+    }
 }
-#[doc = "Floating-point fused multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f32)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmls)
+    assert_instr(ext, N = 7)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7407,22 +8263,33 @@ pub unsafe fn vfms_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float3
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vfmsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
-    let b: float32x4_t = simd_neg(b);
-    vfmaq_f32(a, b, c)
+pub unsafe fn vext_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    match N & 0b111 {
+        0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
+        1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
+        2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
+        3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
+        4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
+        5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
+        6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
+        7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
+        _ => unreachable_unchecked(),
+    }
 }
-#[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f32)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmls)
+    assert_instr(ext, N = 7)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7431,44 +8298,33 @@ pub unsafe fn vfmsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vfms_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
-    vfms_f32(a, b, vdup_n_f32_vfp4(c))
-}
-#[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_n_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmls)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vfmsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
-    vfmsq_f32(a, b, vdupq_n_f32_vfp4(c))
+pub unsafe fn vextq_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    match N & 0b111 {
+        0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
+        1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
+        2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
+        3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
+        4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
+        5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
+        6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
+        7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
+        _ => unreachable_unchecked(),
+    }
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s8)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shadd)
+    assert_instr(ext, N = 7)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7477,29 +8333,33 @@ pub unsafe fn vfmsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shadd.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i8")]
-        fn _vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+pub unsafe fn vext_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    match N & 0b111 {
+        0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
+        1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
+        2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
+        3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
+        4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
+        5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
+        6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
+        7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
+        _ => unreachable_unchecked(),
     }
-    _vhadd_s8(a, b)
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s8)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 7))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shadd)
+    assert_instr(ext, N = 7)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7508,29 +8368,33 @@ pub unsafe fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shadd.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v16i8")]
-        fn _vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+pub unsafe fn vextq_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    static_assert_uimm_bits!(N, 3);
+    match N & 0b111 {
+        0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
+        1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
+        2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
+        3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
+        4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
+        5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
+        6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
+        7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
+        _ => unreachable_unchecked(),
     }
-    _vhaddq_s8(a, b)
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s16)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shadd)
+    assert_instr(ext, N = 3)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7539,29 +8403,29 @@ pub unsafe fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shadd.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i16")]
-        fn _vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+pub unsafe fn vextq_f32<const N: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    match N & 0b11 {
+        0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
+        1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
+        2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
+        3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
+        _ => unreachable_unchecked(),
     }
-    _vhadd_s16(a, b)
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s16)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shadd)
+    assert_instr(ext, N = 3)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7570,29 +8434,29 @@ pub unsafe fn vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shadd.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i16")]
-        fn _vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+pub unsafe fn vext_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(N, 2);
+    match N & 0b11 {
+        0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
+        1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
+        2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
+        3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
+        _ => unreachable_unchecked(),
     }
-    _vhaddq_s16(a, b)
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s32)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shadd)
+    assert_instr(ext, N = 3)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7601,29 +8465,29 @@ pub unsafe fn vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shadd.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v2i32")]
-        fn _vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+pub unsafe fn vextq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    match N & 0b11 {
+        0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
+        1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
+        2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
+        3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
+        _ => unreachable_unchecked(),
     }
-    _vhadd_s32(a, b)
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s32)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shadd)
+    assert_instr(ext, N = 3)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7632,29 +8496,29 @@ pub unsafe fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shadd.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i32")]
-        fn _vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+pub unsafe fn vext_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 2);
+    match N & 0b11 {
+        0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
+        1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
+        2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
+        3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
+        _ => unreachable_unchecked(),
     }
-    _vhaddq_s32(a, b)
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u8)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhadd)
+    assert_instr(ext, N = 3)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7663,29 +8527,29 @@ pub unsafe fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhadd.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i8")]
-        fn _vhadd_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+pub unsafe fn vextq_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    match N & 0b11 {
+        0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
+        1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
+        2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
+        3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
+        _ => unreachable_unchecked(),
     }
-    _vhadd_u8(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u8)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhadd)
+    assert_instr(ext, N = 3)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7694,29 +8558,29 @@ pub unsafe fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhadd.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v16i8")]
-        fn _vhaddq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+pub unsafe fn vext_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    static_assert_uimm_bits!(N, 2);
+    match N & 0b11 {
+        0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
+        1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
+        2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
+        3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
+        _ => unreachable_unchecked(),
     }
-    _vhaddq_u8(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u16)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhadd)
+    assert_instr(ext, N = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7725,29 +8589,27 @@ pub unsafe fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhadd.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i16")]
-        fn _vhadd_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+pub unsafe fn vextq_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    match N & 0b1 {
+        0 => simd_shuffle!(a, b, [0, 1]),
+        1 => simd_shuffle!(a, b, [1, 2]),
+        _ => unreachable_unchecked(),
     }
-    _vhadd_u16(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u16)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhadd)
+    assert_instr(ext, N = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7756,29 +8618,27 @@ pub unsafe fn vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhadd.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i16")]
-        fn _vhaddq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+pub unsafe fn vextq_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    match N & 0b1 {
+        0 => simd_shuffle!(a, b, [0, 1]),
+        1 => simd_shuffle!(a, b, [1, 2]),
+        _ => unreachable_unchecked(),
     }
-    _vhaddq_u16(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u32)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhadd)
+    assert_instr(ext, N = 15)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7787,29 +8647,101 @@ pub unsafe fn vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhadd.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v2i32")]
-        fn _vhadd_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+pub unsafe fn vextq_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    static_assert_uimm_bits!(N, 4);
+    match N & 0b1111 {
+        0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+        1 => simd_shuffle!(
+            a,
+            b,
+            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+        ),
+        2 => simd_shuffle!(
+            a,
+            b,
+            [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
+        ),
+        3 => simd_shuffle!(
+            a,
+            b,
+            [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
+        ),
+        4 => simd_shuffle!(
+            a,
+            b,
+            [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+        ),
+        5 => simd_shuffle!(
+            a,
+            b,
+            [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
+        ),
+        6 => simd_shuffle!(
+            a,
+            b,
+            [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
+        ),
+        7 => simd_shuffle!(
+            a,
+            b,
+            [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
+        ),
+        8 => simd_shuffle!(
+            a,
+            b,
+            [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
+        ),
+        9 => simd_shuffle!(
+            a,
+            b,
+            [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
+        ),
+        10 => simd_shuffle!(
+            a,
+            b,
+            [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
+        ),
+        11 => simd_shuffle!(
+            a,
+            b,
+            [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
+        ),
+        12 => simd_shuffle!(
+            a,
+            b,
+            [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]
+        ),
+        13 => simd_shuffle!(
+            a,
+            b,
+            [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]
+        ),
+        14 => simd_shuffle!(
+            a,
+            b,
+            [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
+        ),
+        15 => simd_shuffle!(
+            a,
+            b,
+            [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
+        ),
+        _ => unreachable_unchecked(),
     }
-    _vhadd_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Halving add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u32)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhadd)
+    assert_instr(ext, N = 15)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7818,29 +8750,101 @@ pub unsafe fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhadd.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i32")]
-        fn _vhaddq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+pub unsafe fn vextq_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 4);
+    match N & 0b1111 {
+        0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+        1 => simd_shuffle!(
+            a,
+            b,
+            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+        ),
+        2 => simd_shuffle!(
+            a,
+            b,
+            [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
+        ),
+        3 => simd_shuffle!(
+            a,
+            b,
+            [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
+        ),
+        4 => simd_shuffle!(
+            a,
+            b,
+            [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+        ),
+        5 => simd_shuffle!(
+            a,
+            b,
+            [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
+        ),
+        6 => simd_shuffle!(
+            a,
+            b,
+            [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
+        ),
+        7 => simd_shuffle!(
+            a,
+            b,
+            [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
+        ),
+        8 => simd_shuffle!(
+            a,
+            b,
+            [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
+        ),
+        9 => simd_shuffle!(
+            a,
+            b,
+            [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
+        ),
+        10 => simd_shuffle!(
+            a,
+            b,
+            [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
+        ),
+        11 => simd_shuffle!(
+            a,
+            b,
+            [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
+        ),
+        12 => simd_shuffle!(
+            a,
+            b,
+            [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]
+        ),
+        13 => simd_shuffle!(
+            a,
+            b,
+            [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]
+        ),
+        14 => simd_shuffle!(
+            a,
+            b,
+            [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
+        ),
+        15 => simd_shuffle!(
+            a,
+            b,
+            [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
+        ),
+        _ => unreachable_unchecked(),
     }
-    _vhaddq_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s16)"]
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 15))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shsub)
+    assert_instr(ext, N = 15)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7849,28 +8853,99 @@ pub unsafe fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shsub.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i16")]
-        fn _vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    _vhsub_s16(a, b)
-}
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s16)"]
+pub unsafe fn vextq_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    static_assert_uimm_bits!(N, 4);
+    match N & 0b1111 {
+        0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+        1 => simd_shuffle!(
+            a,
+            b,
+            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
+        ),
+        2 => simd_shuffle!(
+            a,
+            b,
+            [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
+        ),
+        3 => simd_shuffle!(
+            a,
+            b,
+            [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
+        ),
+        4 => simd_shuffle!(
+            a,
+            b,
+            [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+        ),
+        5 => simd_shuffle!(
+            a,
+            b,
+            [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
+        ),
+        6 => simd_shuffle!(
+            a,
+            b,
+            [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
+        ),
+        7 => simd_shuffle!(
+            a,
+            b,
+            [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
+        ),
+        8 => simd_shuffle!(
+            a,
+            b,
+            [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
+        ),
+        9 => simd_shuffle!(
+            a,
+            b,
+            [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
+        ),
+        10 => simd_shuffle!(
+            a,
+            b,
+            [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
+        ),
+        11 => simd_shuffle!(
+            a,
+            b,
+            [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
+        ),
+        12 => simd_shuffle!(
+            a,
+            b,
+            [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]
+        ),
+        13 => simd_shuffle!(
+            a,
+            b,
+            [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]
+        ),
+        14 => simd_shuffle!(
+            a,
+            b,
+            [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
+        ),
+        15 => simd_shuffle!(
+            a,
+            b,
+            [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
+        ),
+        _ => unreachable_unchecked(),
+    }
+}
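
The exhaustive `match` on `N & 0b1111` is not dead weight next to `static_assert_uimm_bits!`: `simd_shuffle!` requires a literal index array, so every admissible `N` needs its own arm, and because `N` is const-generic the branch constant-folds to a single shuffle. A reduced sketch of the same dispatch pattern (illustrative helper, not from the patch):

```rust
/// Sketch of the dispatch pattern: one literal index array per arm; the
/// match folds away once N is known at compile time.
fn window_indices<const N: i32>() -> [usize; 4] {
    match N & 0b11 {
        0 => [0, 1, 2, 3],
        1 => [1, 2, 3, 4],
        2 => [2, 3, 4, 5],
        3 => [3, 4, 5, 6],
        _ => unreachable!(), // N & 0b11 is always in 0..=3
    }
}

fn main() {
    assert_eq!(window_indices::<2>(), [2, 3, 4, 5]);
}
```

The generated code can use `unreachable_unchecked()` because the masked range is provable; the safe `unreachable!()` here only keeps the sketch self-contained.
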
+#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shsub)
+    assert_instr(fmla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -7880,28 +8955,25 @@ pub unsafe fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+pub unsafe fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shsub.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i16")]
-        fn _vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v2f32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v2f32")]
+        fn _vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t;
     }
-    _vhsubq_s16(a, b)
+    _vfma_f32(b, c, a)
 }
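
Note the argument order in the body: `llvm.fma.v2f32` computes `x * y + z` per lane with a single rounding, so the call `_vfma_f32(b, c, a)` yields `a + b * c`, with `a` as the accumulator. One lane in scalar form (a sketch):

```rust
// One lane of vfma_f32 (sketch): a fused multiply-add with a single rounding.
fn vfma_lane(a: f32, b: f32, c: f32) -> f32 {
    b.mul_add(c, a) // b * c + a
}

fn main() {
    assert_eq!(vfma_lane(1.0, 2.0, 3.0), 7.0); // 1 + 2 * 3
}
```
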
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s32)"]
+#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shsub)
+    assert_instr(fmla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -7911,28 +8983,25 @@ pub unsafe fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+pub unsafe fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shsub.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v2i32")]
-        fn _vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v4f32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v4f32")]
+        fn _vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t;
     }
-    _vhsub_s32(a, b)
+    _vfmaq_f32(b, c, a)
 }
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s32)"]
+#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shsub)
+    assert_instr(fmla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -7942,28 +9011,20 @@ pub unsafe fn vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shsub.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i32")]
-        fn _vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-    _vhsubq_s32(a, b)
+pub unsafe fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
+    vfma_f32(a, b, vdup_n_f32_vfp4(c))
 }
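
The `_n` variants only broadcast the scalar across all lanes via `vdup_n_f32_vfp4` and defer to the vector form; over plain arrays the same shape is (sketch):

```rust
// Sketch of vfma_n_f32: splat c, then the usual fused multiply-add per lane.
fn vfma_n_model(a: [f32; 2], b: [f32; 2], c: f32) -> [f32; 2] {
    let c = [c; 2]; // the vdup_n broadcast
    [b[0].mul_add(c[0], a[0]), b[1].mul_add(c[1], a[1])]
}

fn main() {
    assert_eq!(vfma_n_model([1.0, 2.0], [3.0, 4.0], 2.0), [7.0, 10.0]);
}
```
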
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s8)"]
+#[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_n_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shsub)
+    assert_instr(fmla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -7973,28 +9034,20 @@ pub unsafe fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shsub.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i8")]
-        fn _vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-    _vhsub_s8(a, b)
+pub unsafe fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
+    vfmaq_f32(a, b, vdupq_n_f32_vfp4(c))
 }
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s8)"]
+#[doc = "Floating-point fused multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(shsub)
+    assert_instr(fmls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8004,28 +9057,21 @@ pub unsafe fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.shsub.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v16i8")]
-        fn _vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    }
-    _vhsubq_s8(a, b)
+pub unsafe fn vfms_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    let b: float32x2_t = simd_neg(b);
+    vfma_f32(a, b, c)
 }
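
`vfms_f32` reuses the FMA path by negating `b` first: `a + (-b) * c = a - b * c`, still fused under a single rounding. The scalar equivalent of one lane (a sketch):

```rust
// One lane of vfms_f32 (sketch): fused multiply-subtract via a negated operand.
fn vfms_lane(a: f32, b: f32, c: f32) -> f32 {
    (-b).mul_add(c, a) // a - b * c
}

fn main() {
    assert_eq!(vfms_lane(10.0, 2.0, 3.0), 4.0); // 10 - 2 * 3
}
```
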
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u8)"]
+#[doc = "Floating-point fused multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhsub)
+    assert_instr(fmls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8035,28 +9081,21 @@ pub unsafe fn vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhsub.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i8")]
-        fn _vhsub_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-    _vhsub_u8(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vfmsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    let b: float32x4_t = simd_neg(b);
+    vfmaq_f32(a, b, c)
 }
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u8)"]
+#[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhsub)
+    assert_instr(fmls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8066,28 +9105,20 @@ pub unsafe fn vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhsub.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v16i8")]
-        fn _vhsubq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    }
-    _vhsubq_u8(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vfms_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
+    vfms_f32(a, b, vdup_n_f32_vfp4(c))
 }
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u16)"]
+#[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_n_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhsub)
+    assert_instr(fmls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8097,28 +9128,20 @@ pub unsafe fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhsub.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i16")]
-        fn _vhsub_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    _vhsub_u16(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vfmsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
+    vfmsq_f32(a, b, vdupq_n_f32_vfp4(c))
 }
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u16)"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhsub)
+    assert_instr(shadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8128,28 +9151,28 @@ pub unsafe fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+pub unsafe fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhsub.v8i16"
+            link_name = "llvm.aarch64.neon.shadd.v8i8"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i16")]
-        fn _vhsubq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i8")]
+        fn _vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     }
-    _vhsubq_u16(a.as_signed(), b.as_signed()).as_unsigned()
+    _vhadd_s8(a, b)
 }
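
`vhadd` is the lane-wise average `(a + b) >> 1`, truncating toward negative infinity; the hardware widens internally, so the intermediate sum cannot overflow. A scalar model of one `vhadd_s8` lane (a sketch):

```rust
// One lane of vhadd_s8 (sketch): widen, add, then arithmetic shift right.
fn shadd_lane(a: i8, b: i8) -> i8 {
    ((a as i16 + b as i16) >> 1) as i8
}

fn main() {
    assert_eq!(shadd_lane(127, 127), 127); // no wrap at i8::MAX + i8::MAX
    assert_eq!(shadd_lane(-3, 2), -1);     // -1 >> 1 rounds toward -infinity
}
```
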
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u32)"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhsub)
+    assert_instr(shadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8159,28 +9182,28 @@ pub unsafe fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+pub unsafe fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhsub.v2i32"
+            link_name = "llvm.aarch64.neon.shadd.v16i8"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v2i32")]
-        fn _vhsub_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v16i8")]
+        fn _vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
     }
-    _vhsub_u32(a.as_signed(), b.as_signed()).as_unsigned()
+    _vhaddq_s8(a, b)
 }
-#[doc = "Signed halving subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u32)"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uhsub)
+    assert_instr(shadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8190,28 +9213,28 @@ pub unsafe fn vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+pub unsafe fn vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uhsub.v4i32"
+            link_name = "llvm.aarch64.neon.shadd.v4i16"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i32")]
-        fn _vhsubq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i16")]
+        fn _vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     }
-    _vhsubq_u32(a.as_signed(), b.as_signed()).as_unsigned()
+    _vhadd_s16(a, b)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x2)"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(shadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8221,28 +9244,28 @@ pub unsafe fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_f32_x2(a: *const f32) -> float32x2x2_t {
+pub unsafe fn vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v2f32.p0f32"
+            link_name = "llvm.aarch64.neon.shadd.v8i16"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2f32.p0f32")]
-        fn _vld1_f32_x2(a: *const f32) -> float32x2x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i16")]
+        fn _vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
     }
-    _vld1_f32_x2(a)
+    _vhaddq_s16(a, b)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x3)"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(shadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8252,28 +9275,28 @@ pub unsafe fn vld1_f32_x2(a: *const f32) -> float32x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_f32_x3(a: *const f32) -> float32x2x3_t {
+pub unsafe fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v2f32.p0f32"
+            link_name = "llvm.aarch64.neon.shadd.v2i32"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2f32.p0f32")]
-        fn _vld1_f32_x3(a: *const f32) -> float32x2x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v2i32")]
+        fn _vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     }
-    _vld1_f32_x3(a)
+    _vhadd_s32(a, b)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x4)"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(shadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8283,28 +9306,28 @@ pub unsafe fn vld1_f32_x3(a: *const f32) -> float32x2x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_f32_x4(a: *const f32) -> float32x2x4_t {
+pub unsafe fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v2f32.p0f32"
+            link_name = "llvm.aarch64.neon.shadd.v4i32"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2f32.p0f32")]
-        fn _vld1_f32_x4(a: *const f32) -> float32x2x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i32")]
+        fn _vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     }
-    _vld1_f32_x4(a)
+    _vhaddq_s32(a, b)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x2)"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(uhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8314,28 +9337,28 @@ pub unsafe fn vld1_f32_x4(a: *const f32) -> float32x2x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_f32_x2(a: *const f32) -> float32x4x2_t {
+pub unsafe fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v4f32.p0f32"
+            link_name = "llvm.aarch64.neon.uhadd.v8i8"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4f32.p0f32")]
-        fn _vld1q_f32_x2(a: *const f32) -> float32x4x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i8")]
+        fn _vhadd_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     }
-    _vld1q_f32_x2(a)
+    _vhadd_u8(a.as_signed(), b.as_signed()).as_unsigned()
 }
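
Note that `as_signed()`/`as_unsigned()` here are lane-preserving bitcasts: the FFI shim is declared over signed vector types, but the selected instruction (`vhadd.u8`/`uhadd`) is the unsigned one, so only the Rust-level types change. The unsigned semantics, again as an illustrative scalar sketch:

fn halving_add_u8(a: u8, b: u8) -> u8 {
    // Widen to u16 so the sum cannot wrap, then halve.
    ((a as u16 + b as u16) >> 1) as u8
}
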
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x3)"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(uhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8345,28 +9368,28 @@ pub unsafe fn vld1q_f32_x2(a: *const f32) -> float32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_f32_x3(a: *const f32) -> float32x4x3_t {
+pub unsafe fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v4f32.p0f32"
+            link_name = "llvm.aarch64.neon.uhadd.v16i8"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4f32.p0f32")]
-        fn _vld1q_f32_x3(a: *const f32) -> float32x4x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v16i8")]
+        fn _vhaddq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
     }
-    _vld1q_f32_x3(a)
+    _vhaddq_u8(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x4)"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(uhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8376,28 +9399,28 @@ pub unsafe fn vld1q_f32_x3(a: *const f32) -> float32x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_f32_x4(a: *const f32) -> float32x4x4_t {
+pub unsafe fn vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v4f32.p0f32"
+            link_name = "llvm.aarch64.neon.uhadd.v4i16"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4f32.p0f32")]
-        fn _vld1q_f32_x4(a: *const f32) -> float32x4x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i16")]
+        fn _vhadd_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     }
-    _vld1q_f32_x4(a)
+    _vhadd_u16(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x2)"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(uhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8407,20 +9430,28 @@ pub unsafe fn vld1q_f32_x4(a: *const f32) -> float32x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p64_x2(a: *const p64) -> poly64x1x2_t {
-    transmute(vld1_s64_x2(transmute(a)))
+pub unsafe fn vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uhadd.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i16")]
+        fn _vhaddq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vhaddq_u16(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x3)"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhadd_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(uhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8430,20 +9461,28 @@ pub unsafe fn vld1_p64_x2(a: *const p64) -> poly64x1x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t {
-    transmute(vld1_s64_x3(transmute(a)))
+pub unsafe fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uhadd.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v2i32")]
+        fn _vhadd_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    _vhadd_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x4)"]
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhaddq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(uhadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8453,20 +9492,28 @@ pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t {
-    transmute(vld1_s64_x4(transmute(a)))
+pub unsafe fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uhadd.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i32")]
+        fn _vhaddq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vhaddq_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x2)"]
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(shsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8476,20 +9523,28 @@ pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t {
-    transmute(vld1q_s64_x2(transmute(a)))
+pub unsafe fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.shsub.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i16")]
+        fn _vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    _vhsub_s16(a, b)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x3)"]
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(shsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8499,20 +9554,28 @@ pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t {
-    transmute(vld1q_s64_x3(transmute(a)))
+pub unsafe fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.shsub.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i16")]
+        fn _vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vhsubq_s16(a, b)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x4)"]
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(shsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8522,20 +9585,28 @@ pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t {
-    transmute(vld1q_s64_x4(transmute(a)))
+pub unsafe fn vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.shsub.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v2i32")]
+        fn _vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    _vhsub_s32(a, b)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x2)"]
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(shsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8545,28 +9616,28 @@ pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s8_x2(a: *const i8) -> int8x8x2_t {
+pub unsafe fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v8i8.p0i8"
+            link_name = "llvm.aarch64.neon.shsub.v4i32"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v8i8.p0i8")]
-        fn _vld1_s8_x2(a: *const i8) -> int8x8x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i32")]
+        fn _vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     }
-    _vld1_s8_x2(a)
+    _vhsubq_s32(a, b)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x3)"]
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(shsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8576,28 +9647,28 @@ pub unsafe fn vld1_s8_x2(a: *const i8) -> int8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s8_x3(a: *const i8) -> int8x8x3_t {
+pub unsafe fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v8i8.p0i8"
+            link_name = "llvm.aarch64.neon.shsub.v8i8"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8i8.p0i8")]
-        fn _vld1_s8_x3(a: *const i8) -> int8x8x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i8")]
+        fn _vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     }
-    _vld1_s8_x3(a)
+    _vhsub_s8(a, b)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x4)"]
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(shsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8607,28 +9678,28 @@ pub unsafe fn vld1_s8_x3(a: *const i8) -> int8x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s8_x4(a: *const i8) -> int8x8x4_t {
+pub unsafe fn vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v8i8.p0i8"
+            link_name = "llvm.aarch64.neon.shsub.v16i8"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8i8.p0i8")]
-        fn _vld1_s8_x4(a: *const i8) -> int8x8x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v16i8")]
+        fn _vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
     }
-    _vld1_s8_x4(a)
+    _vhsubq_s8(a, b)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x2)"]
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(uhsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8638,28 +9709,28 @@ pub unsafe fn vld1_s8_x4(a: *const i8) -> int8x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s8_x2(a: *const i8) -> int8x16x2_t {
+pub unsafe fn vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v16i8.p0i8"
+            link_name = "llvm.aarch64.neon.uhsub.v8i8"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v16i8.p0i8")]
-        fn _vld1q_s8_x2(a: *const i8) -> int8x16x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i8")]
+        fn _vhsub_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     }
-    _vld1q_s8_x2(a)
+    _vhsub_u8(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x3)"]
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(uhsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8669,28 +9740,28 @@ pub unsafe fn vld1q_s8_x2(a: *const i8) -> int8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s8_x3(a: *const i8) -> int8x16x3_t {
+pub unsafe fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v16i8.p0i8"
+            link_name = "llvm.aarch64.neon.uhsub.v16i8"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v16i8.p0i8")]
-        fn _vld1q_s8_x3(a: *const i8) -> int8x16x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v16i8")]
+        fn _vhsubq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
     }
-    _vld1q_s8_x3(a)
+    _vhsubq_u8(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x4)"]
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(uhsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8700,28 +9771,28 @@ pub unsafe fn vld1q_s8_x3(a: *const i8) -> int8x16x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s8_x4(a: *const i8) -> int8x16x4_t {
+pub unsafe fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v16i8.p0i8"
+            link_name = "llvm.aarch64.neon.uhsub.v4i16"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v16i8.p0i8")]
-        fn _vld1q_s8_x4(a: *const i8) -> int8x16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i16")]
+        fn _vhsub_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     }
-    _vld1q_s8_x4(a)
+    _vhsub_u16(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x2)"]
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(uhsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8731,28 +9802,28 @@ pub unsafe fn vld1q_s8_x4(a: *const i8) -> int8x16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s16_x2(a: *const i16) -> int16x4x2_t {
+pub unsafe fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v4i16.p0i16"
+            link_name = "llvm.aarch64.neon.uhsub.v8i16"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4i16.p0i16")]
-        fn _vld1_s16_x2(a: *const i16) -> int16x4x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i16")]
+        fn _vhsubq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
     }
-    _vld1_s16_x2(a)
+    _vhsubq_u16(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x3)"]
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsub_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(uhsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8762,28 +9833,28 @@ pub unsafe fn vld1_s16_x2(a: *const i16) -> int16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s16_x3(a: *const i16) -> int16x4x3_t {
+pub unsafe fn vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v4i16.p0i16"
+            link_name = "llvm.aarch64.neon.uhsub.v2i32"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4i16.p0i16")]
-        fn _vld1_s16_x3(a: *const i16) -> int16x4x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v2i32")]
+        fn _vhsub_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     }
-    _vld1_s16_x3(a)
+    _vhsub_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x4)"]
+#[doc = "Signed halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vhsubq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(uhsub)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8793,19 +9864,504 @@ pub unsafe fn vld1_s16_x3(a: *const i16) -> int16x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s16_x4(a: *const i16) -> int16x4x4_t {
+pub unsafe fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v4i16.p0i16"
+            link_name = "llvm.aarch64.neon.uhsub.v4i32"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4i16.p0i16")]
-        fn _vld1_s16_x4(a: *const i16) -> int16x4x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i32")]
+        fn _vhsubq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     }
-    _vld1_s16_x4(a)
+    _vhsubq_u32(a.as_signed(), b.as_signed()).as_unsigned()
+}
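
Halving subtract mirrors halving add: the difference is formed at full precision and then arithmetically shifted right by one. A scalar sketch (illustrative only):

fn halving_sub_i16(a: i16, b: i16) -> i16 {
    // Widen so `a - b` cannot overflow, then halve; the arithmetic
    // shift preserves the sign, matching SHSUB/VHSUB.
    ((a as i32 - b as i32) >> 1) as i16
}
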
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
+pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t {
+    transmute(vld1_v2f32(
+        ptr as *const i8,
+        crate::mem::align_of::<f32>() as i32,
+    ))
+}
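
The `i32` passed alongside the pointer in these `vld1_*` shims is an alignment hint, in bytes, for the underlying LLVM intrinsic; the wrappers conservatively pass the element type's natural alignment. For example (values as on typical AAPCS targets):

let align_f32 = core::mem::align_of::<f32>() as i32; // 4
let align_u64 = core::mem::align_of::<u64>() as i32; // 8
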
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
+pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t {
+    let ret_val: float32x2_t = transmute(vld1_v2f32(
+        ptr as *const i8,
+        crate::mem::align_of::<f32>() as i32,
+    ));
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
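
Each big-endian variant compensates for lane-order differences by finishing with a `simd_shuffle!` whose index list simply reverses the lanes ([1, 0] here; [3, 2, 1, 0] and longer runs for wider vectors). A plain-array analogue of what that shuffle does:

fn reverse_lanes_2<T: Copy>(v: [T; 2]) -> [T; 2] {
    // Lane-order reversal: elements 0 and 1 swap places.
    [v[1], v[0]]
}
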
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
+pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t {
+    transmute(vld1q_v4f32(
+        ptr as *const i8,
+        crate::mem::align_of::<f32>() as i32,
+    ))
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
+pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t {
+    let ret_val: float32x4_t = transmute(vld1q_v4f32(
+        ptr as *const i8,
+        crate::mem::align_of::<f32>() as i32,
+    ));
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t {
+    transmute(vld1_v8i8(
+        ptr as *const i8,
+        crate::mem::align_of::<u8>() as i32,
+    ))
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t {
+    let ret_val: uint8x8_t = transmute(vld1_v8i8(
+        ptr as *const i8,
+        crate::mem::align_of::<u8>() as i32,
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t {
+    transmute(vld1q_v16i8(
+        ptr as *const i8,
+        crate::mem::align_of::<u8>() as i32,
+    ))
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t {
+    let ret_val: uint8x16_t = transmute(vld1q_v16i8(
+        ptr as *const i8,
+        crate::mem::align_of::<u8>() as i32,
+    ));
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t {
+    transmute(vld1_v4i16(
+        ptr as *const i8,
+        crate::mem::align_of::<u16>() as i32,
+    ))
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t {
+    let ret_val: uint16x4_t = transmute(vld1_v4i16(
+        ptr as *const i8,
+        crate::mem::align_of::<u16>() as i32,
+    ));
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t {
+    transmute(vld1q_v8i16(
+        ptr as *const i8,
+        crate::mem::align_of::<u16>() as i32,
+    ))
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t {
+    let ret_val: uint16x8_t = transmute(vld1q_v8i16(
+        ptr as *const i8,
+        crate::mem::align_of::<u16>() as i32,
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
+pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t {
+    transmute(vld1_v2i32(
+        ptr as *const i8,
+        crate::mem::align_of::<u32>() as i32,
+    ))
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
+pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t {
+    let ret_val: uint32x2_t = transmute(vld1_v2i32(
+        ptr as *const i8,
+        crate::mem::align_of::<u32>() as i32,
+    ));
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
+pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t {
+    transmute(vld1q_v4i32(
+        ptr as *const i8,
+        crate::mem::align_of::<u32>() as i32,
+    ))
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
+pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t {
+    let ret_val: uint32x4_t = transmute(vld1q_v4i32(
+        ptr as *const i8,
+        crate::mem::align_of::<u32>() as i32,
+    ));
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
+pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t {
+    transmute(vld1_v1i64(
+        ptr as *const i8,
+        crate::mem::align_of::<u64>() as i32,
+    ))
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))]
+pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t {
+    transmute(vld1q_v2i64(
+        ptr as *const i8,
+        crate::mem::align_of::<u64>() as i32,
+    ))
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))]
+pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t {
+    let ret_val: uint64x2_t = transmute(vld1q_v2i64(
+        ptr as *const i8,
+        crate::mem::align_of::<u64>() as i32,
+    ));
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t {
+    transmute(vld1_v8i8(
+        ptr as *const i8,
+        crate::mem::align_of::<p8>() as i32,
+    ))
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t {
+    let ret_val: poly8x8_t = transmute(vld1_v8i8(
+        ptr as *const i8,
+        crate::mem::align_of::<p8>() as i32,
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t {
+    transmute(vld1q_v16i8(
+        ptr as *const i8,
+        crate::mem::align_of::<p8>() as i32,
+    ))
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t {
+    let ret_val: poly8x16_t = transmute(vld1q_v16i8(
+        ptr as *const i8,
+        crate::mem::align_of::<p8>() as i32,
+    ));
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t {
+    transmute(vld1_v4i16(
+        ptr as *const i8,
+        crate::mem::align_of::<p16>() as i32,
+    ))
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t {
+    let ret_val: poly16x4_t = transmute(vld1_v4i16(
+        ptr as *const i8,
+        crate::mem::align_of::<p16>() as i32,
+    ));
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
+    transmute(vld1q_v8i16(
+        ptr as *const i8,
+        crate::mem::align_of::<p16>() as i32,
+    ))
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
+    let ret_val: poly16x8_t = transmute(vld1q_v8i16(
+        ptr as *const i8,
+        crate::mem::align_of::<p16>() as i32,
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,aes")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))]
+pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t {
+    transmute(vld1q_v2i64(
+        ptr as *const i8,
+        crate::mem::align_of::<p64>() as i32,
+    ))
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,aes")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))]
+pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t {
+    let ret_val: poly64x2_t = transmute(vld1q_v2i64(
+        ptr as *const i8,
+        crate::mem::align_of::<p64>() as i32,
+    ));
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
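
From the caller's side the API shape is identical on either endianness. A hedged usage sketch (target-gated, and assuming only intrinsics defined in this crate):

#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon")]
unsafe fn first_lane(data: &[u32; 4]) -> u32 {
    use core::arch::arm::*;
    // Load four u32 lanes, then extract lane 0.
    let v: uint32x4_t = vld1q_u32(data.as_ptr());
    vgetq_lane_u32::<0>(v)
}
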
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -8824,19 +10380,19 @@ pub unsafe fn vld1_s16_x4(a: *const i16) -> int16x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s16_x2(a: *const i16) -> int16x8x2_t {
+pub unsafe fn vld1_f32_x2(a: *const f32) -> float32x2x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v8i16.p0i16"
+            link_name = "llvm.aarch64.neon.ld1x2.v2f32.p0"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v8i16.p0i16")]
-        fn _vld1q_s16_x2(a: *const i16) -> int16x8x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2f32.p0")]
+        fn _vld1_f32_x2(a: *const f32) -> float32x2x2_t;
     }
-    _vld1q_s16_x2(a)
+    _vld1_f32_x2(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -8855,19 +10411,19 @@ pub unsafe fn vld1q_s16_x2(a: *const i16) -> int16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s16_x3(a: *const i16) -> int16x8x3_t {
+pub unsafe fn vld1_f32_x3(a: *const f32) -> float32x2x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v8i16.p0i16"
+            link_name = "llvm.aarch64.neon.ld1x3.v2f32.p0"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8i16.p0i16")]
-        fn _vld1q_s16_x3(a: *const i16) -> int16x8x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2f32.p0")]
+        fn _vld1_f32_x3(a: *const f32) -> float32x2x3_t;
     }
-    _vld1q_s16_x3(a)
+    _vld1_f32_x3(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -8886,19 +10442,19 @@ pub unsafe fn vld1q_s16_x3(a: *const i16) -> int16x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s16_x4(a: *const i16) -> int16x8x4_t {
+pub unsafe fn vld1_f32_x4(a: *const f32) -> float32x2x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v8i16.p0i16"
+            link_name = "llvm.aarch64.neon.ld1x4.v2f32.p0"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8i16.p0i16")]
-        fn _vld1q_s16_x4(a: *const i16) -> int16x8x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2f32.p0")]
+        fn _vld1_f32_x4(a: *const f32) -> float32x2x4_t;
     }
-    _vld1q_s16_x4(a)
+    _vld1_f32_x4(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -8917,19 +10473,19 @@ pub unsafe fn vld1q_s16_x4(a: *const i16) -> int16x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s32_x2(a: *const i32) -> int32x2x2_t {
+pub unsafe fn vld1q_f32_x2(a: *const f32) -> float32x4x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v2i32.p0i32"
+            link_name = "llvm.aarch64.neon.ld1x2.v4f32.p0"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2i32.p0i32")]
-        fn _vld1_s32_x2(a: *const i32) -> int32x2x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4f32.p0")]
+        fn _vld1q_f32_x2(a: *const f32) -> float32x4x2_t;
     }
-    _vld1_s32_x2(a)
+    _vld1q_f32_x2(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -8948,19 +10504,19 @@ pub unsafe fn vld1_s32_x2(a: *const i32) -> int32x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s32_x3(a: *const i32) -> int32x2x3_t {
+pub unsafe fn vld1q_f32_x3(a: *const f32) -> float32x4x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v2i32.p0i32"
+            link_name = "llvm.aarch64.neon.ld1x3.v4f32.p0"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2i32.p0i32")]
-        fn _vld1_s32_x3(a: *const i32) -> int32x2x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4f32.p0")]
+        fn _vld1q_f32_x3(a: *const f32) -> float32x4x3_t;
     }
-    _vld1_s32_x3(a)
+    _vld1q_f32_x3(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -8979,24 +10535,42 @@ pub unsafe fn vld1_s32_x3(a: *const i32) -> int32x2x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s32_x4(a: *const i32) -> int32x2x4_t {
+pub unsafe fn vld1q_f32_x4(a: *const f32) -> float32x4x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v2i32.p0i32"
+            link_name = "llvm.aarch64.neon.ld1x4.v4f32.p0"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2i32.p0i32")]
-        fn _vld1_s32_x4(a: *const i32) -> int32x2x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4f32.p0")]
+        fn _vld1q_f32_x4(a: *const f32) -> float32x4x4_t;
     }
-    _vld1_s32_x4(a)
+    _vld1q_f32_x4(a)
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,aes")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
+pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t {
+    let a: *const i8 = ptr as *const i8;
+    let b: i32 = crate::mem::align_of::<p64>() as i32;
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")]
+        fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t;
+    }
+    transmute(_vld1_v1i64(a, b))
 }
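
A hedged usage sketch for the shim above. It assumes a 32-bit Arm target where `core::arch::arm` exposes `p64`, `poly64x1_t`, and `vld1_p64` (nightly-only); `load_poly64` is an illustrative name, not part of this change.

```rust
// Sketch only: calling vld1_p64 from user code. The intrinsic passes
// align_of::<p64>() as the second argument of llvm.arm.neon.vld1.v1i64,
// so the caller supplies nothing but a pointer to one p64 element.
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,aes")]
unsafe fn load_poly64(buf: &u64) -> core::arch::arm::poly64x1_t {
    use core::arch::arm::{p64, vld1_p64};
    vld1_p64(buf as *const u64 as *const p64)
}
```
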
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -9010,25 +10584,17 @@ pub unsafe fn vld1_s32_x4(a: *const i32) -> int32x2x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s32_x2(a: *const i32) -> int32x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v4i32.p0i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4i32.p0i32")]
-        fn _vld1q_s32_x2(a: *const i32) -> int32x4x2_t;
-    }
-    _vld1q_s32_x2(a)
+pub unsafe fn vld1_p64_x2(a: *const p64) -> poly64x1x2_t {
+    transmute(vld1_s64_x2(transmute(a)))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(ld1)
@@ -9041,25 +10607,17 @@ pub unsafe fn vld1q_s32_x2(a: *const i32) -> int32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s32_x3(a: *const i32) -> int32x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v4i32.p0i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4i32.p0i32")]
-        fn _vld1q_s32_x3(a: *const i32) -> int32x4x3_t;
-    }
-    _vld1q_s32_x3(a)
+pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t {
+    transmute(vld1_s64_x3(transmute(a)))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(ld1)
@@ -9072,25 +10630,18 @@ pub unsafe fn vld1q_s32_x3(a: *const i32) -> int32x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s32_x4(a: *const i32) -> int32x4x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v4i32.p0i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4i32.p0i32")]
-        fn _vld1q_s32_x4(a: *const i32) -> int32x4x4_t;
-    }
-    _vld1q_s32_x4(a)
+pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t {
+    transmute(vld1_s64_x4(transmute(a)))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(ld1)
@@ -9103,25 +10654,18 @@ pub unsafe fn vld1q_s32_x4(a: *const i32) -> int32x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s64_x2(a: *const i64) -> int64x1x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v1i64.p0i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v1i64.p0i64")]
-        fn _vld1_s64_x2(a: *const i64) -> int64x1x2_t;
-    }
-    _vld1_s64_x2(a)
+pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t {
+    transmute(vld1q_s64_x2(transmute(a)))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(ld1)
@@ -9134,25 +10678,21 @@ pub unsafe fn vld1_s64_x2(a: *const i64) -> int64x1x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s64_x3(a: *const i64) -> int64x1x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v1i64.p0i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v1i64.p0i64")]
-        fn _vld1_s64_x3(a: *const i64) -> int64x1x3_t;
-    }
-    _vld1_s64_x3(a)
+pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t {
+    let mut ret_val: poly64x2x2_t = transmute(vld1q_s64_x2(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(ld1)
@@ -9165,25 +10705,18 @@ pub unsafe fn vld1_s64_x3(a: *const i64) -> int64x1x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_s64_x4(a: *const i64) -> int64x1x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v1i64.p0i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v1i64.p0i64")]
-        fn _vld1_s64_x4(a: *const i64) -> int64x1x4_t;
-    }
-    _vld1_s64_x4(a)
+pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t {
+    transmute(vld1q_s64_x3(transmute(a)))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(ld1)
@@ -9196,25 +10729,22 @@ pub unsafe fn vld1_s64_x4(a: *const i64) -> int64x1x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s64_x2(a: *const i64) -> int64x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v2i64.p0i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2i64.p0i64")]
-        fn _vld1q_s64_x2(a: *const i64) -> int64x2x2_t;
-    }
-    _vld1q_s64_x2(a)
+pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t {
+    let mut ret_val: poly64x2x3_t = transmute(vld1q_s64_x3(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
+    ret_val
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(ld1)
@@ -9227,25 +10757,18 @@ pub unsafe fn vld1q_s64_x2(a: *const i64) -> int64x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s64_x3(a: *const i64) -> int64x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v2i64.p0i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2i64.p0i64")]
-        fn _vld1q_s64_x3(a: *const i64) -> int64x2x3_t;
-    }
-    _vld1q_s64_x3(a)
+pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t {
+    transmute(vld1q_s64_x4(transmute(a)))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(ld1)
@@ -9258,19 +10781,112 @@ pub unsafe fn vld1q_s64_x3(a: *const i64) -> int64x2x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_s64_x4(a: *const i64) -> int64x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v2i64.p0i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2i64.p0i64")]
-        fn _vld1q_s64_x4(a: *const i64) -> int64x2x4_t;
-    }
-    _vld1q_s64_x4(a)
+pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t {
+    let mut ret_val: poly64x2x4_t = transmute(vld1q_s64_x4(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [1, 0]);
+    ret_val
+}
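
The `#[cfg(target_endian = "big")]` bodies above load through the signed intrinsic and then reverse the lanes of every vector in the returned tuple with `simd_shuffle!`, so the `transmute` sees lanes in the order big-endian code expects. A plain-Rust analogue of that per-vector reversal (illustrative only, not stdarch code):

```rust
// What simd_shuffle!(v, v, [N-1, ..., 1, 0]) does to one N-lane vector,
// spelled out on an ordinary array.
fn reverse_lanes<const N: usize>(v: [u64; N]) -> [u64; N] {
    let mut out = v;
    out.reverse();
    out
}
```
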
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t {
+    vld1_v8i8(ptr as *const i8, crate::mem::align_of::<i8>() as i32)
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
+pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t {
+    vld1q_v16i8(ptr as *const i8, crate::mem::align_of::<i8>() as i32)
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t {
+    vld1_v4i16(ptr as *const i8, crate::mem::align_of::<i16>() as i32)
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
+pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t {
+    vld1q_v8i16(ptr as *const i8, crate::mem::align_of::<i16>() as i32)
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
+pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t {
+    vld1_v2i32(ptr as *const i8, crate::mem::align_of::<i32>() as i32)
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
+pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t {
+    vld1q_v4i32(ptr as *const i8, crate::mem::align_of::<i32>() as i32)
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
+pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t {
+    vld1_v1i64(ptr as *const i8, crate::mem::align_of::<i64>() as i32)
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))]
+pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t {
+    vld1q_v2i64(ptr as *const i8, crate::mem::align_of::<i64>() as i32)
 }
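
The arm-only `vld1_*`/`vld1q_*` wrappers above all share one shape: cast the pointer to `*const i8`, pass `align_of::<T>()` as an `i32`, and call the matching `vld1*_vNiM` helper. A minimal call-site sketch (assumes an armv7 target with NEON; `sum2` is an illustrative name):

```rust
// Sketch only: the alignment plumbing is internal to vld1_s32, so a call
// site looks like any other NEON load.
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon")]
unsafe fn sum2(ptr: *const i32) -> i32 {
    use core::arch::arm::{vget_lane_s32, vld1_s32};
    let v = vld1_s32(ptr); // two i32 lanes
    vget_lane_s32::<0>(v) + vget_lane_s32::<1>(v)
}
```
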
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9289,11 +10905,19 @@ pub unsafe fn vld1q_s64_x4(a: *const i64) -> int64x2x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t {
-    transmute(vld1_s8_x2(transmute(a)))
+pub unsafe fn vld1_s8_x2(a: *const i8) -> int8x8x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x2.v8i8.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v8i8.p0")]
+        fn _vld1_s8_x2(a: *const i8) -> int8x8x2_t;
+    }
+    _vld1_s8_x2(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9312,11 +10936,19 @@ pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t {
-    transmute(vld1_s8_x3(transmute(a)))
+pub unsafe fn vld1_s8_x3(a: *const i8) -> int8x8x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x3.v8i8.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8i8.p0")]
+        fn _vld1_s8_x3(a: *const i8) -> int8x8x3_t;
+    }
+    _vld1_s8_x3(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9335,11 +10967,19 @@ pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t {
-    transmute(vld1_s8_x4(transmute(a)))
+pub unsafe fn vld1_s8_x4(a: *const i8) -> int8x8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x4.v8i8.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8i8.p0")]
+        fn _vld1_s8_x4(a: *const i8) -> int8x8x4_t;
+    }
+    _vld1_s8_x4(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9358,11 +10998,19 @@ pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t {
-    transmute(vld1q_s8_x2(transmute(a)))
+pub unsafe fn vld1q_s8_x2(a: *const i8) -> int8x16x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x2.v16i8.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v16i8.p0")]
+        fn _vld1q_s8_x2(a: *const i8) -> int8x16x2_t;
+    }
+    _vld1q_s8_x2(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9381,11 +11029,19 @@ pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t {
-    transmute(vld1q_s8_x3(transmute(a)))
+pub unsafe fn vld1q_s8_x3(a: *const i8) -> int8x16x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x3.v16i8.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v16i8.p0")]
+        fn _vld1q_s8_x3(a: *const i8) -> int8x16x3_t;
+    }
+    _vld1q_s8_x3(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9404,11 +11060,19 @@ pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t {
-    transmute(vld1q_s8_x4(transmute(a)))
+pub unsafe fn vld1q_s8_x4(a: *const i8) -> int8x16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x4.v16i8.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v16i8.p0")]
+        fn _vld1q_s8_x4(a: *const i8) -> int8x16x4_t;
+    }
+    _vld1q_s8_x4(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9427,11 +11091,19 @@ pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t {
-    transmute(vld1_s16_x2(transmute(a)))
+pub unsafe fn vld1_s16_x2(a: *const i16) -> int16x4x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x2.v4i16.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4i16.p0")]
+        fn _vld1_s16_x2(a: *const i16) -> int16x4x2_t;
+    }
+    _vld1_s16_x2(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9450,11 +11122,19 @@ pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t {
-    transmute(vld1_s16_x3(transmute(a)))
+pub unsafe fn vld1_s16_x3(a: *const i16) -> int16x4x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x3.v4i16.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4i16.p0")]
+        fn _vld1_s16_x3(a: *const i16) -> int16x4x3_t;
+    }
+    _vld1_s16_x3(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9473,11 +11153,19 @@ pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t {
-    transmute(vld1_s16_x4(transmute(a)))
+pub unsafe fn vld1_s16_x4(a: *const i16) -> int16x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x4.v4i16.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4i16.p0")]
+        fn _vld1_s16_x4(a: *const i16) -> int16x4x4_t;
+    }
+    _vld1_s16_x4(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9496,11 +11184,19 @@ pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t {
-    transmute(vld1q_s16_x2(transmute(a)))
+pub unsafe fn vld1q_s16_x2(a: *const i16) -> int16x8x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x2.v8i16.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v8i16.p0")]
+        fn _vld1q_s16_x2(a: *const i16) -> int16x8x2_t;
+    }
+    _vld1q_s16_x2(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9519,11 +11215,19 @@ pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t {
-    transmute(vld1q_s16_x3(transmute(a)))
+pub unsafe fn vld1q_s16_x3(a: *const i16) -> int16x8x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x3.v8i16.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8i16.p0")]
+        fn _vld1q_s16_x3(a: *const i16) -> int16x8x3_t;
+    }
+    _vld1q_s16_x3(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9542,11 +11246,19 @@ pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t {
-    transmute(vld1q_s16_x4(transmute(a)))
+pub unsafe fn vld1q_s16_x4(a: *const i16) -> int16x8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x4.v8i16.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8i16.p0")]
+        fn _vld1q_s16_x4(a: *const i16) -> int16x8x4_t;
+    }
+    _vld1q_s16_x4(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9565,11 +11277,19 @@ pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t {
-    transmute(vld1_s32_x2(transmute(a)))
+pub unsafe fn vld1_s32_x2(a: *const i32) -> int32x2x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x2.v2i32.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2i32.p0")]
+        fn _vld1_s32_x2(a: *const i32) -> int32x2x2_t;
+    }
+    _vld1_s32_x2(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9588,11 +11308,19 @@ pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t {
-    transmute(vld1_s32_x3(transmute(a)))
+pub unsafe fn vld1_s32_x3(a: *const i32) -> int32x2x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x3.v2i32.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2i32.p0")]
+        fn _vld1_s32_x3(a: *const i32) -> int32x2x3_t;
+    }
+    _vld1_s32_x3(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9611,11 +11339,19 @@ pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t {
-    transmute(vld1_s32_x4(transmute(a)))
+pub unsafe fn vld1_s32_x4(a: *const i32) -> int32x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x4.v2i32.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2i32.p0")]
+        fn _vld1_s32_x4(a: *const i32) -> int32x2x4_t;
+    }
+    _vld1_s32_x4(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9634,11 +11370,19 @@ pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t {
-    transmute(vld1q_s32_x2(transmute(a)))
+pub unsafe fn vld1q_s32_x2(a: *const i32) -> int32x4x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x2.v4i32.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4i32.p0")]
+        fn _vld1q_s32_x2(a: *const i32) -> int32x4x2_t;
+    }
+    _vld1q_s32_x2(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9657,11 +11401,19 @@ pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t {
-    transmute(vld1q_s32_x3(transmute(a)))
+pub unsafe fn vld1q_s32_x3(a: *const i32) -> int32x4x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x3.v4i32.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4i32.p0")]
+        fn _vld1q_s32_x3(a: *const i32) -> int32x4x3_t;
+    }
+    _vld1q_s32_x3(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9680,11 +11432,19 @@ pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t {
-    transmute(vld1q_s32_x4(transmute(a)))
+pub unsafe fn vld1q_s32_x4(a: *const i32) -> int32x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x4.v4i32.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4i32.p0")]
+        fn _vld1q_s32_x4(a: *const i32) -> int32x4x4_t;
+    }
+    _vld1q_s32_x4(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9703,11 +11463,19 @@ pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t {
-    transmute(vld1_s64_x2(transmute(a)))
+pub unsafe fn vld1_s64_x2(a: *const i64) -> int64x1x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x2.v1i64.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v1i64.p0")]
+        fn _vld1_s64_x2(a: *const i64) -> int64x1x2_t;
+    }
+    _vld1_s64_x2(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9726,11 +11494,19 @@ pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t {
-    transmute(vld1_s64_x3(transmute(a)))
+pub unsafe fn vld1_s64_x3(a: *const i64) -> int64x1x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x3.v1i64.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v1i64.p0")]
+        fn _vld1_s64_x3(a: *const i64) -> int64x1x3_t;
+    }
+    _vld1_s64_x3(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9749,11 +11525,19 @@ pub unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t {
-    transmute(vld1_s64_x4(transmute(a)))
+pub unsafe fn vld1_s64_x4(a: *const i64) -> int64x1x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x4.v1i64.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v1i64.p0")]
+        fn _vld1_s64_x4(a: *const i64) -> int64x1x4_t;
+    }
+    _vld1_s64_x4(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9772,11 +11556,19 @@ pub unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t {
-    transmute(vld1q_s64_x2(transmute(a)))
+pub unsafe fn vld1q_s64_x2(a: *const i64) -> int64x2x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x2.v2i64.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2i64.p0")]
+        fn _vld1q_s64_x2(a: *const i64) -> int64x2x2_t;
+    }
+    _vld1q_s64_x2(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9795,11 +11587,19 @@ pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t {
-    transmute(vld1q_s64_x3(transmute(a)))
+pub unsafe fn vld1q_s64_x3(a: *const i64) -> int64x2x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x3.v2i64.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2i64.p0")]
+        fn _vld1q_s64_x3(a: *const i64) -> int64x2x3_t;
+    }
+    _vld1q_s64_x3(a)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -9818,14 +11618,23 @@ pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t {
-    transmute(vld1q_s64_x4(transmute(a)))
+pub unsafe fn vld1q_s64_x4(a: *const i64) -> int64x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld1x4.v2i64.p0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2i64.p0")]
+        fn _vld1q_s64_x4(a: *const i64) -> int64x2x4_t;
+    }
+    _vld1q_s64_x4(a)
 }
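
Throughout this hunk the LLVM link names drop the pointee suffix (`.p0i64`, `.p0i32`, and so on become plain `.p0`), matching LLVM's opaque-pointer mangling; the vector tag (`v2i64` etc.) still selects the overload. A small helper (hypothetical, not part of this change) showing how the names above are formed:

```rust
// llvm.aarch64.neon.ld1x{N}.v{lanes}i{bits}.p0 — the trailing p0 is an
// opaque (typeless) pointer parameter.
fn ld1x_link_name(n: u8, lanes: u8, bits: u16) -> String {
    format!("llvm.aarch64.neon.ld1x{n}.v{lanes}i{bits}.p0")
}

// ld1x_link_name(4, 2, 64) == "llvm.aarch64.neon.ld1x4.v2i64.p0"
```
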
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
@@ -9841,14 +11650,15 @@ pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t {
+pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t {
     transmute(vld1_s8_x2(transmute(a)))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
@@ -9864,14 +11674,18 @@ pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t {
-    transmute(vld1_s8_x3(transmute(a)))
+pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t {
+    let mut ret_val: uint8x8x2_t = transmute(vld1_s8_x2(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
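
From here on, each unsigned/poly intrinsic has two definitions under one name, selected at compile time by `#[cfg(target_endian = "...")]`; only the big-endian copy pays for the lane reversal. A minimal analogue of that gating pattern (illustrative only):

```rust
// Exactly one of these compiles into any given build, so callers see a
// single `endianness` function with no runtime dispatch.
#[cfg(target_endian = "little")]
fn endianness() -> &'static str { "little" }

#[cfg(target_endian = "big")]
fn endianness() -> &'static str { "big" }
```
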
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
@@ -9887,14 +11701,15 @@ pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t {
-    transmute(vld1_s8_x4(transmute(a)))
+pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t {
+    transmute(vld1_s8_x3(transmute(a)))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
@@ -9910,14 +11725,19 @@ pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t {
-    transmute(vld1q_s8_x2(transmute(a)))
+pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t {
+    let mut ret_val: uint8x8x3_t = transmute(vld1_s8_x3(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
@@ -9933,14 +11753,15 @@ pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t {
-    transmute(vld1q_s8_x3(transmute(a)))
+pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t {
+    transmute(vld1_s8_x4(transmute(a)))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
@@ -9956,14 +11777,20 @@ pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t {
-    transmute(vld1q_s8_x4(transmute(a)))
+pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t {
+    let mut ret_val: uint8x8x4_t = transmute(vld1_s8_x4(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
@@ -9979,14 +11806,15 @@ pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t {
-    transmute(vld1_s16_x2(transmute(a)))
+pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t {
+    transmute(vld1q_s8_x2(transmute(a)))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
@@ -10002,14 +11830,26 @@ pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t {
-    transmute(vld1_s16_x3(transmute(a)))
+pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t {
+    let mut ret_val: uint8x16x2_t = transmute(vld1q_s8_x2(transmute(a)));
+    ret_val.0 = simd_shuffle!(
+        ret_val.0,
+        ret_val.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.1 = simd_shuffle!(
+        ret_val.1,
+        ret_val.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
@@ -10025,14 +11865,15 @@ pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t {
-    transmute(vld1_s16_x4(transmute(a)))
+pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t {
+    transmute(vld1q_s8_x3(transmute(a)))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
@@ -10048,14 +11889,31 @@ pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t {
-    transmute(vld1q_s16_x2(transmute(a)))
+pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t {
+    let mut ret_val: uint8x16x3_t = transmute(vld1q_s8_x3(transmute(a)));
+    ret_val.0 = simd_shuffle!(
+        ret_val.0,
+        ret_val.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.1 = simd_shuffle!(
+        ret_val.1,
+        ret_val.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.2 = simd_shuffle!(
+        ret_val.2,
+        ret_val.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
@@ -10071,14 +11929,15 @@ pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t {
-    transmute(vld1q_s16_x3(transmute(a)))
+pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t {
+    transmute(vld1q_s8_x4(transmute(a)))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
@@ -10094,300 +11953,169 @@ pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t {
-    transmute(vld1q_s16_x4(transmute(a)))
+pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t {
+    let mut ret_val: uint8x16x4_t = transmute(vld1q_s8_x4(transmute(a)));
+    ret_val.0 = simd_shuffle!(
+        ret_val.0,
+        ret_val.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.1 = simd_shuffle!(
+        ret_val.1,
+        ret_val.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.2 = simd_shuffle!(
+        ret_val.2,
+        ret_val.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.3 = simd_shuffle!(
+        ret_val.3,
+        ret_val.3,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val
 }
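Taken together, the two definitions above give vld1q_u8_x4 a little-endian body that is a plain reinterpreting load and a big-endian body that re-reverses all four 16-lane vectors after the underlying signed load. A minimal usage sketch, assuming a little-endian aarch64 target where these loads are stable; the demo function and its names are hypothetical, not part of this patch:

#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
fn demo_vld1q_u8_x4() {
    use core::arch::aarch64::{uint8x16x4_t, vld1q_u8_x4, vst1q_u8};
    // 64 contiguous source bytes; the load splits them across four q registers.
    let src: [u8; 64] = core::array::from_fn(|i| i as u8);
    let mut tail = [0u8; 16];
    unsafe {
        let loaded: uint8x16x4_t = vld1q_u8_x4(src.as_ptr());
        // The fourth register holds bytes 48..=63 of the source.
        vst1q_u8(tail.as_mut_ptr(), loaded.3);
    }
    assert_eq!(tail[0], 48);
    assert_eq!(tail[15], 63);
}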
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2f32.p0i8")]
-        fn _vld2_dup_f32(ptr: *const i8, size: i32) -> float32x2x2_t;
-    }
-    _vld2_dup_f32(a as *const i8, 4)
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f32.p0i8")]
-        fn _vld2q_dup_f32(ptr: *const i8, size: i32) -> float32x4x2_t;
-    }
-    _vld2q_dup_f32(a as *const i8, 4)
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i8.p0i8")]
-        fn _vld2_dup_s8(ptr: *const i8, size: i32) -> int8x8x2_t;
-    }
-    _vld2_dup_s8(a as *const i8, 1)
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v16i8.p0i8")]
-        fn _vld2q_dup_s8(ptr: *const i8, size: i32) -> int8x16x2_t;
-    }
-    _vld2q_dup_s8(a as *const i8, 1)
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i16.p0i8")]
-        fn _vld2_dup_s16(ptr: *const i8, size: i32) -> int16x4x2_t;
-    }
-    _vld2_dup_s16(a as *const i8, 2)
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i16.p0i8")]
-        fn _vld2q_dup_s16(ptr: *const i8, size: i32) -> int16x8x2_t;
-    }
-    _vld2q_dup_s16(a as *const i8, 2)
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2i32.p0i8")]
-        fn _vld2_dup_s32(ptr: *const i8, size: i32) -> int32x2x2_t;
-    }
-    _vld2_dup_s32(a as *const i8, 4)
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i32.p0i8")]
-        fn _vld2q_dup_s32(ptr: *const i8, size: i32) -> int32x4x2_t;
-    }
-    _vld2q_dup_s32(a as *const i8, 4)
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v2f32.p0f32"
-        )]
-        fn _vld2_dup_f32(ptr: *const f32) -> float32x2x2_t;
-    }
-    _vld2_dup_f32(a as _)
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v4f32.p0f32"
-        )]
-        fn _vld2q_dup_f32(ptr: *const f32) -> float32x4x2_t;
-    }
-    _vld2q_dup_f32(a as _)
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v8i8.p0i8"
-        )]
-        fn _vld2_dup_s8(ptr: *const i8) -> int8x8x2_t;
-    }
-    _vld2_dup_s8(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t {
+    transmute(vld1_s16_x2(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v16i8.p0i8"
-        )]
-        fn _vld2q_dup_s8(ptr: *const i8) -> int8x16x2_t;
-    }
-    _vld2q_dup_s8(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t {
+    let mut ret_val: uint16x4x2_t = transmute(vld1_s16_x2(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val
 }
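The [3, 2, 1, 0] index list handed to simd_shuffle! above selects source lanes in the listed order, which for these indices is a pure reversal of a 4-lane vector. A plain-Rust model of that semantics; the helper name is hypothetical and this is a sketch, not the macro itself:

fn reverse_lanes_u16x4(v: [u16; 4]) -> [u16; 4] {
    // Output lane i takes source lane index[i], so [3, 2, 1, 0] reverses.
    [v[3], v[2], v[1], v[0]]
}

fn main() {
    assert_eq!(reverse_lanes_u16x4([10, 20, 30, 40]), [40, 30, 20, 10]);
}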
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v4i16.p0i16"
-        )]
-        fn _vld2_dup_s16(ptr: *const i16) -> int16x4x2_t;
-    }
-    _vld2_dup_s16(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t {
+    transmute(vld1_s16_x3(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v8i16.p0i16"
-        )]
-        fn _vld2q_dup_s16(ptr: *const i16) -> int16x8x2_t;
-    }
-    _vld2q_dup_s16(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t {
+    let mut ret_val: uint16x4x3_t = transmute(vld1_s16_x3(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v2i32.p0i32"
-        )]
-        fn _vld2_dup_s32(ptr: *const i32) -> int32x2x2_t;
-    }
-    _vld2_dup_s32(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t {
+    transmute(vld1_s16_x4(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v4i32.p0i32"
-        )]
-        fn _vld2q_dup_s32(ptr: *const i32) -> int32x4x2_t;
-    }
-    _vld2q_dup_s32(a as _)
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -10397,55 +12125,50 @@ pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_p64(a: *const p64) -> poly64x1x2_t {
-    transmute(vld2_dup_s64(transmute(a)))
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v1i64.p0i8")]
-        fn _vld2_dup_s64(ptr: *const i8, size: i32) -> int64x1x2_t;
-    }
-    _vld2_dup_s64(a as *const i8, 8)
+pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t {
+    let mut ret_val: uint16x4x4_t = transmute(vld1_s16_x4(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v1i64.p0i64"
-        )]
-        fn _vld2_dup_s64(ptr: *const i64) -> int64x1x2_t;
-    }
-    _vld2_dup_s64(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t {
+    transmute(vld1q_s16_x2(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u64)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -10455,20 +12178,24 @@ pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_u64(a: *const u64) -> uint64x1x2_t {
-    transmute(vld2_dup_s64(transmute(a)))
+pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t {
+    let mut ret_val: uint16x8x2_t = transmute(vld1q_s16_x2(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -10478,20 +12205,21 @@ pub unsafe fn vld2_dup_u64(a: *const u64) -> uint64x1x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t {
-    transmute(vld2_dup_s8(transmute(a)))
+pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t {
+    transmute(vld1q_s16_x3(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -10501,20 +12229,25 @@ pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t {
-    transmute(vld2q_dup_s8(transmute(a)))
+pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t {
+    let mut ret_val: uint16x8x3_t = transmute(vld1q_s16_x3(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -10524,20 +12257,21 @@ pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t {
-    transmute(vld2_dup_s16(transmute(a)))
+pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t {
+    transmute(vld1q_s16_x4(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -10547,20 +12281,26 @@ pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t {
-    transmute(vld2q_dup_s16(transmute(a)))
+pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t {
+    let mut ret_val: uint16x8x4_t = transmute(vld1q_s16_x4(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
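Every fixed-width intrinsic in this hunk follows the same shape: the item is defined twice under mutually exclusive target_endian cfgs, so exactly one body survives compilation and callers always see a single vld1q_u16_x4 symbol. A standalone model of that selection pattern, with a hypothetical function name:

#[cfg(target_endian = "little")]
fn fixup_lanes(v: [u16; 8]) -> [u16; 8] {
    v // lanes already match the architectural order; nothing to do
}

#[cfg(target_endian = "big")]
fn fixup_lanes(mut v: [u16; 8]) -> [u16; 8] {
    v.reverse(); // mirror of the simd_shuffle! reversal in the big-endian body
    v
}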
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -10570,20 +12310,21 @@ pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t {
-    transmute(vld2_dup_s32(transmute(a)))
+pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t {
+    transmute(vld1_s32_x2(transmute(a)))
 }
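The little-endian body above shows the delegation used throughout this patch: uint32x2x2_t and int32x2x2_t have identical size and lane layout, so both transmutes are pure reinterpretations of the pointer and of the loaded tuple. A scalar model of the same idea, with hypothetical helper names standing in for the intrinsics:

// Stand-in for vld1_s32_x2: read two consecutive i32 values.
unsafe fn load_i32_pair(p: *const i32) -> [i32; 2] {
    unsafe { [p.read(), p.add(1).read()] }
}

// Stand-in for vld1_u32_x2: reuse the signed loader, then reinterpret.
// The `as u32` cast is a bitwise identity, mirroring the transmute above.
unsafe fn load_u32_pair(p: *const u32) -> [u32; 2] {
    let signed = unsafe { load_i32_pair(p.cast::<i32>()) };
    signed.map(|x| x as u32)
}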
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -10593,20 +12334,24 @@ pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t {
-    transmute(vld2q_dup_s32(transmute(a)))
+pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t {
+    let mut ret_val: uint32x2x2_t = transmute(vld1_s32_x2(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -10616,20 +12361,21 @@ pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t {
-    transmute(vld2_dup_s8(transmute(a)))
+pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t {
+    transmute(vld1_s32_x3(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -10639,20 +12385,25 @@ pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t {
-    transmute(vld2q_dup_s8(transmute(a)))
+pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t {
+    let mut ret_val: uint32x2x3_t = transmute(vld1_s32_x3(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
+    ret_val
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -10662,20 +12413,21 @@ pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t {
-    transmute(vld2_dup_s16(transmute(a)))
+pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t {
+    transmute(vld1_s32_x4(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -10685,613 +12437,432 @@ pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t {
-    transmute(vld2q_dup_s16(transmute(a)))
+pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t {
+    let mut ret_val: uint32x2x4_t = transmute(vld1_s32_x4(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2f32.p0i8")]
-        fn _vld2_f32(ptr: *const i8, size: i32) -> float32x2x2_t;
-    }
-    _vld2_f32(a as *const i8, 4)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f32.p0i8")]
-        fn _vld2q_f32(ptr: *const i8, size: i32) -> float32x4x2_t;
-    }
-    _vld2q_f32(a as *const i8, 4)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t {
+    transmute(vld1q_s32_x2(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i8.p0i8")]
-        fn _vld2_s8(ptr: *const i8, size: i32) -> int8x8x2_t;
-    }
-    _vld2_s8(a as *const i8, 1)
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t {
+    let mut ret_val: uint32x4x2_t = transmute(vld1q_s32_x2(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v16i8.p0i8")]
-        fn _vld2q_s8(ptr: *const i8, size: i32) -> int8x16x2_t;
-    }
-    _vld2q_s8(a as *const i8, 1)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t {
+    transmute(vld1q_s32_x3(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i16.p0i8")]
-        fn _vld2_s16(ptr: *const i8, size: i32) -> int16x4x2_t;
-    }
-    _vld2_s16(a as *const i8, 2)
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t {
+    let mut ret_val: uint32x4x3_t = transmute(vld1q_s32_x3(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i16.p0i8")]
-        fn _vld2q_s16(ptr: *const i8, size: i32) -> int16x8x2_t;
-    }
-    _vld2q_s16(a as *const i8, 2)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t {
+    transmute(vld1q_s32_x4(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2i32.p0i8")]
-        fn _vld2_s32(ptr: *const i8, size: i32) -> int32x2x2_t;
-    }
-    _vld2_s32(a as *const i8, 4)
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t {
+    let mut ret_val: uint32x4x4_t = transmute(vld1q_s32_x4(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i32.p0i8")]
-        fn _vld2q_s32(ptr: *const i8, size: i32) -> int32x4x2_t;
-    }
-    _vld2q_s32(a as *const i8, 4)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t {
+    transmute(vld1_s64_x2(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v2f32.p0v2f32"
-        )]
-        fn _vld2_f32(ptr: *const float32x2_t) -> float32x2x2_t;
-    }
-    _vld2_f32(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t {
+    transmute(vld1_s64_x3(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v4f32.p0v4f32"
-        )]
-        fn _vld2q_f32(ptr: *const float32x4_t) -> float32x4x2_t;
-    }
-    _vld2q_f32(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t {
+    transmute(vld1_s64_x4(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v8i8.p0v8i8"
-        )]
-        fn _vld2_s8(ptr: *const int8x8_t) -> int8x8x2_t;
-    }
-    _vld2_s8(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t {
+    transmute(vld1q_s64_x2(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v16i8.p0v16i8"
-        )]
-        fn _vld2q_s8(ptr: *const int8x16_t) -> int8x16x2_t;
-    }
-    _vld2q_s8(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t {
+    let mut ret_val: uint64x2x2_t = transmute(vld1q_s64_x2(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val
 }
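+// Editor's reading of this patch (not authoritative): each Q-register load
+// now comes as a `#[cfg(target_endian = "little")]` / `"big"` pair; only the
+// big-endian definition shuffles, so callers observe the Arm-specified lane
+// order regardless of byte order. Hypothetical check:
+//
+//     let pair = vld1q_u64_x2(src.as_ptr());
+//     // vgetq_lane_u64::<1>(pair.0) == src[1] on either endianness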
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v4i16.p0v4i16"
-        )]
-        fn _vld2_s16(ptr: *const int16x4_t) -> int16x4x2_t;
-    }
-    _vld2_s16(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t {
+    transmute(vld1q_s64_x3(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v8i16.p0v8i16"
-        )]
-        fn _vld2q_s16(ptr: *const int16x8_t) -> int16x8x2_t;
-    }
-    _vld2q_s16(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t {
+    let mut ret_val: uint64x2x3_t = transmute(vld1q_s64_x3(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v2i32.p0v2i32"
-        )]
-        fn _vld2_s32(ptr: *const int32x2_t) -> int32x2x2_t;
-    }
-    _vld2_s32(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t {
+    transmute(vld1q_s64_x4(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v4i32.p0v4i32"
-        )]
-        fn _vld2q_s32(ptr: *const int32x4_t) -> int32x4x2_t;
-    }
-    _vld2q_s32(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t {
+    let mut ret_val: uint64x2x4_t = transmute(vld1q_s64_x4(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x2_t) -> float32x2x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v2f32.p0i8"
-        )]
-        fn _vld2_lane_f32(a: float32x2_t, b: float32x2_t, n: i64, ptr: *const i8) -> float32x2x2_t;
-    }
-    _vld2_lane_f32(b.0, b.1, LANE as i64, a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t {
+    transmute(vld1_s8_x2(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x2_t) -> float32x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v4f32.p0i8"
-        )]
-        fn _vld2q_lane_f32(a: float32x4_t, b: float32x4_t, n: i64, ptr: *const i8)
-            -> float32x4x2_t;
-    }
-    _vld2q_lane_f32(b.0, b.1, LANE as i64, a as _)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x2_t) -> int8x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v8i8.p0i8"
-        )]
-        fn _vld2_lane_s8(a: int8x8_t, b: int8x8_t, n: i64, ptr: *const i8) -> int8x8x2_t;
-    }
-    _vld2_lane_s8(b.0, b.1, LANE as i64, a as _)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x2_t) -> int16x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v4i16.p0i8"
-        )]
-        fn _vld2_lane_s16(a: int16x4_t, b: int16x4_t, n: i64, ptr: *const i8) -> int16x4x2_t;
-    }
-    _vld2_lane_s16(b.0, b.1, LANE as i64, a as _)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x2_t) -> int16x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v8i16.p0i8"
-        )]
-        fn _vld2q_lane_s16(a: int16x8_t, b: int16x8_t, n: i64, ptr: *const i8) -> int16x8x2_t;
-    }
-    _vld2q_lane_s16(b.0, b.1, LANE as i64, a as _)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x2_t) -> int32x2x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v2i32.p0i8"
-        )]
-        fn _vld2_lane_s32(a: int32x2_t, b: int32x2_t, n: i64, ptr: *const i8) -> int32x2x2_t;
-    }
-    _vld2_lane_s32(b.0, b.1, LANE as i64, a as _)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x2_t) -> int32x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v4i32.p0i8"
-        )]
-        fn _vld2q_lane_s32(a: int32x4_t, b: int32x4_t, n: i64, ptr: *const i8) -> int32x4x2_t;
-    }
-    _vld2q_lane_s32(b.0, b.1, LANE as i64, a as _)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x2_t) -> float32x2x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2f32.p0i8")]
-        fn _vld2_lane_f32(
-            ptr: *const i8,
-            a: float32x2_t,
-            b: float32x2_t,
-            n: i32,
-            size: i32,
-        ) -> float32x2x2_t;
-    }
-    _vld2_lane_f32(a as _, b.0, b.1, LANE, 4)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x2_t) -> float32x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f32.p0i8")]
-        fn _vld2q_lane_f32(
-            ptr: *const i8,
-            a: float32x4_t,
-            b: float32x4_t,
-            n: i32,
-            size: i32,
-        ) -> float32x4x2_t;
-    }
-    _vld2q_lane_f32(a as _, b.0, b.1, LANE, 4)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x2_t) -> int16x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i16.p0i8")]
-        fn _vld2q_lane_s16(
-            ptr: *const i8,
-            a: int16x8_t,
-            b: int16x8_t,
-            n: i32,
-            size: i32,
-        ) -> int16x8x2_t;
-    }
-    _vld2q_lane_s16(a as _, b.0, b.1, LANE, 2)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x2_t) -> int32x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i32.p0i8")]
-        fn _vld2q_lane_s32(
-            ptr: *const i8,
-            a: int32x4_t,
-            b: int32x4_t,
-            n: i32,
-            size: i32,
-        ) -> int32x4x2_t;
-    }
-    _vld2q_lane_s32(a as _, b.0, b.1, LANE, 4)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x2_t) -> int8x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i8.p0i8")]
-        fn _vld2_lane_s8(ptr: *const i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32)
-            -> int8x8x2_t;
-    }
-    _vld2_lane_s8(a as _, b.0, b.1, LANE, 1)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x2_t) -> int16x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i16.p0i8")]
-        fn _vld2_lane_s16(
-            ptr: *const i8,
-            a: int16x4_t,
-            b: int16x4_t,
-            n: i32,
-            size: i32,
-        ) -> int16x4x2_t;
-    }
-    _vld2_lane_s16(a as _, b.0, b.1, LANE, 2)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x2_t) -> int32x2x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2i32.p0i8")]
-        fn _vld2_lane_s32(
-            ptr: *const i8,
-            a: int32x2_t,
-            b: int32x2_t,
-            n: i32,
-            size: i32,
-        ) -> int32x2x2_t;
-    }
-    _vld2_lane_s32(a as _, b.0, b.1, LANE, 4)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11300,23 +12871,25 @@ pub unsafe fn vld2_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x2_t) -> i
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x2_t) -> uint8x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld2_lane_s8::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t {
+    let mut ret_val: poly8x8x2_t = transmute(vld1_s8_x2(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11325,23 +12898,22 @@ pub unsafe fn vld2_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x2_t) -> uin
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x2_t) -> uint16x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld2_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t {
+    transmute(vld1_s8_x3(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11350,23 +12922,26 @@ pub unsafe fn vld2_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x2_t) -> uint16x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld2q_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t {
+    let mut ret_val: poly8x8x3_t = transmute(vld1_s8_x3(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11375,23 +12950,22 @@ pub unsafe fn vld2q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x2_t) -> uint32x2x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    transmute(vld2_lane_s32::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t {
+    transmute(vld1_s8_x4(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11400,23 +12974,27 @@ pub unsafe fn vld2_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x2_t) -> uint32x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld2q_lane_s32::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t {
+    let mut ret_val: poly8x8x4_t = transmute(vld1_s8_x4(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
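+// Illustrative use of the poly8 multi-vector loads (editor's example,
+// hypothetical names): polynomial data loads like u8 data, and the tuple
+// fields can feed the polynomial-multiply intrinsics directly:
+//
+//     let quads = vld1_p8_x4(coeffs.as_ptr());           // coeffs: &[p8; 32]
+//     let prod: poly16x8_t = vmull_p8(quads.0, quads.1); // carry-less multiply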
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11425,23 +13003,22 @@ pub unsafe fn vld2q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x2_t) -> poly8x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld2_lane_s8::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t {
+    transmute(vld1q_s8_x2(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11450,23 +13027,33 @@ pub unsafe fn vld2_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x2_t) -> pol
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x2_t) -> poly16x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld2_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t {
+    let mut ret_val: poly8x16x2_t = transmute(vld1q_s8_x2(transmute(a)));
+    ret_val.0 = simd_shuffle!(
+        ret_val.0,
+        ret_val.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.1 = simd_shuffle!(
+        ret_val.1,
+        ret_val.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld1)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -11475,21 +13062,21 @@ pub unsafe fn vld2_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x2_t) -> poly16x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld2q_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t {
+    transmute(vld1q_s8_x3(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p64)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -11499,55 +13086,37 @@ pub unsafe fn vld2q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_p64(a: *const p64) -> poly64x1x2_t {
-    transmute(vld2_s64(transmute(a)))
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v1i64.p0i8")]
-        fn _vld2_s64(ptr: *const i8, size: i32) -> int64x1x2_t;
-    }
-    _vld2_s64(a as *const i8, 8)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v1i64.p0v1i64"
-        )]
-        fn _vld2_s64(ptr: *const int64x1_t) -> int64x1x2_t;
-    }
-    _vld2_s64(a as _)
+pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t {
+    let mut ret_val: poly8x16x3_t = transmute(vld1q_s8_x3(transmute(a)));
+    ret_val.0 = simd_shuffle!(
+        ret_val.0,
+        ret_val.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.1 = simd_shuffle!(
+        ret_val.1,
+        ret_val.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.2 = simd_shuffle!(
+        ret_val.2,
+        ret_val.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u64)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -11557,20 +13126,21 @@ pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_u64(a: *const u64) -> uint64x1x2_t {
-    transmute(vld2_s64(transmute(a)))
+pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t {
+    transmute(vld1q_s8_x4(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -11580,20 +13150,42 @@ pub unsafe fn vld2_u64(a: *const u64) -> uint64x1x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t {
-    transmute(vld2_s8(transmute(a)))
+pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t {
+    let mut ret_val: poly8x16x4_t = transmute(vld1q_s8_x4(transmute(a)));
+    ret_val.0 = simd_shuffle!(
+        ret_val.0,
+        ret_val.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.1 = simd_shuffle!(
+        ret_val.1,
+        ret_val.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.2 = simd_shuffle!(
+        ret_val.2,
+        ret_val.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.3 = simd_shuffle!(
+        ret_val.3,
+        ret_val.3,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val
 }
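+// Editor's note: the `[15, 14, ..., 0]` index lists above are full 16-lane
+// reversals of a Q register. A runtime analogue via the public aarch64 API
+// (hypothetical sketch) is a table lookup:
+//
+//     const REV: [u8; 16] = [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0];
+//     let reversed = vqtbl1q_u8(v, vld1q_u8(REV.as_ptr()));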
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -11603,20 +13195,21 @@ pub unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t {
-    transmute(vld2q_s8(transmute(a)))
+pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t {
+    transmute(vld1_s16_x2(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -11626,20 +13219,24 @@ pub unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t {
-    transmute(vld2_s16(transmute(a)))
+pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t {
+    let mut ret_val: poly16x4x2_t = transmute(vld1_s16_x2(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -11649,20 +13246,21 @@ pub unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t {
-    transmute(vld2q_s16(transmute(a)))
+pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t {
+    transmute(vld1_s16_x3(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -11672,20 +13270,25 @@ pub unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t {
-    transmute(vld2_s32(transmute(a)))
+pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t {
+    let mut ret_val: poly16x4x3_t = transmute(vld1_s16_x3(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -11695,20 +13298,21 @@ pub unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t {
-    transmute(vld2q_s32(transmute(a)))
+pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t {
+    transmute(vld1_s16_x4(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -11718,20 +13322,26 @@ pub unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t {
-    transmute(vld2_s8(transmute(a)))
+pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t {
+    let mut ret_val: poly16x4x4_t = transmute(vld1_s16_x4(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -11741,20 +13351,21 @@ pub unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_p8(a: *const p8) -> poly8x16x2_t {
-    transmute(vld2q_s8(transmute(a)))
+pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t {
+    transmute(vld1q_s16_x2(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x2)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -11764,20 +13375,24 @@ pub unsafe fn vld2q_p8(a: *const p8) -> poly8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_p16(a: *const p16) -> poly16x4x2_t {
-    transmute(vld2_s16(transmute(a)))
+pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t {
+    let mut ret_val: poly16x8x2_t = transmute(vld1q_s16_x2(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld1)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -11787,450 +13402,531 @@ pub unsafe fn vld2_p16(a: *const p16) -> poly16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_p16(a: *const p16) -> poly16x8x2_t {
-    transmute(vld2q_s16(transmute(a)))
+pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t {
+    transmute(vld1q_s16_x3(transmute(a)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x3)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v2f32.p0f32"
-        )]
-        fn _vld3_dup_f32(ptr: *const f32) -> float32x2x3_t;
-    }
-    _vld3_dup_f32(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t {
+    let mut ret_val: poly16x8x3_t = transmute(vld1q_s16_x3(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v4f32.p0f32"
-        )]
-        fn _vld3q_dup_f32(ptr: *const f32) -> float32x4x3_t;
-    }
-    _vld3q_dup_f32(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t {
+    transmute(vld1q_s16_x4(transmute(a)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x4)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t {
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld1)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t {
+    let mut ret_val: poly16x8x4_t = transmute(vld1q_s16_x4(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
+}
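// Context for the lane reversals above: on big-endian targets the underlying
// ld1/vld1 fills lanes in memory order, the reverse of the little-endian lane
// numbering this crate exposes, so each returned vector is flipped once with
// simd_shuffle!. A minimal usage sketch (illustrative helper, assuming a
// little-endian aarch64 build where the names above resolve directly):
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
#[target_feature(enable = "neon")]
unsafe fn load_four_p16_vectors(src: &[u16; 32]) -> poly16x8x4_t {
    // One vld1q_p16_x4 call fills four Q registers with eight lanes each.
    vld1q_p16_x4(src.as_ptr())
}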
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v1i64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1_v1i64(a: *const i8, b: i32) -> int64x1_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v8i8.p0i8"
-        )]
-        fn _vld3_dup_s8(ptr: *const i8) -> int8x8x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")]
+        fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t;
     }
-    _vld3_dup_s8(a as _)
+    _vld1_v1i64(a, b)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v2f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1_v2f32(a: *const i8, b: i32) -> float32x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v16i8.p0i8"
-        )]
-        fn _vld3q_dup_s8(ptr: *const i8) -> int8x16x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2f32")]
+        fn _vld1_v2f32(a: *const i8, b: i32) -> float32x2_t;
     }
-    _vld3q_dup_s8(a as _)
+    _vld1_v2f32(a, b)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v2i32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1_v2i32(a: *const i8, b: i32) -> int32x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v4i16.p0i16"
-        )]
-        fn _vld3_dup_s16(ptr: *const i16) -> int16x4x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i32")]
+        fn _vld1_v2i32(a: *const i8, b: i32) -> int32x2_t;
     }
-    _vld3_dup_s16(a as _)
+    _vld1_v2i32(a, b)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v4i16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1_v4i16(a: *const i8, b: i32) -> int16x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v8i16.p0i16"
-        )]
-        fn _vld3q_dup_s16(ptr: *const i16) -> int16x8x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i16")]
+        fn _vld1_v4i16(a: *const i8, b: i32) -> int16x4_t;
     }
-    _vld3q_dup_s16(a as _)
+    _vld1_v4i16(a, b)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v8i8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1_v8i8(a: *const i8, b: i32) -> int8x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v2i32.p0i32"
-        )]
-        fn _vld3_dup_s32(ptr: *const i32) -> int32x2x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i8")]
+        fn _vld1_v8i8(a: *const i8, b: i32) -> int8x8_t;
     }
-    _vld3_dup_s32(a as _)
+    _vld1_v8i8(a, b)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v16i8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1q_v16i8(a: *const i8, b: i32) -> int8x16_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v4i32.p0i32"
-        )]
-        fn _vld3q_dup_s32(ptr: *const i32) -> int32x4x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v16i8")]
+        fn _vld1q_v16i8(a: *const i8, b: i32) -> int8x16_t;
     }
-    _vld3q_dup_s32(a as _)
+    _vld1q_v16i8(a, b)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v2i64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1q_v2i64(a: *const i8, b: i32) -> int64x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v1i64.p0i64"
-        )]
-        fn _vld3_dup_s64(ptr: *const i64) -> int64x1x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i64")]
+        fn _vld1q_v2i64(a: *const i8, b: i32) -> int64x2_t;
     }
-    _vld3_dup_s64(a as _)
+    _vld1q_v2i64(a, b)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v4f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t {
+unsafe fn vld1q_v4f32(a: *const i8, b: i32) -> float32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2f32.p0i8")]
-        fn _vld3_dup_f32(ptr: *const i8, size: i32) -> float32x2x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f32")]
+        fn _vld1q_v4f32(a: *const i8, b: i32) -> float32x4_t;
     }
-    _vld3_dup_f32(a as *const i8, 4)
+    _vld1q_v4f32(a, b)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"]
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v4i32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1q_v4i32(a: *const i8, b: i32) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i32")]
+        fn _vld1q_v4i32(a: *const i8, b: i32) -> int32x4_t;
+    }
+    _vld1q_v4i32(a, b)
+}
+#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v8i16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t {
+unsafe fn vld1q_v8i16(a: *const i8, b: i32) -> int16x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f32.p0i8")]
-        fn _vld3q_dup_f32(ptr: *const i8, size: i32) -> float32x4x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i16")]
+        fn _vld1q_v8i16(a: *const i8, b: i32) -> int16x8_t;
     }
-    _vld3q_dup_f32(a as *const i8, 4)
+    _vld1q_v8i16(a, b)
 }
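// The private vld1*_v* helpers above mirror the llvm.arm.neon.vld1.* calling
// convention: the pointer is erased to *const i8 and the second i32 argument
// carries the element alignment in bytes (1 for i8, 2 for i16, 4 for i32/f32,
// 8 for i64). A hedged sketch of a caller supplying that alignment
// (`vld1q_v4i32_demo` is a hypothetical wrapper, not part of this change):
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
unsafe fn vld1q_v4i32_demo(a: *const i32) -> int32x4_t {
    // align_of::<i32>() == 4, matching the constants the real callers pass.
    vld1q_v4i32(a as *const i8, core::mem::align_of::<i32>() as i32)
}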
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i8.p0i8")]
-        fn _vld3_dup_s8(ptr: *const i8, size: i32) -> int8x8x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2f32.p0")]
+        fn _vld2_dup_f32(ptr: *const i8, size: i32) -> float32x2x2_t;
     }
-    _vld3_dup_s8(a as *const i8, 1)
+    _vld2_dup_f32(a as *const i8, 4)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v16i8.p0i8")]
-        fn _vld3q_dup_s8(ptr: *const i8, size: i32) -> int8x16x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f32.p0")]
+        fn _vld2q_dup_f32(ptr: *const i8, size: i32) -> float32x4x2_t;
     }
-    _vld3q_dup_s8(a as *const i8, 1)
+    _vld2q_dup_f32(a as *const i8, 4)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i16.p0i8")]
-        fn _vld3_dup_s16(ptr: *const i8, size: i32) -> int16x4x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i8.p0")]
+        fn _vld2_dup_s8(ptr: *const i8, size: i32) -> int8x8x2_t;
     }
-    _vld3_dup_s16(a as *const i8, 2)
+    _vld2_dup_s8(a as *const i8, 1)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i16.p0i8")]
-        fn _vld3q_dup_s16(ptr: *const i8, size: i32) -> int16x8x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v16i8.p0")]
+        fn _vld2q_dup_s8(ptr: *const i8, size: i32) -> int8x16x2_t;
     }
-    _vld3q_dup_s16(a as *const i8, 2)
+    _vld2q_dup_s8(a as *const i8, 1)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2i32.p0i8")]
-        fn _vld3_dup_s32(ptr: *const i8, size: i32) -> int32x2x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i16.p0")]
+        fn _vld2_dup_s16(ptr: *const i8, size: i32) -> int16x4x2_t;
     }
-    _vld3_dup_s32(a as *const i8, 4)
+    _vld2_dup_s16(a as *const i8, 2)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i32.p0i8")]
-        fn _vld3q_dup_s32(ptr: *const i8, size: i32) -> int32x4x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i16.p0")]
+        fn _vld2q_dup_s16(ptr: *const i8, size: i32) -> int16x8x2_t;
     }
-    _vld3q_dup_s32(a as *const i8, 4)
+    _vld2q_dup_s16(a as *const i8, 2)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p64)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_dup_p64(a: *const p64) -> poly64x1x3_t {
-    transmute(vld3_dup_s64(transmute(a)))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2i32.p0")]
+        fn _vld2_dup_s32(ptr: *const i8, size: i32) -> int32x2x2_t;
+    }
+    _vld2_dup_s32(a as *const i8, 4)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v1i64.p0i8")]
-        fn _vld3_dup_s64(ptr: *const i8, size: i32) -> int64x1x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i32.p0")]
+        fn _vld2q_dup_s32(ptr: *const i8, size: i32) -> int32x4x2_t;
     }
-    _vld3_dup_s64(a as *const i8, 8)
+    _vld2q_dup_s32(a as *const i8, 4)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u64)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_dup_u64(a: *const u64) -> uint64x1x3_t {
-    transmute(vld3_dup_s64(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v2f32.p0"
+        )]
+        fn _vld2_dup_f32(ptr: *const f32) -> float32x2x2_t;
+    }
+    _vld2_dup_f32(a as _)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u8)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t {
-    transmute(vld3_dup_s8(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v4f32.p0"
+        )]
+        fn _vld2q_dup_f32(ptr: *const f32) -> float32x4x2_t;
+    }
+    _vld2q_dup_f32(a as _)
 }
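// Semantics sketch for the dup loads: vld2q_dup_f32 reads one 2-element
// structure (two consecutive f32 values) at `a`, then broadcasts a[0] into
// every lane of field .0 and a[1] into every lane of field .1. Illustrative
// helper (`splat_pair` is hypothetical), assuming an aarch64 build:
#[cfg(not(target_arch = "arm"))]
#[target_feature(enable = "neon")]
unsafe fn splat_pair(a: *const f32) -> float32x4x2_t {
    // Afterwards ret.0 == [a[0]; 4] and ret.1 == [a[1]; 4].
    vld2q_dup_f32(a)
}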
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u8)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t {
-    transmute(vld3q_dup_s8(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v8i8.p0"
+        )]
+        fn _vld2_dup_s8(ptr: *const i8) -> int8x8x2_t;
+    }
+    _vld2_dup_s8(a as _)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t {
-    transmute(vld3_dup_s16(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v16i8.p0"
+        )]
+        fn _vld2q_dup_s8(ptr: *const i8) -> int8x16x2_t;
+    }
+    _vld2q_dup_s8(a as _)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v4i16.p0"
+        )]
+        fn _vld2_dup_s16(ptr: *const i16) -> int16x4x2_t;
+    }
+    _vld2_dup_s16(a as _)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v8i16.p0"
+        )]
+        fn _vld2q_dup_s16(ptr: *const i16) -> int16x8x2_t;
+    }
+    _vld2q_dup_s16(a as _)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v2i32.p0"
+        )]
+        fn _vld2_dup_s32(ptr: *const i32) -> int32x2x2_t;
+    }
+    _vld2_dup_s32(a as _)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v4i32.p0"
+        )]
+        fn _vld2q_dup_s32(ptr: *const i32) -> int32x4x2_t;
+    }
+    _vld2q_dup_s32(a as _)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -12240,20 +13936,55 @@ pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t {
-    transmute(vld3q_dup_s16(transmute(a)))
+pub unsafe fn vld2_dup_p64(a: *const p64) -> poly64x1x2_t {
+    transmute(vld2_dup_s64(transmute(a)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u32)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v1i64.p0")]
+        fn _vld2_dup_s64(ptr: *const i8, size: i32) -> int64x1x2_t;
+    }
+    _vld2_dup_s64(a as *const i8, 8)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v1i64.p0"
+        )]
+        fn _vld2_dup_s64(ptr: *const i64) -> int64x1x2_t;
+    }
+    _vld2_dup_s64(a as _)
+}
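// The two cfg-gated vld2_dup_s64 definitions above are resolved at compile
// time: the one name lowers to llvm.arm.neon.vld2dup on 32-bit Arm and to
// llvm.aarch64.neon.ld2r elsewhere. Hypothetical caller sketch, assuming a
// NEON-enabled aarch64 build; with one lane per vector, "replicate to all
// lanes" degenerates to a plain two-element load:
#[cfg(not(target_arch = "arm"))]
#[target_feature(enable = "neon")]
unsafe fn dup_pair_i64(x: &[i64; 2]) -> int64x1x2_t {
    // .0 receives x[0] and .1 receives x[1].
    vld2_dup_s64(x.as_ptr())
}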
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -12263,20 +13994,21 @@ pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t {
-    transmute(vld3_dup_s32(transmute(a)))
+pub unsafe fn vld2_dup_u64(a: *const u64) -> uint64x1x2_t {
+    transmute(vld2_dup_s64(transmute(a)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u32)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -12286,20 +14018,21 @@ pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t {
-    transmute(vld3q_dup_s32(transmute(a)))
+pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t {
+    transmute(vld2_dup_s8(transmute(a)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p8)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -12309,20 +14042,24 @@ pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t {
-    transmute(vld3_dup_s8(transmute(a)))
+pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t {
+    let mut ret_val: uint8x8x2_t = transmute(vld2_dup_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p8)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -12332,20 +14069,21 @@ pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t {
-    transmute(vld3q_dup_s8(transmute(a)))
+pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t {
+    transmute(vld2q_dup_s8(transmute(a)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -12355,20 +14093,32 @@ pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t {
-    transmute(vld3_dup_s16(transmute(a)))
+pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t {
+    let mut ret_val: uint8x16x2_t = transmute(vld2q_dup_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(
+        ret_val.0,
+        ret_val.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.1 = simd_shuffle!(
+        ret_val.1,
+        ret_val.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val
 }
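// What the descending [15, ..., 0] index lists above accomplish: simd_shuffle!
// with strictly decreasing indices reverses a vector's lane order, undoing the
// big-endian lane numbering of the raw ld2r result. A plain-array model of the
// same operation (illustrative stand-in, not used by the intrinsics):
fn reverse_lanes_model<const N: usize>(lanes: [u8; N]) -> [u8; N] {
    let mut out = lanes;
    out.reverse(); // same effect as simd_shuffle!(v, v, [N-1, ..., 1, 0])
    out
}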
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -12378,639 +14128,1003 @@ pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t {
-    transmute(vld3q_dup_s16(transmute(a)))
+pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t {
+    transmute(vld2_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v2f32.p0v2f32"
-        )]
-        fn _vld3_f32(ptr: *const float32x2_t) -> float32x2x3_t;
-    }
-    _vld3_f32(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t {
+    let mut ret_val: uint16x4x2_t = transmute(vld2_dup_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v4f32.p0v4f32"
-        )]
-        fn _vld3q_f32(ptr: *const float32x4_t) -> float32x4x3_t;
-    }
-    _vld3q_f32(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t {
+    transmute(vld2q_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v8i8.p0v8i8"
-        )]
-        fn _vld3_s8(ptr: *const int8x8_t) -> int8x8x3_t;
-    }
-    _vld3_s8(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t {
+    let mut ret_val: uint16x8x2_t = transmute(vld2q_dup_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v16i8.p0v16i8"
-        )]
-        fn _vld3q_s8(ptr: *const int8x16_t) -> int8x16x3_t;
-    }
-    _vld3q_s8(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t {
+    transmute(vld2_dup_s32(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v4i16.p0v4i16"
-        )]
-        fn _vld3_s16(ptr: *const int16x4_t) -> int16x4x3_t;
-    }
-    _vld3_s16(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t {
+    let mut ret_val: uint32x2x2_t = transmute(vld2_dup_s32(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v8i16.p0v8i16"
-        )]
-        fn _vld3q_s16(ptr: *const int16x8_t) -> int16x8x3_t;
-    }
-    _vld3q_s16(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t {
+    transmute(vld2q_dup_s32(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v2i32.p0v2i32"
-        )]
-        fn _vld3_s32(ptr: *const int32x2_t) -> int32x2x3_t;
-    }
-    _vld3_s32(a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t {
+    let mut ret_val: uint32x4x2_t = transmute(vld2q_dup_s32(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v4i32.p0v4i32"
-        )]
-        fn _vld3q_s32(ptr: *const int32x4_t) -> int32x4x3_t;
-    }
-    _vld3q_s32(a as _)
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2f32.p0i8")]
-        fn _vld3_f32(ptr: *const i8, size: i32) -> float32x2x3_t;
-    }
-    _vld3_f32(a as *const i8, 4)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t {
+    transmute(vld2_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f32.p0i8")]
-        fn _vld3q_f32(ptr: *const i8, size: i32) -> float32x4x3_t;
-    }
-    _vld3q_f32(a as *const i8, 4)
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t {
+    let mut ret_val: poly8x8x2_t = transmute(vld2_dup_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i8.p0i8")]
-        fn _vld3_s8(ptr: *const i8, size: i32) -> int8x8x3_t;
-    }
-    _vld3_s8(a as *const i8, 1)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t {
+    transmute(vld2q_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t {
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t {
+    let mut ret_val: poly8x16x2_t = transmute(vld2q_dup_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(
+        ret_val.0,
+        ret_val.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.1 = simd_shuffle!(
+        ret_val.1,
+        ret_val.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t {
+    transmute(vld2_dup_s16(transmute(a)))
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t {
+    let mut ret_val: poly16x4x2_t = transmute(vld2_dup_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t {
+    transmute(vld2q_dup_s16(transmute(a)))
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t {
+    let mut ret_val: poly16x8x2_t = transmute(vld2q_dup_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
+}
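+// The `vld2*_dup_*` wrappers above are generated in pairs gated on
+// `target_endian`: the big-endian copy performs the same load through the
+// signed variant and then reverses the lanes of each returned vector with
+// `simd_shuffle!`, so callers observe the same lane order on either
+// endianness. A minimal usage sketch (hypothetical helper, not part of the
+// generated API):
+//
+//     unsafe fn splat_pair(src: *const u16) -> uint16x8x2_t {
+//         // ret.0 holds src[0] in every lane; ret.1 holds src[1] in every lane
+//         vld2q_dup_u16(src)
+//     }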
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v16i8.p0i8")]
-        fn _vld3q_s8(ptr: *const i8, size: i32) -> int8x16x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2f32")]
+        fn _vld2_f32(ptr: *const i8, size: i32) -> float32x2x2_t;
     }
-    _vld3q_s8(a as *const i8, 1)
+    _vld2_f32(a as *const i8, 4)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i16.p0i8")]
-        fn _vld3_s16(ptr: *const i8, size: i32) -> int16x4x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f32")]
+        fn _vld2q_f32(ptr: *const i8, size: i32) -> float32x4x2_t;
     }
-    _vld3_s16(a as *const i8, 2)
+    _vld2q_f32(a as *const i8, 4)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i8")]
+        fn _vld2_s8(ptr: *const i8, size: i32) -> int8x8x2_t;
+    }
+    _vld2_s8(a as *const i8, 1)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
 #[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i16.p0i8")]
-        fn _vld3q_s16(ptr: *const i8, size: i32) -> int16x8x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v16i8")]
+        fn _vld2q_s8(ptr: *const i8, size: i32) -> int8x16x2_t;
     }
-    _vld3q_s16(a as *const i8, 2)
+    _vld2q_s8(a as *const i8, 1)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i16")]
+        fn _vld2_s16(ptr: *const i8, size: i32) -> int16x4x2_t;
+    }
+    _vld2_s16(a as *const i8, 2)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
 #[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2i32.p0i8")]
-        fn _vld3_s32(ptr: *const i8, size: i32) -> int32x2x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i16")]
+        fn _vld2q_s16(ptr: *const i8, size: i32) -> int16x8x2_t;
     }
-    _vld3_s32(a as *const i8, 4)
+    _vld2q_s16(a as *const i8, 2)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2i32")]
+        fn _vld2_s32(ptr: *const i8, size: i32) -> int32x2x2_t;
+    }
+    _vld2_s32(a as *const i8, 4)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
 #[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i32.p0i8")]
-        fn _vld3q_s32(ptr: *const i8, size: i32) -> int32x4x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i32")]
+        fn _vld2q_s32(ptr: *const i8, size: i32) -> int32x4x2_t;
     }
-    _vld3q_s32(a as *const i8, 4)
+    _vld2q_s32(a as *const i8, 4)
 }
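+// On 32-bit arm, each `llvm.arm.neon.vld2.*` shim takes an opaque byte
+// pointer plus a trailing `size` argument carrying the element alignment in
+// bytes (1 for i8, 2 for i16, 4 for i32/f32), hence the `a as *const i8`
+// casts and alignment literals above. A usage sketch (hypothetical `buf`,
+// which needs at least 8 elements):
+//
+//     let pairs: int16x4x2_t = unsafe { vld2_s16(buf.as_ptr()) };
+//     // pairs.0 = even-indexed elements of buf, pairs.1 = odd-indexed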
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x3_t) -> float32x2x3_t {
-    static_assert_uimm_bits!(LANE, 1);
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v2f32.p0i8"
+            link_name = "llvm.aarch64.neon.ld2.v2f32.p0"
         )]
-        fn _vld3_lane_f32(
-            a: float32x2_t,
-            b: float32x2_t,
-            c: float32x2_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> float32x2x3_t;
+        fn _vld2_f32(ptr: *const float32x2_t) -> float32x2x2_t;
     }
-    _vld3_lane_f32(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld2_f32(a as _)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x3_t) -> float32x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v4f32.p0i8"
+            link_name = "llvm.aarch64.neon.ld2.v4f32.p0"
         )]
-        fn _vld3q_lane_f32(
-            a: float32x4_t,
-            b: float32x4_t,
-            c: float32x4_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> float32x4x3_t;
+        fn _vld2q_f32(ptr: *const float32x4_t) -> float32x4x2_t;
     }
-    _vld3q_lane_f32(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld2q_f32(a as _)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x3_t) -> float32x2x3_t {
-    static_assert_uimm_bits!(LANE, 1);
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2f32.p0i8")]
-        fn _vld3_lane_f32(
-            ptr: *const i8,
-            a: float32x2_t,
-            b: float32x2_t,
-            c: float32x2_t,
-            n: i32,
-            size: i32,
-        ) -> float32x2x3_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v8i8.p0"
+        )]
+        fn _vld2_s8(ptr: *const int8x8_t) -> int8x8x2_t;
     }
-    _vld3_lane_f32(a as _, b.0, b.1, b.2, LANE, 4)
+    _vld2_s8(a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x3_t) -> int8x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v8i8.p0i8"
+            link_name = "llvm.aarch64.neon.ld2.v16i8.p0"
         )]
-        fn _vld3_lane_s8(
-            a: int8x8_t,
-            b: int8x8_t,
-            c: int8x8_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int8x8x3_t;
+        fn _vld2q_s8(ptr: *const int8x16_t) -> int8x16x2_t;
     }
-    _vld3_lane_s8(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld2q_s8(a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x3_t) -> int16x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v4i16.p0i8"
+            link_name = "llvm.aarch64.neon.ld2.v4i16.p0"
         )]
-        fn _vld3_lane_s16(
-            a: int16x4_t,
-            b: int16x4_t,
-            c: int16x4_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int16x4x3_t;
+        fn _vld2_s16(ptr: *const int16x4_t) -> int16x4x2_t;
     }
-    _vld3_lane_s16(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld2_s16(a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x3_t) -> int16x8x3_t {
-    static_assert_uimm_bits!(LANE, 4);
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v8i16.p0i8"
+            link_name = "llvm.aarch64.neon.ld2.v8i16.p0"
         )]
-        fn _vld3q_lane_s16(
-            a: int16x8_t,
-            b: int16x8_t,
-            c: int16x8_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int16x8x3_t;
+        fn _vld2q_s16(ptr: *const int16x8_t) -> int16x8x2_t;
     }
-    _vld3q_lane_s16(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld2q_s16(a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x3_t) -> int32x2x3_t {
-    static_assert_uimm_bits!(LANE, 1);
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v2i32.p0i8"
+            link_name = "llvm.aarch64.neon.ld2.v2i32.p0"
         )]
-        fn _vld3_lane_s32(
-            a: int32x2_t,
-            b: int32x2_t,
-            c: int32x2_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int32x2x3_t;
+        fn _vld2_s32(ptr: *const int32x2_t) -> int32x2x2_t;
     }
-    _vld3_lane_s32(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld2_s32(a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x3_t) -> int32x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v4i32.p0i8"
+            link_name = "llvm.aarch64.neon.ld2.v4i32.p0"
         )]
-        fn _vld3q_lane_s32(
-            a: int32x4_t,
-            b: int32x4_t,
-            c: int32x4_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int32x4x3_t;
+        fn _vld2q_s32(ptr: *const int32x4_t) -> int32x4x2_t;
     }
-    _vld3q_lane_s32(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld2q_s32(a as _)
 }
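+// The aarch64 shims above differ from the arm ones: `llvm.aarch64.neon.ld2.*`
+// takes a single typed vector pointer with no size argument, and the link
+// names use the opaque-pointer suffix `.p0` instead of the legacy typed
+// suffixes (`.p0v8i8` and the like) seen on the removed lines. Call shape,
+// sketched for the i32 case:
+//
+//     // extern: fn _vld2q_s32(ptr: *const int32x4_t) -> int32x4x2_t;
+//     // wrapper: _vld2q_s32(a as _)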
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x3_t) -> int8x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x2_t) -> float32x2x2_t {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i8.p0i8")]
-        fn _vld3_lane_s8(
-            ptr: *const i8,
-            a: int8x8_t,
-            b: int8x8_t,
-            c: int8x8_t,
-            n: i32,
-            size: i32,
-        ) -> int8x8x3_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v2f32.p0"
+        )]
+        fn _vld2_lane_f32(a: float32x2_t, b: float32x2_t, n: i64, ptr: *const i8) -> float32x2x2_t;
     }
-    _vld3_lane_s8(a as _, b.0, b.1, b.2, LANE, 1)
+    _vld2_lane_f32(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x3_t) -> int16x4x3_t {
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x2_t) -> float32x4x2_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i16.p0i8")]
-        fn _vld3_lane_s16(
-            ptr: *const i8,
-            a: int16x4_t,
-            b: int16x4_t,
-            c: int16x4_t,
-            n: i32,
-            size: i32,
-        ) -> int16x4x3_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v4f32.p0"
+        )]
+        fn _vld2q_lane_f32(a: float32x4_t, b: float32x4_t, n: i64, ptr: *const i8)
+            -> float32x4x2_t;
     }
-    _vld3_lane_s16(a as _, b.0, b.1, b.2, LANE, 2)
+    _vld2q_lane_f32(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x3_t) -> int16x8x3_t {
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x2_t) -> int8x8x2_t {
     static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i16.p0i8")]
-        fn _vld3q_lane_s16(
-            ptr: *const i8,
-            a: int16x8_t,
-            b: int16x8_t,
-            c: int16x8_t,
-            n: i32,
-            size: i32,
-        ) -> int16x8x3_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v8i8.p0"
+        )]
+        fn _vld2_lane_s8(a: int8x8_t, b: int8x8_t, n: i64, ptr: *const i8) -> int8x8x2_t;
     }
-    _vld3q_lane_s16(a as _, b.0, b.1, b.2, LANE, 2)
+    _vld2_lane_s8(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x3_t) -> int32x2x3_t {
-    static_assert_uimm_bits!(LANE, 1);
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x2_t) -> int16x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2i32.p0i8")]
-        fn _vld3_lane_s32(
-            ptr: *const i8,
-            a: int32x2_t,
-            b: int32x2_t,
-            c: int32x2_t,
-            n: i32,
-            size: i32,
-        ) -> int32x2x3_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v4i16.p0"
+        )]
+        fn _vld2_lane_s16(a: int16x4_t, b: int16x4_t, n: i64, ptr: *const i8) -> int16x4x2_t;
     }
-    _vld3_lane_s32(a as _, b.0, b.1, b.2, LANE, 4)
+    _vld2_lane_s16(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x2_t) -> int16x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v8i16.p0"
+        )]
+        fn _vld2q_lane_s16(a: int16x8_t, b: int16x8_t, n: i64, ptr: *const i8) -> int16x8x2_t;
+    }
+    _vld2q_lane_s16(b.0, b.1, LANE as i64, a as _)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x2_t) -> int32x2x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v2i32.p0"
+        )]
+        fn _vld2_lane_s32(a: int32x2_t, b: int32x2_t, n: i64, ptr: *const i8) -> int32x2x2_t;
+    }
+    _vld2_lane_s32(b.0, b.1, LANE as i64, a as _)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x2_t) -> int32x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v4i32.p0"
+        )]
+        fn _vld2q_lane_s32(a: int32x4_t, b: int32x4_t, n: i64, ptr: *const i8) -> int32x4x2_t;
+    }
+    _vld2q_lane_s32(b.0, b.1, LANE as i64, a as _)
+}
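+// Every `vld2*_lane_*` intrinsic validates its lane index at compile time:
+// `static_assert_uimm_bits!(LANE, N)` requires LANE to fit in N unsigned
+// bits, where N is log2 of the vector's lane count (1 bit for 2 lanes, 2 for
+// 4, 3 for 8). A minimal sketch with hypothetical `ptr`/`prev` bindings:
+//
+//     // reload lane 1 of both halves from `ptr`; LANE may only be 0 or 1
+//     let updated: int32x2x2_t = unsafe { vld2_lane_s32::<1>(ptr, prev) };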
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x3_t) -> int32x4x3_t {
+pub unsafe fn vld2_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x2_t) -> float32x2x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2f32.p0")]
+        fn _vld2_lane_f32(
+            ptr: *const i8,
+            a: float32x2_t,
+            b: float32x2_t,
+            n: i32,
+            size: i32,
+        ) -> float32x2x2_t;
+    }
+    _vld2_lane_f32(a as _, b.0, b.1, LANE, 4)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld2q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x2_t) -> float32x4x2_t {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i32.p0i8")]
-        fn _vld3q_lane_s32(
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f32.p0")]
+        fn _vld2q_lane_f32(
+            ptr: *const i8,
+            a: float32x4_t,
+            b: float32x4_t,
+            n: i32,
+            size: i32,
+        ) -> float32x4x2_t;
+    }
+    _vld2q_lane_f32(a as _, b.0, b.1, LANE, 4)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld2q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x2_t) -> int16x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i16.p0")]
+        fn _vld2q_lane_s16(
+            ptr: *const i8,
+            a: int16x8_t,
+            b: int16x8_t,
+            n: i32,
+            size: i32,
+        ) -> int16x8x2_t;
+    }
+    _vld2q_lane_s16(a as _, b.0, b.1, LANE, 2)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld2q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x2_t) -> int32x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i32.p0")]
+        fn _vld2q_lane_s32(
             ptr: *const i8,
             a: int32x4_t,
             b: int32x4_t,
-            c: int32x4_t,
             n: i32,
             size: i32,
-        ) -> int32x4x3_t;
+        ) -> int32x4x2_t;
     }
-    _vld3q_lane_s32(a as _, b.0, b.1, b.2, LANE, 4)
+    _vld2q_lane_s32(a as _, b.0, b.1, LANE, 4)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld2_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x2_t) -> int8x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i8.p0")]
+        fn _vld2_lane_s8(ptr: *const i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32)
+            -> int8x8x2_t;
+    }
+    _vld2_lane_s8(a as _, b.0, b.1, LANE, 1)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld2_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x2_t) -> int16x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i16.p0")]
+        fn _vld2_lane_s16(
+            ptr: *const i8,
+            a: int16x4_t,
+            b: int16x4_t,
+            n: i32,
+            size: i32,
+        ) -> int16x4x2_t;
+    }
+    _vld2_lane_s16(a as _, b.0, b.1, LANE, 2)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld2_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x2_t) -> int32x2x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2i32.p0")]
+        fn _vld2_lane_s32(
+            ptr: *const i8,
+            a: int32x2_t,
+            b: int32x2_t,
+            n: i32,
+            size: i32,
+        ) -> int32x2x2_t;
+    }
+    _vld2_lane_s32(a as _, b.0, b.1, LANE, 4)
+}
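+// Argument order differs between the two lane ABIs: the arm shims above pass
+// the pointer first with an `i32` lane index and a trailing alignment, e.g.
+// `_vld2_lane_s32(a as _, b.0, b.1, LANE, 4)`, while the aarch64 shims pass
+// the vectors first and the pointer last, e.g.
+// `_vld2_lane_s32(b.0, b.1, LANE as i64, a as _)`. The public signatures are
+// identical either way, so callers see no difference.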
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld2, LANE = 0)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -13021,21 +15135,21 @@ pub unsafe fn vld3q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x3_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x3_t) -> uint8x8x3_t {
+pub unsafe fn vld2_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x2_t) -> uint8x8x2_t {
     static_assert_uimm_bits!(LANE, 3);
-    transmute(vld3_lane_s8::<LANE>(transmute(a), transmute(b)))
+    transmute(vld2_lane_s8::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld2, LANE = 0)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -13046,21 +15160,21 @@ pub unsafe fn vld3_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x3_t) -> uin
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x3_t) -> uint16x4x3_t {
+pub unsafe fn vld2_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x2_t) -> uint16x4x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    transmute(vld3_lane_s16::<LANE>(transmute(a), transmute(b)))
+    transmute(vld2_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld2, LANE = 0)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -13071,21 +15185,21 @@ pub unsafe fn vld3_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x3_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x3_t) -> uint16x8x3_t {
+pub unsafe fn vld2q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x2_t) -> uint16x8x2_t {
     static_assert_uimm_bits!(LANE, 3);
-    transmute(vld3q_lane_s16::<LANE>(transmute(a), transmute(b)))
+    transmute(vld2q_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld2, LANE = 0)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -13096,21 +15210,21 @@ pub unsafe fn vld3q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x3_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x3_t) -> uint32x2x3_t {
+pub unsafe fn vld2_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x2_t) -> uint32x2x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    transmute(vld3_lane_s32::<LANE>(transmute(a), transmute(b)))
+    transmute(vld2_lane_s32::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld2, LANE = 0)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -13121,21 +15235,21 @@ pub unsafe fn vld3_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x3_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x3_t) -> uint32x4x3_t {
+pub unsafe fn vld2q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x2_t) -> uint32x4x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    transmute(vld3q_lane_s32::<LANE>(transmute(a), transmute(b)))
+    transmute(vld2q_lane_s32::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld2, LANE = 0)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -13146,21 +15260,21 @@ pub unsafe fn vld3q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x3_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x3_t) -> poly8x8x3_t {
+pub unsafe fn vld2_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x2_t) -> poly8x8x2_t {
     static_assert_uimm_bits!(LANE, 3);
-    transmute(vld3_lane_s8::<LANE>(transmute(a), transmute(b)))
+    transmute(vld2_lane_s8::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld2, LANE = 0)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -13171,21 +15285,21 @@ pub unsafe fn vld3_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x3_t) -> pol
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x3_t) -> poly16x4x3_t {
+pub unsafe fn vld2_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x2_t) -> poly16x4x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    transmute(vld3_lane_s16::<LANE>(transmute(a), transmute(b)))
+    transmute(vld2_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld2, LANE = 0)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -13196,12 +15310,12 @@ pub unsafe fn vld3_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x3_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x3_t) -> poly16x8x3_t {
+pub unsafe fn vld2q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x2_t) -> poly16x8x2_t {
     static_assert_uimm_bits!(LANE, 3);
-    transmute(vld3q_lane_s16::<LANE>(transmute(a), transmute(b)))
+    transmute(vld2q_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p64)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -13220,46 +15334,46 @@ pub unsafe fn vld3q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x3_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_p64(a: *const p64) -> poly64x1x3_t {
-    transmute(vld3_s64(transmute(a)))
+pub unsafe fn vld2_p64(a: *const p64) -> poly64x1x2_t {
+    transmute(vld2_s64(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t {
+pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v1i64.p0v1i64"
-        )]
-        fn _vld3_s64(ptr: *const int64x1_t) -> int64x1x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v1i64")]
+        fn _vld2_s64(ptr: *const i8, size: i32) -> int64x1x2_t;
     }
-    _vld3_s64(a as _)
+    _vld2_s64(a as *const i8, 8)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t {
+pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v1i64.p0i8")]
-        fn _vld3_s64(ptr: *const i8, size: i32) -> int64x1x3_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v1i64.p0"
+        )]
+        fn _vld2_s64(ptr: *const int64x1_t) -> int64x1x2_t;
     }
-    _vld3_s64(a as *const i8, 8)
+    _vld2_s64(a as _)
 }
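// ---- Editorial note (not part of the patch) ---------------------------------
// Both vld2_s64 bodies above assert `nop` rather than vld2/ld2: with a single
// 64-bit lane per vector there is nothing to de-interleave, so no structured
// load instruction is required. A hedged usage sketch, assuming AArch64 and
// the public core::arch::aarch64 API:
#[cfg(target_arch = "aarch64")]
unsafe fn load_two_i64(p: *const i64) -> (i64, i64) {
    use core::arch::aarch64::{vget_lane_s64, vld2_s64};
    let v = vld2_s64(p); // reads p[0] into v.0 and p[1] into v.1
    (vget_lane_s64::<0>(v.0), vget_lane_s64::<0>(v.1))
}
// -----------------------------------------------------------------------------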
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u64)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -13278,20 +15392,21 @@ pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_u64(a: *const u64) -> uint64x1x3_t {
-    transmute(vld3_s64(transmute(a)))
+pub unsafe fn vld2_u64(a: *const u64) -> uint64x1x2_t {
+    transmute(vld2_s64(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -13301,20 +15416,21 @@ pub unsafe fn vld3_u64(a: *const u64) -> uint64x1x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_u8(a: *const u8) -> uint8x8x3_t {
-    transmute(vld3_s8(transmute(a)))
+pub unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t {
+    transmute(vld2_s8(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -13324,20 +15440,24 @@ pub unsafe fn vld3_u8(a: *const u8) -> uint8x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_u8(a: *const u8) -> uint8x16x3_t {
-    transmute(vld3q_s8(transmute(a)))
+pub unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t {
+    let mut ret_val: uint8x8x2_t = transmute(vld2_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
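// ---- Editorial note (not part of the patch) ---------------------------------
// New in this patch: on big-endian targets the unsigned/poly wrappers reverse
// the vector lanes around the transmute of the signed result, so callers see
// the same lane numbering on both endiannesses. With x == y,
// simd_shuffle!(x, y, [7, 6, 5, 4, 3, 2, 1, 0]) yields out[i] = x[7 - i],
// i.e. a pure lane reversal. A scalar model of that mask (illustrative only):
fn reverse_lanes(v: [u8; 8]) -> [u8; 8] {
    let mut out = [0u8; 8];
    for (i, x) in v.iter().enumerate() {
        out[7 - i] = *x; // shuffle index j picks source lane j
    }
    out
}
// -----------------------------------------------------------------------------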
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -13347,20 +15467,21 @@ pub unsafe fn vld3q_u8(a: *const u8) -> uint8x16x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_u16(a: *const u16) -> uint16x4x3_t {
-    transmute(vld3_s16(transmute(a)))
+pub unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t {
+    transmute(vld2q_s8(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -13370,20 +15491,32 @@ pub unsafe fn vld3_u16(a: *const u16) -> uint16x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_u16(a: *const u16) -> uint16x8x3_t {
-    transmute(vld3q_s16(transmute(a)))
+pub unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t {
+    let mut ret_val: uint8x16x2_t = transmute(vld2q_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(
+        ret_val.0,
+        ret_val.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.1 = simd_shuffle!(
+        ret_val.1,
+        ret_val.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -13393,20 +15526,21 @@ pub unsafe fn vld3q_u16(a: *const u16) -> uint16x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_u32(a: *const u32) -> uint32x2x3_t {
-    transmute(vld3_s32(transmute(a)))
+pub unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t {
+    transmute(vld2_s16(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -13416,20 +15550,24 @@ pub unsafe fn vld3_u32(a: *const u32) -> uint32x2x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_u32(a: *const u32) -> uint32x4x3_t {
-    transmute(vld3q_s32(transmute(a)))
+pub unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t {
+    let mut ret_val: uint16x4x2_t = transmute(vld2_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -13439,20 +15577,21 @@ pub unsafe fn vld3q_u32(a: *const u32) -> uint32x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_p8(a: *const p8) -> poly8x8x3_t {
-    transmute(vld3_s8(transmute(a)))
+pub unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t {
+    transmute(vld2q_s16(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -13462,20 +15601,24 @@ pub unsafe fn vld3_p8(a: *const p8) -> poly8x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t {
-    transmute(vld3q_s8(transmute(a)))
+pub unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t {
+    let mut ret_val: uint16x8x2_t = transmute(vld2q_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -13485,20 +15628,21 @@ pub unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_p16(a: *const p16) -> poly16x4x3_t {
-    transmute(vld3_s16(transmute(a)))
+pub unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t {
+    transmute(vld2_s32(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -13508,429 +15652,48 @@ pub unsafe fn vld3_p16(a: *const p16) -> poly16x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_p16(a: *const p16) -> poly16x8x3_t {
-    transmute(vld3q_s16(transmute(a)))
+pub unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t {
+    let mut ret_val: uint32x2x2_t = transmute(vld2_s32(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x3_t) -> float32x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f32.p0i8")]
-        fn _vld3q_lane_f32(
-            ptr: *const i8,
-            a: float32x4_t,
-            b: float32x4_t,
-            c: float32x4_t,
-            n: i32,
-            size: i32,
-        ) -> float32x4x3_t;
-    }
-    _vld3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t {
+    transmute(vld2q_s32(transmute(a)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2f32.p0i8")]
-        fn _vld4_dup_f32(ptr: *const i8, size: i32) -> float32x2x4_t;
-    }
-    _vld4_dup_f32(a as *const i8, 4)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f32.p0i8")]
-        fn _vld4q_dup_f32(ptr: *const i8, size: i32) -> float32x4x4_t;
-    }
-    _vld4q_dup_f32(a as *const i8, 4)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i8.p0i8")]
-        fn _vld4_dup_s8(ptr: *const i8, size: i32) -> int8x8x4_t;
-    }
-    _vld4_dup_s8(a as *const i8, 1)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v16i8.p0i8")]
-        fn _vld4q_dup_s8(ptr: *const i8, size: i32) -> int8x16x4_t;
-    }
-    _vld4q_dup_s8(a as *const i8, 1)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i16.p0i8")]
-        fn _vld4_dup_s16(ptr: *const i8, size: i32) -> int16x4x4_t;
-    }
-    _vld4_dup_s16(a as *const i8, 2)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i16.p0i8")]
-        fn _vld4q_dup_s16(ptr: *const i8, size: i32) -> int16x8x4_t;
-    }
-    _vld4q_dup_s16(a as *const i8, 2)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2i32.p0i8")]
-        fn _vld4_dup_s32(ptr: *const i8, size: i32) -> int32x2x4_t;
-    }
-    _vld4_dup_s32(a as *const i8, 4)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i32.p0i8")]
-        fn _vld4q_dup_s32(ptr: *const i8, size: i32) -> int32x4x4_t;
-    }
-    _vld4q_dup_s32(a as *const i8, 4)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v2f32.p0f32"
-        )]
-        fn _vld4_dup_f32(ptr: *const f32) -> float32x2x4_t;
-    }
-    _vld4_dup_f32(a as _)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v4f32.p0f32"
-        )]
-        fn _vld4q_dup_f32(ptr: *const f32) -> float32x4x4_t;
-    }
-    _vld4q_dup_f32(a as _)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v8i8.p0i8"
-        )]
-        fn _vld4_dup_s8(ptr: *const i8) -> int8x8x4_t;
-    }
-    _vld4_dup_s8(a as _)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v16i8.p0i8"
-        )]
-        fn _vld4q_dup_s8(ptr: *const i8) -> int8x16x4_t;
-    }
-    _vld4q_dup_s8(a as _)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v4i16.p0i16"
-        )]
-        fn _vld4_dup_s16(ptr: *const i16) -> int16x4x4_t;
-    }
-    _vld4_dup_s16(a as _)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v8i16.p0i16"
-        )]
-        fn _vld4q_dup_s16(ptr: *const i16) -> int16x8x4_t;
-    }
-    _vld4q_dup_s16(a as _)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v2i32.p0i32"
-        )]
-        fn _vld4_dup_s32(ptr: *const i32) -> int32x2x4_t;
-    }
-    _vld4_dup_s32(a as _)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v4i32.p0i32"
-        )]
-        fn _vld4q_dup_s32(ptr: *const i32) -> int32x4x4_t;
-    }
-    _vld4q_dup_s32(a as _)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v1i64.p0i64"
-        )]
-        fn _vld4_dup_s64(ptr: *const i64) -> int64x1x4_t;
-    }
-    _vld4_dup_s64(a as _)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4_dup_p64(a: *const p64) -> poly64x1x4_t {
-    transmute(vld4_dup_s64(transmute(a)))
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(nop))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v1i64.p0i8")]
-        fn _vld4_dup_s64(ptr: *const i8, size: i32) -> int64x1x4_t;
-    }
-    _vld4_dup_s64(a as *const i8, 8)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4_dup_u64(a: *const u64) -> uint64x1x4_t {
-    transmute(vld4_dup_s64(transmute(a)))
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t {
-    transmute(vld4_dup_s8(transmute(a)))
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -13940,20 +15703,24 @@ pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t {
-    transmute(vld4q_dup_s8(transmute(a)))
+pub unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t {
+    let mut ret_val: uint32x4x2_t = transmute(vld2q_s32(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val
 }
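// ---- Editorial example (not part of the patch) ------------------------------
// Typical use of the q-form structured load: de-interleaving pairs, e.g.
// eight interleaved L/R stereo samples into one vector per channel. Assumes
// AArch64 and the public core::arch::aarch64 API.
#[cfg(target_arch = "aarch64")]
unsafe fn split_stereo(samples: &[u32; 8]) -> core::arch::aarch64::uint32x4x2_t {
    // v.0 receives samples[0], [2], [4], [6]; v.1 receives the odd indices.
    core::arch::aarch64::vld2q_u32(samples.as_ptr())
}
// -----------------------------------------------------------------------------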
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -13963,20 +15730,21 @@ pub unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t {
-    transmute(vld4_dup_s16(transmute(a)))
+pub unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t {
+    transmute(vld2_s8(transmute(a)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -13986,20 +15754,24 @@ pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t {
-    transmute(vld4q_dup_s16(transmute(a)))
+pub unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t {
+    let mut ret_val: poly8x8x2_t = transmute(vld2_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14009,20 +15781,21 @@ pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t {
-    transmute(vld4_dup_s32(transmute(a)))
+pub unsafe fn vld2q_p8(a: *const p8) -> poly8x16x2_t {
+    transmute(vld2q_s8(transmute(a)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14032,20 +15805,32 @@ pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t {
-    transmute(vld4q_dup_s32(transmute(a)))
+pub unsafe fn vld2q_p8(a: *const p8) -> poly8x16x2_t {
+    let mut ret_val: poly8x16x2_t = transmute(vld2q_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(
+        ret_val.0,
+        ret_val.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.1 = simd_shuffle!(
+        ret_val.1,
+        ret_val.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14055,20 +15840,21 @@ pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t {
-    transmute(vld4_dup_s8(transmute(a)))
+pub unsafe fn vld2_p16(a: *const p16) -> poly16x4x2_t {
+    transmute(vld2_s16(transmute(a)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14078,20 +15864,24 @@ pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t {
-    transmute(vld4q_dup_s8(transmute(a)))
+pub unsafe fn vld2_p16(a: *const p16) -> poly16x4x2_t {
+    let mut ret_val: poly16x4x2_t = transmute(vld2_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14101,20 +15891,21 @@ pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t {
-    transmute(vld4_dup_s16(transmute(a)))
+pub unsafe fn vld2q_p16(a: *const p16) -> poly16x8x2_t {
+    transmute(vld2q_s16(transmute(a)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -14124,705 +15915,526 @@ pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t {
-    transmute(vld4q_dup_s16(transmute(a)))
+pub unsafe fn vld2q_p16(a: *const p16) -> poly16x8x2_t {
+    let mut ret_val: poly16x8x2_t = transmute(vld2q_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
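The `q`-suffixed variants differ only in register width: they return 128-bit Q-register vectors with twice the lanes, which is why the big-endian `vld2q_p16` reverses eight lanes where `vld2_p16` reverses four. A small size check, assuming an aarch64 target where these tuple types exist:

#[cfg(target_arch = "aarch64")]
fn register_widths() {
    use core::arch::aarch64::{poly16x4x2_t, poly16x8x2_t};
    use core::mem::size_of;
    assert_eq!(size_of::<poly16x4x2_t>(), 16); // two 64-bit D registers
    assert_eq!(size_of::<poly16x8x2_t>(), 32); // two 128-bit Q registers
}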
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t {
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v2f32.p0v2f32"
+            link_name = "llvm.aarch64.neon.ld3r.v2f32.p0"
         )]
-        fn _vld4_f32(ptr: *const float32x2_t) -> float32x2x4_t;
+        fn _vld3_dup_f32(ptr: *const f32) -> float32x2x3_t;
     }
-    _vld4_f32(a as _)
+    _vld3_dup_f32(a as _)
 }
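From here the hunk switches from the `vld4` family to `vld3_dup`: the `ld3r` instruction loads one 3-element structure and broadcasts each element across every lane of its own register. A hedged usage sketch (aarch64 only; `demo` and `data` are illustrative names, not part of the API):

#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
unsafe fn demo() {
    use core::arch::aarch64::*;
    let data: [f32; 3] = [1.0, 2.0, 3.0];
    let t = vld3_dup_f32(data.as_ptr());
    // Each element is replicated to every lane of one vector:
    // t.0 == [1.0, 1.0], t.1 == [2.0, 2.0], t.2 == [3.0, 3.0]
    assert_eq!(vget_lane_f32::<1>(t.0), 1.0);
    assert_eq!(vget_lane_f32::<0>(t.2), 3.0);
}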
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t {
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v4f32.p0v4f32"
+            link_name = "llvm.aarch64.neon.ld3r.v4f32.p0"
         )]
-        fn _vld4q_f32(ptr: *const float32x4_t) -> float32x4x4_t;
+        fn _vld3q_dup_f32(ptr: *const f32) -> float32x4x3_t;
     }
-    _vld4q_f32(a as _)
+    _vld3q_dup_f32(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t {
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v8i8.p0v8i8"
+            link_name = "llvm.aarch64.neon.ld3r.v8i8.p0"
         )]
-        fn _vld4_s8(ptr: *const int8x8_t) -> int8x8x4_t;
+        fn _vld3_dup_s8(ptr: *const i8) -> int8x8x3_t;
     }
-    _vld4_s8(a as _)
+    _vld3_dup_s8(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t {
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v16i8.p0v16i8"
+            link_name = "llvm.aarch64.neon.ld3r.v16i8.p0"
         )]
-        fn _vld4q_s8(ptr: *const int8x16_t) -> int8x16x4_t;
+        fn _vld3q_dup_s8(ptr: *const i8) -> int8x16x3_t;
     }
-    _vld4q_s8(a as _)
+    _vld3q_dup_s8(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t {
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v4i16.p0v4i16"
+            link_name = "llvm.aarch64.neon.ld3r.v4i16.p0"
         )]
-        fn _vld4_s16(ptr: *const int16x4_t) -> int16x4x4_t;
+        fn _vld3_dup_s16(ptr: *const i16) -> int16x4x3_t;
     }
-    _vld4_s16(a as _)
+    _vld3_dup_s16(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t {
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v8i16.p0v8i16"
+            link_name = "llvm.aarch64.neon.ld3r.v8i16.p0"
         )]
-        fn _vld4q_s16(ptr: *const int16x8_t) -> int16x8x4_t;
+        fn _vld3q_dup_s16(ptr: *const i16) -> int16x8x3_t;
     }
-    _vld4q_s16(a as _)
+    _vld3q_dup_s16(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t {
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v2i32.p0v2i32"
+            link_name = "llvm.aarch64.neon.ld3r.v2i32.p0"
         )]
-        fn _vld4_s32(ptr: *const int32x2_t) -> int32x2x4_t;
+        fn _vld3_dup_s32(ptr: *const i32) -> int32x2x3_t;
     }
-    _vld4_s32(a as _)
+    _vld3_dup_s32(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t {
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v4i32.p0v4i32"
+            link_name = "llvm.aarch64.neon.ld3r.v4i32.p0"
         )]
-        fn _vld4q_s32(ptr: *const int32x4_t) -> int32x4x4_t;
+        fn _vld3q_dup_s32(ptr: *const i32) -> int32x4x3_t;
     }
-    _vld4q_s32(a as _)
+    _vld3q_dup_s32(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2f32.p0i8")]
-        fn _vld4_f32(ptr: *const i8, size: i32) -> float32x2x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3r.v1i64.p0"
+        )]
+        fn _vld3_dup_s64(ptr: *const i64) -> int64x1x3_t;
     }
-    _vld4_f32(a as *const i8, 4)
+    _vld3_dup_s64(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t {
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f32.p0i8")]
-        fn _vld4q_f32(ptr: *const i8, size: i32) -> float32x4x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2f32.p0")]
+        fn _vld3_dup_f32(ptr: *const i8, size: i32) -> float32x2x3_t;
     }
-    _vld4q_f32(a as *const i8, 4)
+    _vld3_dup_f32(a as *const i8, 4)
 }
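On the 32-bit arm path the `llvm.arm.neon.vld3dup` shims take a trailing `i32` that the aarch64 shims lack. Judging from the constants passed below (1 for bytes, 2 for halfwords, 4 here, 8 for `i64`), it appears to carry the element alignment in bytes; that reading matches `align_of` for each element type on AAPCS and 64-bit targets, though the LLVM contract is an assumption, not something this file states:

use core::mem::align_of;

fn main() {
    assert_eq!(align_of::<i8>(), 1);  // _vld3_dup_s8(..., 1)
    assert_eq!(align_of::<i16>(), 2); // _vld3_dup_s16(..., 2)
    assert_eq!(align_of::<f32>(), 4); // _vld3_dup_f32(..., 4)
    assert_eq!(align_of::<i64>(), 8); // _vld3_dup_s64(..., 8)
}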
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t {
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i8.p0i8")]
-        fn _vld4_s8(ptr: *const i8, size: i32) -> int8x8x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f32.p0")]
+        fn _vld3q_dup_f32(ptr: *const i8, size: i32) -> float32x4x3_t;
     }
-    _vld4_s8(a as *const i8, 1)
+    _vld3q_dup_f32(a as *const i8, 4)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t {
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v16i8.p0i8")]
-        fn _vld4q_s8(ptr: *const i8, size: i32) -> int8x16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i8.p0")]
+        fn _vld3_dup_s8(ptr: *const i8, size: i32) -> int8x8x3_t;
     }
-    _vld4q_s8(a as *const i8, 1)
+    _vld3_dup_s8(a as *const i8, 1)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t {
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i16.p0i8")]
-        fn _vld4_s16(ptr: *const i8, size: i32) -> int16x4x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v16i8.p0")]
+        fn _vld3q_dup_s8(ptr: *const i8, size: i32) -> int8x16x3_t;
     }
-    _vld4_s16(a as *const i8, 2)
+    _vld3q_dup_s8(a as *const i8, 1)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t {
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i16.p0i8")]
-        fn _vld4q_s16(ptr: *const i8, size: i32) -> int16x8x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i16.p0")]
+        fn _vld3_dup_s16(ptr: *const i8, size: i32) -> int16x4x3_t;
     }
-    _vld4q_s16(a as *const i8, 2)
+    _vld3_dup_s16(a as *const i8, 2)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t {
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2i32.p0i8")]
-        fn _vld4_s32(ptr: *const i8, size: i32) -> int32x2x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i16.p0")]
+        fn _vld3q_dup_s16(ptr: *const i8, size: i32) -> int16x8x3_t;
     }
-    _vld4_s32(a as *const i8, 4)
+    _vld3q_dup_s16(a as *const i8, 2)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t {
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i32.p0i8")]
-        fn _vld4q_s32(ptr: *const i8, size: i32) -> int32x4x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2i32.p0")]
+        fn _vld3_dup_s32(ptr: *const i8, size: i32) -> int32x2x3_t;
     }
-    _vld4q_s32(a as *const i8, 4)
+    _vld3_dup_s32(a as *const i8, 4)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x4_t) -> float32x2x4_t {
-    static_assert_uimm_bits!(LANE, 1);
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v2f32.p0i8"
-        )]
-        fn _vld4_lane_f32(
-            a: float32x2_t,
-            b: float32x2_t,
-            c: float32x2_t,
-            d: float32x2_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> float32x2x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i32.p0")]
+        fn _vld3q_dup_s32(ptr: *const i8, size: i32) -> int32x4x3_t;
     }
-    _vld4_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    _vld3q_dup_s32(a as *const i8, 4)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x4_t) -> float32x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v4f32.p0i8"
-        )]
-        fn _vld4q_lane_f32(
-            a: float32x4_t,
-            b: float32x4_t,
-            c: float32x4_t,
-            d: float32x4_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> float32x4x4_t;
-    }
-    _vld4q_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld3r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld3_dup_p64(a: *const p64) -> poly64x1x3_t {
+    transmute(vld3_dup_s64(transmute(a)))
 }
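The `p64` variant is gated on `neon,aes` (plus `v8` on arm) because the `poly64` types arrive with the crypto extensions; beyond that it is a thin `transmute` wrapper over the signed 64-bit load. A hedged call sketch, with `load_p64_triple` an illustrative name:

#[cfg(all(target_arch = "aarch64", target_feature = "aes"))]
unsafe fn load_p64_triple(p: *const u64) -> core::arch::aarch64::poly64x1x3_t {
    use core::arch::aarch64::*;
    // p64 is a polynomial (bit-pattern) view of u64, so the cast is free.
    vld3_dup_p64(p as *const p64)
}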
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x4_t) -> int8x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v8i8.p0i8"
-        )]
-        fn _vld4_lane_s8(
-            a: int8x8_t,
-            b: int8x8_t,
-            c: int8x8_t,
-            d: int8x8_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int8x8x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v1i64.p0")]
+        fn _vld3_dup_s64(ptr: *const i8, size: i32) -> int64x1x3_t;
     }
-    _vld4_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    _vld3_dup_s64(a as *const i8, 8)
 }
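Note the `assert_instr(nop)` on this arm path, while the aarch64 version above still checks for `ld3r`: `int64x1x3_t` holds a single lane per vector, so "replicate to all lanes" degenerates to the plain load and no broadcast instruction is expected. The same identity on a plain array:

// One lane per vector: replication is the identity, hence no dup is emitted.
fn replicate_all_lanes_1(v: [i64; 1]) -> [i64; 1] {
    v
}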
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x4_t) -> int16x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v4i16.p0i8"
-        )]
-        fn _vld4_lane_s16(
-            a: int16x4_t,
-            b: int16x4_t,
-            c: int16x4_t,
-            d: int16x4_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int16x4x4_t;
-    }
-    _vld4_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld3r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld3_dup_u64(a: *const u64) -> uint64x1x3_t {
+    transmute(vld3_dup_s64(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x4_t) -> int16x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v8i16.p0i8"
-        )]
-        fn _vld4q_lane_s16(
-            a: int16x8_t,
-            b: int16x8_t,
-            c: int16x8_t,
-            d: int16x8_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int16x8x4_t;
-    }
-    _vld4q_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld3r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t {
+    transmute(vld3_dup_s8(transmute(a)))
 }
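The unsigned and polynomial variants never bind an LLVM shim of their own; they reinterpret the result of the matching signed load, which is sound because the element types have the same size and the `x3` tuple layouts are identical. A minimal sketch of that reinterpretation on plain arrays (the real wrappers transmute the NEON tuple types directly):

fn load_u8x8_via_i8(bytes: [i8; 8]) -> [u8; 8] {
    // Bit-for-bit reinterpretation, same size and layout on both sides.
    unsafe { core::mem::transmute(bytes) }
}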
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x4_t) -> int32x2x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v2i32.p0i8"
-        )]
-        fn _vld4_lane_s32(
-            a: int32x2_t,
-            b: int32x2_t,
-            c: int32x2_t,
-            d: int32x2_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int32x2x4_t;
-    }
-    _vld4_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld3r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t {
+    let mut ret_val: uint8x8x3_t = transmute(vld3_dup_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x4_t) -> int32x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v4i32.p0i8"
-        )]
-        fn _vld4q_lane_s32(
-            a: int32x4_t,
-            b: int32x4_t,
-            c: int32x4_t,
-            d: int32x4_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int32x4x4_t;
-    }
-    _vld4q_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld3r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t {
+    transmute(vld3q_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x4_t) -> float32x2x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2f32.p0i8")]
-        fn _vld4_lane_f32(
-            ptr: *const i8,
-            a: float32x2_t,
-            b: float32x2_t,
-            c: float32x2_t,
-            d: float32x2_t,
-            n: i32,
-            size: i32,
-        ) -> float32x2x4_t;
-    }
-    _vld4_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld3r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t {
+    let mut ret_val: uint8x16x3_t = transmute(vld3q_dup_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(
+        ret_val.0,
+        ret_val.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.1 = simd_shuffle!(
+        ret_val.1,
+        ret_val.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.2 = simd_shuffle!(
+        ret_val.2,
+        ret_val.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x4_t) -> float32x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f32.p0i8")]
-        fn _vld4q_lane_f32(
-            ptr: *const i8,
-            a: float32x4_t,
-            b: float32x4_t,
-            c: float32x4_t,
-            d: float32x4_t,
-            n: i32,
-            size: i32,
-        ) -> float32x4x4_t;
-    }
-    _vld4q_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld3r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t {
+    transmute(vld3_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x4_t) -> int8x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i8.p0i8")]
-        fn _vld4_lane_s8(
-            ptr: *const i8,
-            a: int8x8_t,
-            b: int8x8_t,
-            c: int8x8_t,
-            d: int8x8_t,
-            n: i32,
-            size: i32,
-        ) -> int8x8x4_t;
-    }
-    _vld4_lane_s8(a as _, b.0, b.1, b.2, b.3, LANE, 1)
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x4_t) -> int16x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i16.p0i8")]
-        fn _vld4_lane_s16(
-            ptr: *const i8,
-            a: int16x4_t,
-            b: int16x4_t,
-            c: int16x4_t,
-            d: int16x4_t,
-            n: i32,
-            size: i32,
-        ) -> int16x4x4_t;
-    }
-    _vld4_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x4_t) -> int16x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i16.p0i8")]
-        fn _vld4q_lane_s16(
-            ptr: *const i8,
-            a: int16x8_t,
-            b: int16x8_t,
-            c: int16x8_t,
-            d: int16x8_t,
-            n: i32,
-            size: i32,
-        ) -> int16x8x4_t;
-    }
-    _vld4q_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x4_t) -> int32x2x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2i32.p0i8")]
-        fn _vld4_lane_s32(
-            ptr: *const i8,
-            a: int32x2_t,
-            b: int32x2_t,
-            c: int32x2_t,
-            d: int32x2_t,
-            n: i32,
-            size: i32,
-        ) -> int32x2x4_t;
-    }
-    _vld4_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x4_t) -> int32x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i32.p0i8")]
-        fn _vld4q_lane_s32(
-            ptr: *const i8,
-            a: int32x4_t,
-            b: int32x4_t,
-            c: int32x4_t,
-            d: int32x4_t,
-            n: i32,
-            size: i32,
-        ) -> int32x4x4_t;
-    }
-    _vld4q_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x4_t) -> uint8x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld4_lane_s8::<LANE>(transmute(a), transmute(b)))
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -14831,23 +16443,26 @@ pub unsafe fn vld4_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x4_t) -> uin
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x4_t) -> uint16x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld4_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t {
+    let mut ret_val: uint16x4x3_t = transmute(vld3_dup_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -14856,23 +16471,22 @@ pub unsafe fn vld4_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x4_t) -> uint16x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld4q_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t {
+    transmute(vld3q_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -14881,23 +16495,26 @@ pub unsafe fn vld4q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x4_t) -> uint32x2x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    transmute(vld4_lane_s32::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t {
+    let mut ret_val: uint16x8x3_t = transmute(vld3q_dup_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -14906,23 +16523,22 @@ pub unsafe fn vld4_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x4_t) -> uint32x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld4q_lane_s32::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t {
+    transmute(vld3_dup_s32(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -14931,23 +16547,26 @@ pub unsafe fn vld4q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x4_t) -> poly8x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld4_lane_s8::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t {
+    let mut ret_val: uint32x2x3_t = transmute(vld3_dup_s32(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -14956,46 +16575,21 @@ pub unsafe fn vld4_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x4_t) -> pol
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x4_t) -> poly16x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld4_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t {
+    transmute(vld3q_dup_s32(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x4_t) -> poly16x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld4q_lane_s16::<LANE>(transmute(a), transmute(b)))
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ld3r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -15005,55 +16599,25 @@ pub unsafe fn vld4q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_p64(a: *const p64) -> poly64x1x4_t {
-    transmute(vld4_s64(transmute(a)))
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v1i64.p0v1i64"
-        )]
-        fn _vld4_s64(ptr: *const int64x1_t) -> int64x1x4_t;
-    }
-    _vld4_s64(a as _)
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v1i64.p0i8")]
-        fn _vld4_s64(ptr: *const i8, size: i32) -> int64x1x4_t;
-    }
-    _vld4_s64(a as *const i8, 8)
+pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t {
+    let mut ret_val: uint32x4x3_t = transmute(vld3q_dup_s32(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
+    ret_val
 }
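
> Reviewer note, not part of this patch: on the `#[cfg(target_endian = "big")]` paths above, the wrapper re-runs the signed load and then reverses the lanes of each of the three result vectors with `simd_shuffle!`, so that the `transmute`-based unsigned/poly wrappers return the same lane order as the underlying signed intrinsics. A minimal, host-runnable sketch of the index mapping (plain arrays stand in for the internal `simd_shuffle!` macro):

```rust
// [3, 2, 1, 0] maps lane 3 -> 0, 2 -> 1, 1 -> 2, 0 -> 3,
// i.e. it reverses the lane order of a 4-lane vector.
fn reverse_lanes(v: [u32; 4]) -> [u32; 4] {
    [v[3], v[2], v[1], v[0]]
}

fn main() {
    assert_eq!(reverse_lanes([10, 20, 30, 40]), [40, 30, 20, 10]);
}
```
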
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u64)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ld3r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -15063,20 +16627,21 @@ pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_u64(a: *const u64) -> uint64x1x4_t {
-    transmute(vld4_s64(transmute(a)))
+pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t {
+    transmute(vld3_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(ld3r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -15086,20 +16651,25 @@ pub unsafe fn vld4_u64(a: *const u64) -> uint64x1x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t {
-    transmute(vld4_s8(transmute(a)))
+pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t {
+    let mut ret_val: poly8x8x3_t = transmute(vld3_dup_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(ld3r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -15109,20 +16679,21 @@ pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_u8(a: *const u8) -> uint8x16x4_t {
-    transmute(vld4q_s8(transmute(a)))
+pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t {
+    transmute(vld3q_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(ld3r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -15132,20 +16703,37 @@ pub unsafe fn vld4q_u8(a: *const u8) -> uint8x16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_u16(a: *const u16) -> uint16x4x4_t {
-    transmute(vld4_s16(transmute(a)))
+pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t {
+    let mut ret_val: poly8x16x3_t = transmute(vld3q_dup_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(
+        ret_val.0,
+        ret_val.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.1 = simd_shuffle!(
+        ret_val.1,
+        ret_val.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.2 = simd_shuffle!(
+        ret_val.2,
+        ret_val.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(ld3r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -15155,20 +16743,21 @@ pub unsafe fn vld4_u16(a: *const u16) -> uint16x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_u16(a: *const u16) -> uint16x8x4_t {
-    transmute(vld4q_s16(transmute(a)))
+pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t {
+    transmute(vld3_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(ld3r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -15178,20 +16767,25 @@ pub unsafe fn vld4q_u16(a: *const u16) -> uint16x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_u32(a: *const u32) -> uint32x2x4_t {
-    transmute(vld4_s32(transmute(a)))
+pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t {
+    let mut ret_val: poly16x4x3_t = transmute(vld3_dup_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(ld3r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -15201,20 +16795,21 @@ pub unsafe fn vld4_u32(a: *const u32) -> uint32x2x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_u32(a: *const u32) -> uint32x4x4_t {
-    transmute(vld4q_s32(transmute(a)))
+pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t {
+    transmute(vld3q_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(ld3r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -15224,741 +16819,645 @@ pub unsafe fn vld4q_u32(a: *const u32) -> uint32x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_p8(a: *const p8) -> poly8x8x4_t {
-    transmute(vld4_s8(transmute(a)))
+pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t {
+    let mut ret_val: poly16x8x3_t = transmute(vld3q_dup_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
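
> Reviewer note, not part of this patch: the `_dup` loads above lower to the replicating form (`ld3r` on AArch64): one 3-element structure is read and each element is broadcast to every lane of its result vector. A minimal usage sketch, assuming a little-endian AArch64 target (the helper name `broadcast_xyz` is illustrative):

```rust
// vld3_dup_u32 reads one {x, y, z} triple and replicates each element
// across both lanes of its result vector.
#[cfg(target_arch = "aarch64")]
unsafe fn broadcast_xyz(p: *const u32) -> core::arch::aarch64::uint32x2x3_t {
    use core::arch::aarch64::*;
    // After the load: v.0 == [x, x], v.1 == [y, y], v.2 == [z, z].
    vld3_dup_u32(p)
}
```
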
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4q_p8(a: *const p8) -> poly8x16x4_t {
-    transmute(vld4q_s8(transmute(a)))
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3.v2f32.p0"
+        )]
+        fn _vld3_f32(ptr: *const float32x2_t) -> float32x2x3_t;
+    }
+    _vld3_f32(a as _)
 }
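
> Reviewer note, not part of this patch: the structured loads de-interleave their input, which is the usual reason to reach for `vld3`. A minimal sketch, assuming an AArch64 target; the buffer layout and helper name are illustrative:

```rust
// Loads r0 g0 b0 r1 g1 b1 from memory and de-interleaves into three
// registers: v.0 = [r0, r1], v.1 = [g0, g1], v.2 = [b0, b1].
#[cfg(target_arch = "aarch64")]
unsafe fn split_rgb(data: &[f32; 6]) -> ([f32; 2], [f32; 2], [f32; 2]) {
    use core::arch::aarch64::*;
    let v = vld3_f32(data.as_ptr());
    let (mut r, mut g, mut b) = ([0.0f32; 2], [0.0f32; 2], [0.0f32; 2]);
    vst1_f32(r.as_mut_ptr(), v.0);
    vst1_f32(g.as_mut_ptr(), v.1);
    vst1_f32(b.as_mut_ptr(), v.2);
    (r, g, b)
}
```
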
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4_p16(a: *const p16) -> poly16x4x4_t {
-    transmute(vld4_s16(transmute(a)))
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3.v4f32.p0"
+        )]
+        fn _vld3q_f32(ptr: *const float32x4_t) -> float32x4x3_t;
+    }
+    _vld3q_f32(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t {
-    transmute(vld4q_s16(transmute(a)))
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3.v8i8.p0"
+        )]
+        fn _vld3_s8(ptr: *const int8x8_t) -> int8x8x3_t;
+    }
+    _vld3_s8(a as _)
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmax)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2f32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmax.v2f32"
+            link_name = "llvm.aarch64.neon.ld3.v16i8.p0"
         )]
-        fn _vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+        fn _vld3q_s8(ptr: *const int8x16_t) -> int8x16x3_t;
     }
-    _vmax_f32(a, b)
+    _vld3q_s8(a as _)
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmax)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmax.v4f32"
+            link_name = "llvm.aarch64.neon.ld3.v4i16.p0"
         )]
-        fn _vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+        fn _vld3_s16(ptr: *const int16x4_t) -> int16x4x3_t;
     }
-    _vmaxq_f32(a, b)
+    _vld3_s16(a as _)
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smax)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smax.v8i8"
+            link_name = "llvm.aarch64.neon.ld3.v8i16.p0"
         )]
-        fn _vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+        fn _vld3q_s16(ptr: *const int16x8_t) -> int16x8x3_t;
     }
-    _vmax_s8(a, b)
+    _vld3q_s16(a as _)
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smax)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v16i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smax.v16i8"
+            link_name = "llvm.aarch64.neon.ld3.v2i32.p0"
         )]
-        fn _vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+        fn _vld3_s32(ptr: *const int32x2_t) -> int32x2x3_t;
     }
-    _vmaxq_s8(a, b)
+    _vld3_s32(a as _)
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smax)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smax.v4i16"
+            link_name = "llvm.aarch64.neon.ld3.v4i32.p0"
         )]
-        fn _vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+        fn _vld3q_s32(ptr: *const int32x4_t) -> int32x4x3_t;
     }
-    _vmax_s16(a, b)
+    _vld3q_s32(a as _)
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smax)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smax.v8i16"
-        )]
-        fn _vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2f32.p0")]
+        fn _vld3_f32(ptr: *const i8, size: i32) -> float32x2x3_t;
     }
-    _vmaxq_s16(a, b)
+    _vld3_f32(a as *const i8, 4)
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smax)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smax.v2i32"
-        )]
-        fn _vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f32.p0")]
+        fn _vld3q_f32(ptr: *const i8, size: i32) -> float32x4x3_t;
     }
-    _vmax_s32(a, b)
+    _vld3q_f32(a as *const i8, 4)
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smax)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smax.v4i32"
-        )]
-        fn _vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i8.p0")]
+        fn _vld3_s8(ptr: *const i8, size: i32) -> int8x8x3_t;
     }
-    _vmaxq_s32(a, b)
+    _vld3_s8(a as *const i8, 1)
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umax)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v8i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umax.v8i8"
-        )]
-        fn _vmax_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v16i8.p0")]
+        fn _vld3q_s8(ptr: *const i8, size: i32) -> int8x16x3_t;
     }
-    _vmax_u8(a.as_signed(), b.as_signed()).as_unsigned()
+    _vld3q_s8(a as *const i8, 1)
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umax)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v16i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umax.v16i8"
-        )]
-        fn _vmaxq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i16.p0")]
+        fn _vld3_s16(ptr: *const i8, size: i32) -> int16x4x3_t;
     }
-    _vmaxq_u8(a.as_signed(), b.as_signed()).as_unsigned()
+    _vld3_s16(a as *const i8, 2)
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umax)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umax.v4i16"
-        )]
-        fn _vmax_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i16.p0")]
+        fn _vld3q_s16(ptr: *const i8, size: i32) -> int16x8x3_t;
     }
-    _vmax_u16(a.as_signed(), b.as_signed()).as_unsigned()
+    _vld3q_s16(a as *const i8, 2)
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umax)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v8i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umax.v8i16"
-        )]
-        fn _vmaxq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2i32.p0")]
+        fn _vld3_s32(ptr: *const i8, size: i32) -> int32x2x3_t;
     }
-    _vmaxq_u16(a.as_signed(), b.as_signed()).as_unsigned()
+    _vld3_s32(a as *const i8, 4)
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umax)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umax.v2i32"
-        )]
-        fn _vmax_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i32.p0")]
+        fn _vld3q_s32(ptr: *const i8, size: i32) -> int32x4x3_t;
     }
-    _vmax_u32(a.as_signed(), b.as_signed()).as_unsigned()
+    _vld3q_s32(a as *const i8, 4)
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umax)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x3_t) -> float32x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umax.v4i32"
+            link_name = "llvm.aarch64.neon.ld3lane.v2f32.p0"
         )]
-        fn _vmaxq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+        fn _vld3_lane_f32(
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> float32x2x3_t;
     }
-    _vmaxq_u32(a.as_signed(), b.as_signed()).as_unsigned()
+    _vld3_lane_f32(b.0, b.1, b.2, LANE as i64, a as _)
 }
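
> Reviewer note, not part of this patch: the `_lane` variants load a single 3-element structure into one lane of three existing vectors, leaving the other lanes of `b` unchanged; the lane index is a const generic validated at compile time. A minimal sketch, assuming AArch64 (`reload_lane1` is an illustrative name):

```rust
// Replaces lane 1 of each vector in `acc` with the triple read from `src`;
// lane 0 of each vector is preserved.
#[cfg(target_arch = "aarch64")]
unsafe fn reload_lane1(
    src: *const f32,
    acc: core::arch::aarch64::float32x2x3_t,
) -> core::arch::aarch64::float32x2x3_t {
    use core::arch::aarch64::*;
    // float32x2_t has two lanes, so LANE must be 0 or 1 (1 unsigned bit).
    vld3_lane_f32::<1>(src, acc)
}
```
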
-#[doc = "Floating-point Maximum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmaxnm)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x3_t) -> float32x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmaxnm.v2f32"
+            link_name = "llvm.aarch64.neon.ld3lane.v4f32.p0"
         )]
-        fn _vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+        fn _vld3q_lane_f32(
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> float32x4x3_t;
     }
-    _vmaxnm_f32(a, b)
+    _vld3q_lane_f32(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Floating-point Maximum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmaxnm)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld3_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x3_t) -> float32x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmaxnm.v4f32"
-        )]
-        fn _vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2f32.p0")]
+        fn _vld3_lane_f32(
+            ptr: *const i8,
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            n: i32,
+            size: i32,
+        ) -> float32x2x3_t;
     }
-    _vmaxnmq_f32(a, b)
+    _vld3_lane_f32(a as _, b.0, b.1, b.2, LANE, 4)
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f32)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmin)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x3_t) -> int8x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2f32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmin.v2f32"
+            link_name = "llvm.aarch64.neon.ld3lane.v8i8.p0"
         )]
-        fn _vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+        fn _vld3_lane_s8(
+            a: int8x8_t,
+            b: int8x8_t,
+            c: int8x8_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int8x8x3_t;
     }
-    _vmin_f32(a, b)
+    _vld3_lane_s8(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f32)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmin)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x3_t) -> int16x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmin.v4f32"
+            link_name = "llvm.aarch64.neon.ld3lane.v4i16.p0"
         )]
-        fn _vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+        fn _vld3_lane_s16(
+            a: int16x4_t,
+            b: int16x4_t,
+            c: int16x4_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int16x4x3_t;
     }
-    _vminq_f32(a, b)
+    _vld3_lane_s16(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s8)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smin)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x3_t) -> int16x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smin.v8i8"
+            link_name = "llvm.aarch64.neon.ld3lane.v8i16.p0"
         )]
-        fn _vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+        fn _vld3q_lane_s16(
+            a: int16x8_t,
+            b: int16x8_t,
+            c: int16x8_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int16x8x3_t;
     }
-    _vmin_s8(a, b)
+    _vld3q_lane_s16(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s8)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smin)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x3_t) -> int32x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v16i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smin.v16i8"
+            link_name = "llvm.aarch64.neon.ld3lane.v2i32.p0"
         )]
-        fn _vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+        fn _vld3_lane_s32(
+            a: int32x2_t,
+            b: int32x2_t,
+            c: int32x2_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int32x2x3_t;
     }
-    _vminq_s8(a, b)
+    _vld3_lane_s32(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s16)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smin)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld3q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x3_t) -> int32x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smin.v4i16"
+            link_name = "llvm.aarch64.neon.ld3lane.v4i32.p0"
         )]
-        fn _vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+        fn _vld3q_lane_s32(
+            a: int32x4_t,
+            b: int32x4_t,
+            c: int32x4_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int32x4x3_t;
     }
-    _vmin_s16(a, b)
+    _vld3q_lane_s32(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s16)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld3_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x3_t) -> int8x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i8.p0")]
+        fn _vld3_lane_s8(
+            ptr: *const i8,
+            a: int8x8_t,
+            b: int8x8_t,
+            c: int8x8_t,
+            n: i32,
+            size: i32,
+        ) -> int8x8x3_t;
+    }
+    _vld3_lane_s8(a as _, b.0, b.1, b.2, LANE, 1)
+}
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld3_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x3_t) -> int16x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i16.p0")]
+        fn _vld3_lane_s16(
+            ptr: *const i8,
+            a: int16x4_t,
+            b: int16x4_t,
+            c: int16x4_t,
+            n: i32,
+            size: i32,
+        ) -> int16x4x3_t;
+    }
+    _vld3_lane_s16(a as _, b.0, b.1, b.2, LANE, 2)
+}
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld3q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x3_t) -> int16x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i16.p0")]
+        fn _vld3q_lane_s16(
+            ptr: *const i8,
+            a: int16x8_t,
+            b: int16x8_t,
+            c: int16x8_t,
+            n: i32,
+            size: i32,
+        ) -> int16x8x3_t;
+    }
+    _vld3q_lane_s16(a as _, b.0, b.1, b.2, LANE, 2)
+}
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld3_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x3_t) -> int32x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2i32.p0")]
+        fn _vld3_lane_s32(
+            ptr: *const i8,
+            a: int32x2_t,
+            b: int32x2_t,
+            c: int32x2_t,
+            n: i32,
+            size: i32,
+        ) -> int32x2x3_t;
+    }
+    _vld3_lane_s32(a as _, b.0, b.1, b.2, LANE, 4)
+}
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld3q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x3_t) -> int32x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i32.p0")]
+        fn _vld3q_lane_s32(
+            ptr: *const i8,
+            a: int32x4_t,
+            b: int32x4_t,
+            c: int32x4_t,
+            n: i32,
+            size: i32,
+        ) -> int32x4x3_t;
+    }
+    _vld3q_lane_s32(a as _, b.0, b.1, b.2, LANE, 4)
+}
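+// Illustrative sketch (editorial, hypothetical helper): the Armv7 definition
+// above and the AArch64 definition earlier in this file share one public
+// signature, so callers can stay architecture-agnostic.
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+unsafe fn example_vld3q_lane_s32_portable(src: *const i32, acc: int32x4x3_t) -> int32x4x3_t {
+    vld3q_lane_s32::<3>(src, acc)
+}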
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smin)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15967,29 +17466,23 @@ pub unsafe fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smin.v8i16"
-        )]
-        fn _vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-    _vminq_s16(a, b)
+pub unsafe fn vld3_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x3_t) -> uint8x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld3_lane_s8::<LANE>(transmute(a), transmute(b)))
 }
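+// Illustrative sketch (editorial, hypothetical helper): the unsigned and poly
+// lane loads are thin `transmute` wrappers over the signed implementations and
+// repeat the same `static_assert_uimm_bits!` bound before delegating.
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+unsafe fn example_vld3_lane_u8(src: *const u8, acc: uint8x8x3_t) -> uint8x8x3_t {
+    // Bit-identical to the s8 variant; only the element type changes.
+    vld3_lane_u8::<7>(src, acc)
+}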
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smin)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -15998,29 +17491,23 @@ pub unsafe fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smin.v2i32"
-        )]
-        fn _vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    _vmin_s32(a, b)
+pub unsafe fn vld3_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x3_t) -> uint16x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld3_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smin)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16029,29 +17516,23 @@ pub unsafe fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smin.v4i32"
-        )]
-        fn _vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-    _vminq_s32(a, b)
+pub unsafe fn vld3q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x3_t) -> uint16x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld3q_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umin)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16060,29 +17541,23 @@ pub unsafe fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umin.v8i8"
-        )]
-        fn _vmin_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-    _vmin_u8(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vld3_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x3_t) -> uint32x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
+    transmute(vld3_lane_s32::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umin)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16091,29 +17566,23 @@ pub unsafe fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v16i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umin.v16i8"
-        )]
-        fn _vminq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    }
-    _vminq_u8(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vld3q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x3_t) -> uint32x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld3q_lane_s32::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umin)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16122,29 +17591,23 @@ pub unsafe fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umin.v4i16"
-        )]
-        fn _vmin_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    _vmin_u16(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vld3_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x3_t) -> poly8x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld3_lane_s8::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umin)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16153,29 +17616,23 @@ pub unsafe fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umin.v8i16"
-        )]
-        fn _vminq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-    _vminq_u16(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vld3_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x3_t) -> poly16x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld3_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umin)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16184,28 +17641,21 @@ pub unsafe fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umin.v2i32"
-        )]
-        fn _vmin_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    _vmin_u32(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vld3q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x3_t) -> poly16x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld3q_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umin)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16215,59 +17665,55 @@ pub unsafe fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+pub unsafe fn vld3_p64(a: *const p64) -> poly64x1x3_t {
+    transmute(vld3_s64(transmute(a)))
+}
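+// Illustrative sketch (editorial, hypothetical helper): `vld3_p64` requires the
+// `aes` feature because the poly64 types are gated on it; the load itself is a
+// plain copy of three 64-bit values, hence the `nop` assertion above.
+#[cfg(test)]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+unsafe fn example_vld3_p64(src: *const p64) -> poly64x1x3_t {
+    vld3_p64(src)
+}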
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umin.v4i32"
+            link_name = "llvm.aarch64.neon.ld3.v1i64.p0"
         )]
-        fn _vminq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+        fn _vld3_s64(ptr: *const int64x1_t) -> int64x1x3_t;
     }
-    _vminq_u32(a.as_signed(), b.as_signed()).as_unsigned()
+    _vld3_s64(a as _)
 }
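+// Illustrative sketch (editorial, hypothetical helper): with a single 64-bit
+// element per vector there is nothing to de-interleave, so no LD3 instruction
+// is required and the test above asserts `nop`.
+#[cfg(all(test, any(target_arch = "aarch64", target_arch = "arm64ec")))]
+#[target_feature(enable = "neon")]
+unsafe fn example_vld3_s64(src: &[i64; 3]) -> int64x1x3_t {
+    // Reads src[0], src[1], src[2] into the three single-lane vectors.
+    vld3_s64(src.as_ptr())
+}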
-#[doc = "Floating-point Minimum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fminnm)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminnm.v2f32"
-        )]
-        fn _vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v1i64.p0")]
+        fn _vld3_s64(ptr: *const i8, size: i32) -> int64x1x3_t;
     }
-    _vminnm_f32(a, b)
+    _vld3_s64(a as *const i8, 8)
 }
-#[doc = "Floating-point Minimum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fminnm)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16277,28 +17723,21 @@ pub unsafe fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminnm.v4f32"
-        )]
-        fn _vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
-    }
-    _vminnmq_f32(a, b)
+pub unsafe fn vld3_u64(a: *const u64) -> uint64x1x3_t {
+    transmute(vld3_s64(transmute(a)))
 }
-#[doc = "Floating-point multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_f32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16308,20 +17747,21 @@ pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
-    simd_add(a, simd_mul(b, c))
+pub unsafe fn vld3_u8(a: *const u8) -> uint8x8x3_t {
+    transmute(vld3_s8(transmute(a)))
 }
-#[doc = "Floating-point multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_f32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16331,22 +17771,26 @@ pub unsafe fn vmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float3
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
-    simd_add(a, simd_mul(b, c))
+pub unsafe fn vld3_u8(a: *const u8) -> uint8x8x3_t {
+    let mut ret_val: uint8x8x3_t = transmute(vld3_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_f32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(ld3)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16355,27 +17799,22 @@ pub unsafe fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmla_lane_f32<const LANE: i32>(
-    a: float32x2_t,
-    b: float32x2_t,
-    c: float32x2_t,
-) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmla_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+pub unsafe fn vld3q_u8(a: *const u8) -> uint8x16x3_t {
+    transmute(vld3q_s8(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_f32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(ld3)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16384,27 +17823,38 @@ pub unsafe fn vmla_lane_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmla_laneq_f32<const LANE: i32>(
-    a: float32x2_t,
-    b: float32x2_t,
-    c: float32x4_t,
-) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmla_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+pub unsafe fn vld3q_u8(a: *const u8) -> uint8x16x3_t {
+    let mut ret_val: uint8x16x3_t = transmute(vld3q_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(
+        ret_val.0,
+        ret_val.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.1 = simd_shuffle!(
+        ret_val.1,
+        ret_val.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.2 = simd_shuffle!(
+        ret_val.2,
+        ret_val.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_f32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(ld3)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16413,31 +17863,22 @@ pub unsafe fn vmla_laneq_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlaq_lane_f32<const LANE: i32>(
-    a: float32x4_t,
-    b: float32x4_t,
-    c: float32x2_t,
-) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlaq_f32(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vld3_u16(a: *const u16) -> uint16x4x3_t {
+    transmute(vld3_s16(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_f32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(ld3)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16446,31 +17887,26 @@ pub unsafe fn vmlaq_lane_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlaq_laneq_f32<const LANE: i32>(
-    a: float32x4_t,
-    b: float32x4_t,
-    c: float32x4_t,
-) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlaq_f32(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vld3_u16(a: *const u16) -> uint16x4x3_t {
+    let mut ret_val: uint16x4x3_t = transmute(vld3_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(ld3)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16479,31 +17915,22 @@ pub unsafe fn vmlaq_laneq_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmla_lane_s16<const LANE: i32>(
-    a: int16x4_t,
-    b: int16x4_t,
-    c: int16x4_t,
-) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmla_s16(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vld3q_u16(a: *const u16) -> uint16x8x3_t {
+    transmute(vld3q_s16(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(ld3)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16512,31 +17939,26 @@ pub unsafe fn vmla_lane_s16<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmla_lane_u16<const LANE: i32>(
-    a: uint16x4_t,
-    b: uint16x4_t,
-    c: uint16x4_t,
-) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmla_u16(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vld3q_u16(a: *const u16) -> uint16x8x3_t {
+    let mut ret_val: uint16x8x3_t = transmute(vld3q_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(ld3)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16545,31 +17967,22 @@ pub unsafe fn vmla_lane_u16<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmla_laneq_s16<const LANE: i32>(
-    a: int16x4_t,
-    b: int16x4_t,
-    c: int16x8_t,
-) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmla_s16(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vld3_u32(a: *const u32) -> uint32x2x3_t {
+    transmute(vld3_s32(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(ld3)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16578,31 +17991,26 @@ pub unsafe fn vmla_laneq_s16<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmla_laneq_u16<const LANE: i32>(
-    a: uint16x4_t,
-    b: uint16x4_t,
-    c: uint16x8_t,
-) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmla_u16(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vld3_u32(a: *const u32) -> uint32x2x3_t {
+    let mut ret_val: uint32x2x3_t = transmute(vld3_s32(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
+    ret_val
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(ld3)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16611,44 +18019,22 @@ pub unsafe fn vmla_laneq_u16<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlaq_lane_s16<const LANE: i32>(
-    a: int16x8_t,
-    b: int16x8_t,
-    c: int16x4_t,
-) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlaq_s16(
-        a,
-        b,
-        simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
-    )
+pub unsafe fn vld3q_u32(a: *const u32) -> uint32x4x3_t {
+    transmute(vld3q_s32(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(ld3)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16657,44 +18043,26 @@ pub unsafe fn vmlaq_lane_s16<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlaq_lane_u16<const LANE: i32>(
-    a: uint16x8_t,
-    b: uint16x8_t,
-    c: uint16x4_t,
-) -> uint16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlaq_u16(
-        a,
-        b,
-        simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
-    )
+pub unsafe fn vld3q_u32(a: *const u32) -> uint32x4x3_t {
+    let mut ret_val: uint32x4x3_t = transmute(vld3q_s32(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(ld3)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16703,44 +18071,22 @@ pub unsafe fn vmlaq_lane_u16<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlaq_laneq_s16<const LANE: i32>(
-    a: int16x8_t,
-    b: int16x8_t,
-    c: int16x8_t,
-) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmlaq_s16(
-        a,
-        b,
-        simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
-    )
+pub unsafe fn vld3_p8(a: *const p8) -> poly8x8x3_t {
+    transmute(vld3_s8(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(ld3)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16749,44 +18095,26 @@ pub unsafe fn vmlaq_laneq_s16<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlaq_laneq_u16<const LANE: i32>(
-    a: uint16x8_t,
-    b: uint16x8_t,
-    c: uint16x8_t,
-) -> uint16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmlaq_u16(
-        a,
-        b,
-        simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
-    )
+pub unsafe fn vld3_p8(a: *const p8) -> poly8x8x3_t {
+    let mut ret_val: poly8x8x3_t = transmute(vld3_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(ld3)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16795,27 +18123,22 @@ pub unsafe fn vmlaq_laneq_u16<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmla_lane_s32<const LANE: i32>(
-    a: int32x2_t,
-    b: int32x2_t,
-    c: int32x2_t,
-) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmla_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+pub unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t {
+    transmute(vld3q_s8(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(ld3)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16824,27 +18147,38 @@ pub unsafe fn vmla_lane_s32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmla_lane_u32<const LANE: i32>(
-    a: uint32x2_t,
-    b: uint32x2_t,
-    c: uint32x2_t,
-) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmla_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+pub unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t {
+    let mut ret_val: poly8x16x3_t = transmute(vld3q_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(
+        ret_val.0,
+        ret_val.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.1 = simd_shuffle!(
+        ret_val.1,
+        ret_val.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.2 = simd_shuffle!(
+        ret_val.2,
+        ret_val.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(ld3)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16853,27 +18187,22 @@ pub unsafe fn vmla_lane_u32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmla_laneq_s32<const LANE: i32>(
-    a: int32x2_t,
-    b: int32x2_t,
-    c: int32x4_t,
-) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmla_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+pub unsafe fn vld3_p16(a: *const p16) -> poly16x4x3_t {
+    transmute(vld3_s16(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(ld3)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16882,27 +18211,26 @@ pub unsafe fn vmla_laneq_s32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmla_laneq_u32<const LANE: i32>(
-    a: uint32x2_t,
-    b: uint32x2_t,
-    c: uint32x4_t,
-) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmla_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+pub unsafe fn vld3_p16(a: *const p16) -> poly16x4x3_t {
+    let mut ret_val: poly16x4x3_t = transmute(vld3_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(ld3)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16911,31 +18239,22 @@ pub unsafe fn vmla_laneq_u32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlaq_lane_s32<const LANE: i32>(
-    a: int32x4_t,
-    b: int32x4_t,
-    c: int32x2_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlaq_s32(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vld3q_p16(a: *const p16) -> poly16x8x3_t {
+    transmute(vld3q_s16(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(ld3)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -16944,64 +18263,349 @@ pub unsafe fn vmlaq_lane_s32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlaq_lane_u32<const LANE: i32>(
-    a: uint32x4_t,
-    b: uint32x4_t,
-    c: uint32x2_t,
-) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlaq_u32(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vld3q_p16(a: *const p16) -> poly16x8x3_t {
+    let mut ret_val: poly16x8x3_t = transmute(vld3q_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmlaq_laneq_s32<const LANE: i32>(
-    a: int32x4_t,
-    b: int32x4_t,
-    c: int32x4_t,
-) -> int32x4_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld3q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x3_t) -> float32x4x3_t {
     static_assert_uimm_bits!(LANE, 2);
-    vmlaq_s32(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f32.p0")]
+        fn _vld3q_lane_f32(
+            ptr: *const i8,
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            n: i32,
+            size: i32,
+        ) -> float32x4x3_t;
+    }
+    _vld3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4)
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2f32.p0")]
+        fn _vld4_dup_f32(ptr: *const i8, size: i32) -> float32x2x4_t;
+    }
+    _vld4_dup_f32(a as *const i8, 4)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f32.p0")]
+        fn _vld4q_dup_f32(ptr: *const i8, size: i32) -> float32x4x4_t;
+    }
+    _vld4q_dup_f32(a as *const i8, 4)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i8.p0")]
+        fn _vld4_dup_s8(ptr: *const i8, size: i32) -> int8x8x4_t;
+    }
+    _vld4_dup_s8(a as *const i8, 1)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v16i8.p0")]
+        fn _vld4q_dup_s8(ptr: *const i8, size: i32) -> int8x16x4_t;
+    }
+    _vld4q_dup_s8(a as *const i8, 1)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i16.p0")]
+        fn _vld4_dup_s16(ptr: *const i8, size: i32) -> int16x4x4_t;
+    }
+    _vld4_dup_s16(a as *const i8, 2)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i16.p0")]
+        fn _vld4q_dup_s16(ptr: *const i8, size: i32) -> int16x8x4_t;
+    }
+    _vld4q_dup_s16(a as *const i8, 2)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2i32.p0")]
+        fn _vld4_dup_s32(ptr: *const i8, size: i32) -> int32x2x4_t;
+    }
+    _vld4_dup_s32(a as *const i8, 4)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i32.p0")]
+        fn _vld4q_dup_s32(ptr: *const i8, size: i32) -> int32x4x4_t;
+    }
+    _vld4q_dup_s32(a as *const i8, 4)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4r.v2f32.p0f32.p0"
+        )]
+        fn _vld4_dup_f32(ptr: *const f32) -> float32x2x4_t;
+    }
+    _vld4_dup_f32(a as _)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4r.v4f32.p0f32.p0"
+        )]
+        fn _vld4q_dup_f32(ptr: *const f32) -> float32x4x4_t;
+    }
+    _vld4q_dup_f32(a as _)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4r.v8i8.p0i8.p0"
+        )]
+        fn _vld4_dup_s8(ptr: *const i8) -> int8x8x4_t;
+    }
+    _vld4_dup_s8(a as _)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4r.v16i8.p0i8.p0"
+        )]
+        fn _vld4q_dup_s8(ptr: *const i8) -> int8x16x4_t;
+    }
+    _vld4q_dup_s8(a as _)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4r.v4i16.p0i16.p0"
+        )]
+        fn _vld4_dup_s16(ptr: *const i16) -> int16x4x4_t;
+    }
+    _vld4_dup_s16(a as _)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4r.v8i16.p0i16.p0"
+        )]
+        fn _vld4q_dup_s16(ptr: *const i16) -> int16x8x4_t;
+    }
+    _vld4q_dup_s16(a as _)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4r.v2i32.p0i32.p0"
+        )]
+        fn _vld4_dup_s32(ptr: *const i32) -> int32x2x4_t;
+    }
+    _vld4_dup_s32(a as _)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4r.v4i32.p0i32.p0"
+        )]
+        fn _vld4q_dup_s32(ptr: *const i32) -> int32x4x4_t;
+    }
+    _vld4q_dup_s32(a as _)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4r.v1i64.p0i64.p0"
+        )]
+        fn _vld4_dup_s64(ptr: *const i64) -> int64x1x4_t;
+    }
+    _vld4_dup_s64(a as _)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(ld4r)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -17010,29 +18614,36 @@ pub unsafe fn vmlaq_laneq_s32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlaq_laneq_u32<const LANE: i32>(
-    a: uint32x4_t,
-    b: uint32x4_t,
-    c: uint32x4_t,
-) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlaq_u32(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vld4_dup_p64(a: *const p64) -> poly64x1x4_t {
+    transmute(vld4_dup_s64(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_f32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(nop))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v1i64.p0")]
+        fn _vld4_dup_s64(ptr: *const i8, size: i32) -> int64x1x4_t;
+    }
+    _vld4_dup_s64(a as *const i8, 8)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17042,20 +18653,21 @@ pub unsafe fn vmlaq_laneq_u32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmla_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
-    vmla_f32(a, b, vdup_n_f32(c))
+pub unsafe fn vld4_dup_u64(a: *const u64) -> uint64x1x4_t {
+    transmute(vld4_dup_s64(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_f32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17065,20 +18677,21 @@ pub unsafe fn vmla_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
-    vmlaq_f32(a, b, vdupq_n_f32(c))
+pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t {
+    transmute(vld4_dup_s8(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17088,20 +18701,26 @@ pub unsafe fn vmlaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmla_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
-    vmla_s16(a, b, vdup_n_s16(c))
+pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t {
+    let mut ret_val: uint8x8x4_t = transmute(vld4_dup_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17111,20 +18730,21 @@ pub unsafe fn vmla_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlaq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t {
-    vmlaq_s16(a, b, vdupq_n_s16(c))
+pub unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t {
+    transmute(vld4q_dup_s8(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17134,20 +18754,42 @@ pub unsafe fn vmlaq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmla_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t {
-    vmla_u16(a, b, vdup_n_u16(c))
+pub unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t {
+    let mut ret_val: uint8x16x4_t = transmute(vld4q_dup_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(
+        ret_val.0,
+        ret_val.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.1 = simd_shuffle!(
+        ret_val.1,
+        ret_val.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.2 = simd_shuffle!(
+        ret_val.2,
+        ret_val.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.3 = simd_shuffle!(
+        ret_val.3,
+        ret_val.3,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17157,20 +18799,21 @@ pub unsafe fn vmla_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlaq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t {
-    vmlaq_u16(a, b, vdupq_n_u16(c))
+pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t {
+    transmute(vld4_dup_s16(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17180,20 +18823,26 @@ pub unsafe fn vmlaq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmla_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t {
-    vmla_s32(a, b, vdup_n_s32(c))
+pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t {
+    let mut ret_val: uint16x4x4_t = transmute(vld4_dup_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17203,20 +18852,21 @@ pub unsafe fn vmla_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlaq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t {
-    vmlaq_s32(a, b, vdupq_n_s32(c))
+pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t {
+    transmute(vld4q_dup_s16(transmute(a)))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17226,20 +18876,26 @@ pub unsafe fn vmlaq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmla_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t {
-    vmla_u32(a, b, vdup_n_u32(c))
+pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t {
+    let mut ret_val: uint16x8x4_t = transmute(vld4q_dup_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17249,20 +18905,21 @@ pub unsafe fn vmla_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlaq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t {
-    vmlaq_u32(a, b, vdupq_n_u32(c))
+pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t {
+    transmute(vld4_dup_s32(transmute(a)))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s8)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17272,20 +18929,26 @@ pub unsafe fn vmlaq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmla_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
-    simd_add(a, simd_mul(b, c))
+pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t {
+    let mut ret_val: uint32x2x4_t = transmute(vld4_dup_s32(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [1, 0]);
+    ret_val
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s8)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17295,20 +18958,21 @@ pub unsafe fn vmla_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlaq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
-    simd_add(a, simd_mul(b, c))
+pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t {
+    transmute(vld4q_dup_s32(transmute(a)))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17318,20 +18982,26 @@ pub unsafe fn vmlaq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmla_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
-    simd_add(a, simd_mul(b, c))
+pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t {
+    let mut ret_val: uint32x4x4_t = transmute(vld4q_dup_s32(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17341,43 +19011,21 @@ pub unsafe fn vmla_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlaq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
-    simd_add(a, simd_mul(b, c))
+pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t {
+    transmute(vld4_dup_s8(transmute(a)))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmla_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
-    simd_add(a, simd_mul(b, c))
-}
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17387,20 +19035,26 @@ pub unsafe fn vmla_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
-    simd_add(a, simd_mul(b, c))
+pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t {
+    let mut ret_val: poly8x8x4_t = transmute(vld4_dup_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u8)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17410,20 +19064,21 @@ pub unsafe fn vmlaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmla_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
-    simd_add(a, simd_mul(b, c))
+pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t {
+    transmute(vld4q_dup_s8(transmute(a)))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u8)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17433,20 +19088,42 @@ pub unsafe fn vmla_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
-    simd_add(a, simd_mul(b, c))
+pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t {
+    let mut ret_val: poly8x16x4_t = transmute(vld4q_dup_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(
+        ret_val.0,
+        ret_val.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.1 = simd_shuffle!(
+        ret_val.1,
+        ret_val.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.2 = simd_shuffle!(
+        ret_val.2,
+        ret_val.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.3 = simd_shuffle!(
+        ret_val.3,
+        ret_val.3,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17456,20 +19133,21 @@ pub unsafe fn vmlaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmla_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
-    simd_add(a, simd_mul(b, c))
+pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t {
+    transmute(vld4_dup_s16(transmute(a)))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17479,20 +19157,26 @@ pub unsafe fn vmla_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
-    simd_add(a, simd_mul(b, c))
+pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t {
+    let mut ret_val: poly16x4x4_t = transmute(vld4_dup_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17502,20 +19186,21 @@ pub unsafe fn vmlaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmla_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
-    simd_add(a, simd_mul(b, c))
+pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t {
+    transmute(vld4q_dup_s16(transmute(a)))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17525,338 +19210,685 @@ pub unsafe fn vmla_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
-    simd_add(a, simd_mul(b, c))
+pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t {
+    let mut ret_val: poly16x8x4_t = transmute(vld4q_dup_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal, LANE = 1)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmlal_lane_s16<const LANE: i32>(
-    a: int32x4_t,
-    b: int16x4_t,
-    c: int16x4_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlal_s16(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4.v2f32.p0"
+        )]
+        fn _vld4_f32(ptr: *const float32x2_t) -> float32x2x4_t;
+    }
+    _vld4_f32(a as _)
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal, LANE = 1)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmlal_laneq_s16<const LANE: i32>(
-    a: int32x4_t,
-    b: int16x4_t,
-    c: int16x8_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmlal_s16(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4.v4f32.p0"
+        )]
+        fn _vld4q_f32(ptr: *const float32x4_t) -> float32x4x4_t;
+    }
+    _vld4q_f32(a as _)
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal, LANE = 1)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmlal_lane_s32<const LANE: i32>(
-    a: int64x2_t,
-    b: int32x2_t,
-    c: int32x2_t,
-) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlal_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4.v8i8.p0"
+        )]
+        fn _vld4_s8(ptr: *const int8x8_t) -> int8x8x4_t;
+    }
+    _vld4_s8(a as _)
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal, LANE = 1)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmlal_laneq_s32<const LANE: i32>(
-    a: int64x2_t,
-    b: int32x2_t,
-    c: int32x4_t,
-) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlal_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4.v16i8.p0"
+        )]
+        fn _vld4q_s8(ptr: *const int8x16_t) -> int8x16x4_t;
+    }
+    _vld4q_s8(a as _)
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal, LANE = 1)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmlal_lane_u16<const LANE: i32>(
-    a: uint32x4_t,
-    b: uint16x4_t,
-    c: uint16x4_t,
-) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlal_u16(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4.v4i16.p0"
+        )]
+        fn _vld4_s16(ptr: *const int16x4_t) -> int16x4x4_t;
+    }
+    _vld4_s16(a as _)
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal, LANE = 1)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmlal_laneq_u16<const LANE: i32>(
-    a: uint32x4_t,
-    b: uint16x4_t,
-    c: uint16x8_t,
-) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmlal_u16(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4.v8i16.p0"
+        )]
+        fn _vld4q_s16(ptr: *const int16x8_t) -> int16x8x4_t;
+    }
+    _vld4q_s16(a as _)
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal, LANE = 1)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmlal_lane_u32<const LANE: i32>(
-    a: uint64x2_t,
-    b: uint32x2_t,
-    c: uint32x2_t,
-) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlal_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4.v2i32.p0"
+        )]
+        fn _vld4_s32(ptr: *const int32x2_t) -> int32x2x4_t;
+    }
+    _vld4_s32(a as _)
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal, LANE = 1)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmlal_laneq_u32<const LANE: i32>(
-    a: uint64x2_t,
-    b: uint32x2_t,
-    c: uint32x4_t,
-) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlal_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4.v4i32.p0"
+        )]
+        fn _vld4q_s32(ptr: *const int32x4_t) -> int32x4x4_t;
+    }
+    _vld4q_s32(a as _)
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
-    vmlal_s16(a, b, vdup_n_s16(c))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2f32.p0")]
+        fn _vld4_f32(ptr: *const i8, size: i32) -> float32x2x4_t;
+    }
+    _vld4_f32(a as *const i8, 4)
 }
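+// On the Armv7 path the `llvm.arm.neon.vld4.*` shims take a type-erased
+// `*const i8` plus a trailing `i32`, which per LLVM's conventions is the
+// alignment in bytes; these wrappers pass the element size (4 for f32/i32,
+// 2 for i16, 1 for i8, 8 for i64).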
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
-    vmlal_s32(a, b, vdup_n_s32(c))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f32.p0")]
+        fn _vld4q_f32(ptr: *const i8, size: i32) -> float32x4x4_t;
+    }
+    _vld4q_f32(a as *const i8, 4)
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
-    vmlal_u16(a, b, vdup_n_u16(c))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i8.p0")]
+        fn _vld4_s8(ptr: *const i8, size: i32) -> int8x8x4_t;
+    }
+    _vld4_s8(a as *const i8, 1)
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v16i8.p0")]
+        fn _vld4q_s8(ptr: *const i8, size: i32) -> int8x16x4_t;
+    }
+    _vld4q_s8(a as *const i8, 1)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i16.p0")]
+        fn _vld4_s16(ptr: *const i8, size: i32) -> int16x4x4_t;
+    }
+    _vld4_s16(a as *const i8, 2)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i16.p0")]
+        fn _vld4q_s16(ptr: *const i8, size: i32) -> int16x8x4_t;
+    }
+    _vld4q_s16(a as *const i8, 2)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2i32.p0")]
+        fn _vld4_s32(ptr: *const i8, size: i32) -> int32x2x4_t;
+    }
+    _vld4_s32(a as *const i8, 4)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i32.p0")]
+        fn _vld4q_s32(ptr: *const i8, size: i32) -> int32x4x4_t;
+    }
+    _vld4q_s32(a as *const i8, 4)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x4_t) -> float32x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v2f32.p0"
+        )]
+        fn _vld4_lane_f32(
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            d: float32x2_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> float32x2x4_t;
+    }
+    _vld4_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
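+// The `_lane` variants load a single 4-element structure into lane `LANE` of
+// the four given vectors, leaving all other lanes untouched. `LANE` is
+// range-checked at compile time by `static_assert_uimm_bits!` (1 bit for
+// 2-lane vectors, 2 bits for 4-lane, 3 bits for 8-lane) and forwarded to
+// `llvm.aarch64.neon.ld4lane.*` as an `i64`, with the pointer passed last.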
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x4_t) -> float32x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v4f32.p0"
+        )]
+        fn _vld4q_lane_f32(
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            d: float32x4_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> float32x4x4_t;
+    }
+    _vld4q_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x4_t) -> int8x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v8i8.p0"
+        )]
+        fn _vld4_lane_s8(
+            a: int8x8_t,
+            b: int8x8_t,
+            c: int8x8_t,
+            d: int8x8_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int8x8x4_t;
+    }
+    _vld4_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x4_t) -> int16x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v4i16.p0"
+        )]
+        fn _vld4_lane_s16(
+            a: int16x4_t,
+            b: int16x4_t,
+            c: int16x4_t,
+            d: int16x4_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int16x4x4_t;
+    }
+    _vld4_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x4_t) -> int16x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v8i16.p0"
+        )]
+        fn _vld4q_lane_s16(
+            a: int16x8_t,
+            b: int16x8_t,
+            c: int16x8_t,
+            d: int16x8_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int16x8x4_t;
+    }
+    _vld4q_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x4_t) -> int32x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v2i32.p0"
+        )]
+        fn _vld4_lane_s32(
+            a: int32x2_t,
+            b: int32x2_t,
+            c: int32x2_t,
+            d: int32x2_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int32x2x4_t;
+    }
+    _vld4_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x4_t) -> int32x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v4i32.p0"
+        )]
+        fn _vld4q_lane_s32(
+            a: int32x4_t,
+            b: int32x4_t,
+            c: int32x4_t,
+            d: int32x4_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int32x4x4_t;
+    }
+    _vld4q_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x4_t) -> float32x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2f32.p0")]
+        fn _vld4_lane_f32(
+            ptr: *const i8,
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            d: float32x2_t,
+            n: i32,
+            size: i32,
+        ) -> float32x2x4_t;
+    }
+    _vld4_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
+}
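+// The Armv7 `llvm.arm.neon.vld4lane.*` shims use the mirrored argument
+// order: pointer first, then the four vectors, the lane number as an `i32`,
+// and finally the alignment-in-bytes `i32` noted above.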
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x4_t) -> float32x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f32.p0")]
+        fn _vld4q_lane_f32(
+            ptr: *const i8,
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            d: float32x4_t,
+            n: i32,
+            size: i32,
+        ) -> float32x4x4_t;
+    }
+    _vld4q_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x4_t) -> int8x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i8.p0")]
+        fn _vld4_lane_s8(
+            ptr: *const i8,
+            a: int8x8_t,
+            b: int8x8_t,
+            c: int8x8_t,
+            d: int8x8_t,
+            n: i32,
+            size: i32,
+        ) -> int8x8x4_t;
+    }
+    _vld4_lane_s8(a as _, b.0, b.1, b.2, b.3, LANE, 1)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x4_t) -> int16x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i16.p0")]
+        fn _vld4_lane_s16(
+            ptr: *const i8,
+            a: int16x4_t,
+            b: int16x4_t,
+            c: int16x4_t,
+            d: int16x4_t,
+            n: i32,
+            size: i32,
+        ) -> int16x4x4_t;
+    }
+    _vld4_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x4_t) -> int16x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i16.p0")]
+        fn _vld4q_lane_s16(
+            ptr: *const i8,
+            a: int16x8_t,
+            b: int16x8_t,
+            c: int16x8_t,
+            d: int16x8_t,
+            n: i32,
+            size: i32,
+        ) -> int16x8x4_t;
+    }
+    _vld4q_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x4_t) -> int32x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2i32.p0")]
+        fn _vld4_lane_s32(
+            ptr: *const i8,
+            a: int32x2_t,
+            b: int32x2_t,
+            c: int32x2_t,
+            d: int32x2_t,
+            n: i32,
+            size: i32,
+        ) -> int32x2x4_t;
+    }
+    _vld4_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x4_t) -> int32x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i32.p0")]
+        fn _vld4q_lane_s32(
+            ptr: *const i8,
+            a: int32x4_t,
+            b: int32x4_t,
+            c: int32x4_t,
+            d: int32x4_t,
+            n: i32,
+            size: i32,
+        ) -> int32x4x4_t;
+    }
+    _vld4q_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -17865,21 +19897,23 @@ pub unsafe fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
-    vmlal_u32(a, b, vdup_n_u32(c))
+pub unsafe fn vld4_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x4_t) -> uint8x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld4_lane_s8::<LANE>(transmute(a), transmute(b)))
 }
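+// The unsigned and polynomial `_lane` variants do not bind LLVM intrinsics
+// of their own: the signed, unsigned and poly vector types share a
+// bit-for-bit layout, so each is a `transmute` wrapper around the signed
+// load of the same shape.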
-#[doc = "Signed multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s8)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -17888,21 +19922,23 @@ pub unsafe fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
-    simd_add(a, vmull_s8(b, c))
+pub unsafe fn vld4_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x4_t) -> uint16x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld4_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Signed multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -17911,21 +19947,23 @@ pub unsafe fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    simd_add(a, vmull_s16(b, c))
+pub unsafe fn vld4q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x4_t) -> uint16x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld4q_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Signed multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -17934,21 +19972,23 @@ pub unsafe fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    simd_add(a, vmull_s32(b, c))
+pub unsafe fn vld4_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x4_t) -> uint32x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    transmute(vld4_lane_s32::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Unsigned multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u8)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -17957,21 +19997,23 @@ pub unsafe fn vmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
-    simd_add(a, vmull_u8(b, c))
+pub unsafe fn vld4q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x4_t) -> uint32x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld4q_lane_s32::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Unsigned multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -17980,21 +20022,23 @@ pub unsafe fn vmlal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
-    simd_add(a, vmull_u16(b, c))
+pub unsafe fn vld4_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x4_t) -> poly8x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld4_lane_s8::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Unsigned multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18003,21 +20047,23 @@ pub unsafe fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
-    simd_add(a, vmull_u32(b, c))
+pub unsafe fn vld4_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x4_t) -> poly16x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld4_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Floating-point multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_f32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18026,20 +20072,21 @@ pub unsafe fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
-    simd_sub(a, simd_mul(b, c))
+pub unsafe fn vld4q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x4_t) -> poly16x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld4q_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Floating-point multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_f32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18049,22 +20096,56 @@ pub unsafe fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float3
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
-    simd_sub(a, simd_mul(b, c))
+pub unsafe fn vld4_p64(a: *const p64) -> poly64x1x4_t {
+    transmute(vld4_s64(transmute(a)))
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_f32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4.v1i64.p0"
+        )]
+        fn _vld4_s64(ptr: *const int64x1_t) -> int64x1x4_t;
+    }
+    _vld4_s64(a as _)
+}
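+// With a single lane per vector (`v1i64`) there is nothing to de-interleave,
+// so the 64-bit loads assert no particular instruction (`nop`); presumably
+// the backend is free to lower them as four ordinary loads.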
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v1i64.p0")]
+        fn _vld4_s64(ptr: *const i8, size: i32) -> int64x1x4_t;
+    }
+    _vld4_s64(a as *const i8, 8)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(nop)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18073,27 +20154,22 @@ pub unsafe fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_lane_f32<const LANE: i32>(
-    a: float32x2_t,
-    b: float32x2_t,
-    c: float32x2_t,
-) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmls_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+pub unsafe fn vld4_u64(a: *const u64) -> uint64x1x4_t {
+    transmute(vld4_s64(transmute(a)))
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_f32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18102,27 +20178,22 @@ pub unsafe fn vmls_lane_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_laneq_f32<const LANE: i32>(
-    a: float32x2_t,
-    b: float32x2_t,
-    c: float32x4_t,
-) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmls_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t {
+    transmute(vld4_s8(transmute(a)))
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_f32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18131,31 +20202,27 @@ pub unsafe fn vmls_laneq_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_lane_f32<const LANE: i32>(
-    a: float32x4_t,
-    b: float32x4_t,
-    c: float32x2_t,
-) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlsq_f32(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t {
+    let mut ret_val: uint8x8x4_t = transmute(vld4_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
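+// On big-endian targets the `transmute`-based wrappers also reverse the
+// lanes of each returned vector with `simd_shuffle!`, so that the lane
+// numbering callers observe matches the little-endian definition.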
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_f32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18164,31 +20231,22 @@ pub unsafe fn vmlsq_lane_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_laneq_f32<const LANE: i32>(
-    a: float32x4_t,
-    b: float32x4_t,
-    c: float32x4_t,
-) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlsq_f32(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vld4q_u8(a: *const u8) -> uint8x16x4_t {
+    transmute(vld4q_s8(transmute(a)))
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18197,31 +20255,43 @@ pub unsafe fn vmlsq_laneq_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_lane_s16<const LANE: i32>(
-    a: int16x4_t,
-    b: int16x4_t,
-    c: int16x4_t,
-) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmls_s16(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vld4q_u8(a: *const u8) -> uint8x16x4_t {
+    let mut ret_val: uint8x16x4_t = transmute(vld4q_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(
+        ret_val.0,
+        ret_val.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.1 = simd_shuffle!(
+        ret_val.1,
+        ret_val.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.2 = simd_shuffle!(
+        ret_val.2,
+        ret_val.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.3 = simd_shuffle!(
+        ret_val.3,
+        ret_val.3,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18230,31 +20300,22 @@ pub unsafe fn vmls_lane_s16<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_lane_u16<const LANE: i32>(
-    a: uint16x4_t,
-    b: uint16x4_t,
-    c: uint16x4_t,
-) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmls_u16(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vld4_u16(a: *const u16) -> uint16x4x4_t {
+    transmute(vld4_s16(transmute(a)))
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18263,31 +20324,27 @@ pub unsafe fn vmls_lane_u16<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_laneq_s16<const LANE: i32>(
-    a: int16x4_t,
-    b: int16x4_t,
-    c: int16x8_t,
-) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmls_s16(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vld4_u16(a: *const u16) -> uint16x4x4_t {
+    let mut ret_val: uint16x4x4_t = transmute(vld4_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]);
+    ret_val
 }
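// A minimal scalar model (assumption: illustrative only) of the big-endian
// fix-up above: the shared s16 loader yields lanes in memory order, and the
// [3, 2, 1, 0] shuffle re-reverses each returned vector so that lane 0 again
// names the lowest-addressed element. The reversal is its own inverse.
fn reverse4<T: Copy>(v: [T; 4]) -> [T; 4] {
    [v[3], v[2], v[1], v[0]]
}
fn reversal_round_trips() {
    let v = [10u16, 20, 30, 40];
    assert_eq!(reverse4(reverse4(v)), v);
}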
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18296,31 +20353,22 @@ pub unsafe fn vmls_laneq_s16<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_laneq_u16<const LANE: i32>(
-    a: uint16x4_t,
-    b: uint16x4_t,
-    c: uint16x8_t,
-) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmls_u16(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vld4q_u16(a: *const u16) -> uint16x8x4_t {
+    transmute(vld4q_s16(transmute(a)))
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18329,44 +20377,27 @@ pub unsafe fn vmls_laneq_u16<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_lane_s16<const LANE: i32>(
-    a: int16x8_t,
-    b: int16x8_t,
-    c: int16x4_t,
-) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlsq_s16(
-        a,
-        b,
-        simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
-    )
+pub unsafe fn vld4q_u16(a: *const u16) -> uint16x8x4_t {
+    let mut ret_val: uint16x8x4_t = transmute(vld4q_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18375,44 +20406,22 @@ pub unsafe fn vmlsq_lane_s16<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_lane_u16<const LANE: i32>(
-    a: uint16x8_t,
-    b: uint16x8_t,
-    c: uint16x4_t,
-) -> uint16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlsq_u16(
-        a,
-        b,
-        simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
-    )
+pub unsafe fn vld4_u32(a: *const u32) -> uint32x2x4_t {
+    transmute(vld4_s32(transmute(a)))
 }
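// Sketch (assumption: illustrative, not part of this patch): the transmute
// delegation above is sound because the signed and unsigned tuple types share
// size and lane layout; only the nominal element type changes.
#[cfg(target_arch = "aarch64")]
fn u32_and_i32_tuples_share_layout() {
    use core::arch::aarch64::{int32x2x4_t, uint32x2x4_t};
    assert_eq!(
        core::mem::size_of::<int32x2x4_t>(),
        core::mem::size_of::<uint32x2x4_t>()
    );
}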
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18421,44 +20430,27 @@ pub unsafe fn vmlsq_lane_u16<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_laneq_s16<const LANE: i32>(
-    a: int16x8_t,
-    b: int16x8_t,
-    c: int16x8_t,
-) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmlsq_s16(
-        a,
-        b,
-        simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
-    )
+pub unsafe fn vld4_u32(a: *const u32) -> uint32x2x4_t {
+    let mut ret_val: uint32x2x4_t = transmute(vld4_s32(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [1, 0]);
+    ret_val
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18467,44 +20459,22 @@ pub unsafe fn vmlsq_laneq_s16<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_laneq_u16<const LANE: i32>(
-    a: uint16x8_t,
-    b: uint16x8_t,
-    c: uint16x8_t,
-) -> uint16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmlsq_u16(
-        a,
-        b,
-        simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
-    )
+pub unsafe fn vld4q_u32(a: *const u32) -> uint32x4x4_t {
+    transmute(vld4q_s32(transmute(a)))
 }
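// Hypothetical usage sketch: the `q` suffix marks the 128-bit form, so
// vld4q_u32 consumes 16 consecutive u32 values into four uint32x4_t vectors.
#[cfg(all(target_arch = "aarch64", target_endian = "little"))]
unsafe fn vld4q_u32_usage() {
    use core::arch::aarch64::{vgetq_lane_u32, vld4q_u32};
    let data: [u32; 16] = core::array::from_fn(|i| i as u32);
    let loaded = vld4q_u32(data.as_ptr());
    // Lane 2 of the fourth vector is data[4 * 2 + 3] == 11.
    assert_eq!(vgetq_lane_u32::<2>(loaded.3), 11);
}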
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18513,27 +20483,27 @@ pub unsafe fn vmlsq_laneq_u16<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_lane_s32<const LANE: i32>(
-    a: int32x2_t,
-    b: int32x2_t,
-    c: int32x2_t,
-) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmls_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+pub unsafe fn vld4q_u32(a: *const u32) -> uint32x4x4_t {
+    let mut ret_val: uint32x4x4_t = transmute(vld4q_s32(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18542,27 +20512,22 @@ pub unsafe fn vmls_lane_s32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_lane_u32<const LANE: i32>(
-    a: uint32x2_t,
-    b: uint32x2_t,
-    c: uint32x2_t,
-) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmls_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+pub unsafe fn vld4_p8(a: *const p8) -> poly8x8x4_t {
+    transmute(vld4_s8(transmute(a)))
 }
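// Sketch (assumption: illustrative, and assuming the p8 alias re-export from
// core::arch::aarch64): p8 is the polynomial view of a byte, so the poly
// loaders can reuse the s8 loader exactly like the unsigned ones do; only the
// nominal element semantics differ, never the bit layout.
#[cfg(target_arch = "aarch64")]
fn p8_is_one_byte() {
    use core::arch::aarch64::p8;
    assert_eq!(core::mem::size_of::<p8>(), 1);
}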
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18571,27 +20536,27 @@ pub unsafe fn vmls_lane_u32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_laneq_s32<const LANE: i32>(
-    a: int32x2_t,
-    b: int32x2_t,
-    c: int32x4_t,
-) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmls_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+pub unsafe fn vld4_p8(a: *const p8) -> poly8x8x4_t {
+    let mut ret_val: poly8x8x4_t = transmute(vld4_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18600,27 +20565,22 @@ pub unsafe fn vmls_laneq_s32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_laneq_u32<const LANE: i32>(
-    a: uint32x2_t,
-    b: uint32x2_t,
-    c: uint32x4_t,
-) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmls_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+pub unsafe fn vld4q_p8(a: *const p8) -> poly8x16x4_t {
+    transmute(vld4q_s8(transmute(a)))
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18629,31 +20589,43 @@ pub unsafe fn vmls_laneq_u32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_lane_s32<const LANE: i32>(
-    a: int32x4_t,
-    b: int32x4_t,
-    c: int32x2_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlsq_s32(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vld4q_p8(a: *const p8) -> poly8x16x4_t {
+    let mut ret_val: poly8x16x4_t = transmute(vld4q_s8(transmute(a)));
+    ret_val.0 = simd_shuffle!(
+        ret_val.0,
+        ret_val.0,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.1 = simd_shuffle!(
+        ret_val.1,
+        ret_val.1,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.2 = simd_shuffle!(
+        ret_val.2,
+        ret_val.2,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val.3 = simd_shuffle!(
+        ret_val.3,
+        ret_val.3,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    );
+    ret_val
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18662,31 +20634,22 @@ pub unsafe fn vmlsq_lane_s32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_lane_u32<const LANE: i32>(
-    a: uint32x4_t,
-    b: uint32x4_t,
-    c: uint32x2_t,
-) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlsq_u32(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vld4_p16(a: *const p16) -> poly16x4x4_t {
+    transmute(vld4_s16(transmute(a)))
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18695,31 +20658,27 @@ pub unsafe fn vmlsq_lane_u32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_laneq_s32<const LANE: i32>(
-    a: int32x4_t,
-    b: int32x4_t,
-    c: int32x4_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlsq_s32(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vld4_p16(a: *const p16) -> poly16x4x4_t {
+    let mut ret_val: poly16x4x4_t = transmute(vld4_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]);
+    ret_val
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(ld4)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -18728,29 +20687,21 @@ pub unsafe fn vmlsq_laneq_s32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_laneq_u32<const LANE: i32>(
-    a: uint32x4_t,
-    b: uint32x4_t,
-    c: uint32x4_t,
-) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlsq_u32(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t {
+    transmute(vld4q_s16(transmute(a)))
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_f32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(ld4)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18760,20 +20711,25 @@ pub unsafe fn vmlsq_laneq_u32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
-    vmls_f32(a, b, vdup_n_f32(c))
+pub unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t {
+    let mut ret_val: poly16x8x4_t = transmute(vld4q_s16(transmute(a)));
+    ret_val.0 = simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.1 = simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.2 = simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val.3 = simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+    ret_val
 }
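// Sketch (assumption: illustrative): each little-/big-endian pair above
// defines the same symbol under mutually exclusive cfg(target_endian) gates,
// so exactly one body is compiled and callers always see a single vld4q_p16.
#[cfg(target_endian = "little")]
fn compiled_endianness() -> &'static str {
    "little"
}
#[cfg(target_endian = "big")]
fn compiled_endianness() -> &'static str {
    "big"
}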
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_f32)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(fmax)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18783,20 +20739,28 @@ pub unsafe fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
-    vmlsq_f32(a, b, vdupq_n_f32(c))
+pub unsafe fn vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmax.v2f32"
+        )]
+        fn _vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    _vmax_f32(a, b)
 }
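// Hypothetical usage sketch: vmax_f32 takes a lane-wise maximum; like the
// underlying FMAX instruction it propagates NaN operands (contrast vmaxnm
// further below, which prefers the numeric operand).
#[cfg(target_arch = "aarch64")]
unsafe fn vmax_f32_usage() {
    use core::arch::aarch64::{vdup_n_f32, vget_lane_f32, vmax_f32};
    let max = vmax_f32(vdup_n_f32(1.0), vdup_n_f32(2.0));
    assert_eq!(vget_lane_f32::<0>(max), 2.0);
}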
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s16)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(fmax)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18806,20 +20770,28 @@ pub unsafe fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
-    vmls_s16(a, b, vdup_n_s16(c))
+pub unsafe fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmax.v4f32"
+        )]
+        fn _vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    _vmaxq_f32(a, b)
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s16)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(smax)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18829,20 +20801,28 @@ pub unsafe fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t {
-    vmlsq_s16(a, b, vdupq_n_s16(c))
+pub unsafe fn vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smax.v8i8"
+        )]
+        fn _vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    _vmax_s8(a, b)
 }
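// A scalar model (assumption: illustrative) of what the llvm.arm.neon.vmaxs /
// llvm.aarch64.neon.smax bindings above compute: an independent signed max in
// every lane.
fn smax_lanes(a: [i8; 8], b: [i8; 8]) -> [i8; 8] {
    core::array::from_fn(|i| a[i].max(b[i]))
}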
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u16)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(smax)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18852,20 +20832,28 @@ pub unsafe fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t {
-    vmls_u16(a, b, vdup_n_u16(c))
+pub unsafe fn vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smax.v16i8"
+        )]
+        fn _vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vmaxq_s8(a, b)
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u16)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(smax)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18875,20 +20863,28 @@ pub unsafe fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t {
-    vmlsq_u16(a, b, vdupq_n_u16(c))
+pub unsafe fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smax.v4i16"
+        )]
+        fn _vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    _vmax_s16(a, b)
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s32)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(smax)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18898,20 +20894,28 @@ pub unsafe fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t {
-    vmls_s32(a, b, vdup_n_s32(c))
+pub unsafe fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smax.v8i16"
+        )]
+        fn _vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vmaxq_s16(a, b)
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s32)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(smax)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18921,20 +20925,28 @@ pub unsafe fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t {
-    vmlsq_s32(a, b, vdupq_n_s32(c))
+pub unsafe fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smax.v2i32"
+        )]
+        fn _vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    _vmax_s32(a, b)
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u32)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(smax)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18944,20 +20956,28 @@ pub unsafe fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t {
-    vmls_u32(a, b, vdup_n_u32(c))
+pub unsafe fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smax.v4i32"
+        )]
+        fn _vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vmaxq_s32(a, b)
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u32)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(umax)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18967,20 +20987,28 @@ pub unsafe fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t {
-    vmlsq_u32(a, b, vdupq_n_u32(c))
+pub unsafe fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umax.v8i8"
+        )]
+        fn _vmax_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    _vmax_u8(a.as_signed(), b.as_signed()).as_unsigned()
 }
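// Sketch (assumption: illustrative): the as_signed()/as_unsigned() calls are
// bit-preserving reinterpretations, needed only because the imported LLVM
// symbol is declared over signed vector types; the instruction itself still
// compares unsigned. A u8 round-tripped through i8 models this: no bits move.
fn reinterpret_round_trip(x: u8) -> u8 {
    (x as i8) as u8
}
fn all_bit_patterns_survive() {
    assert_eq!(reinterpret_round_trip(0xFF), 0xFF);
}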
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s8)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(umax)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18990,20 +21018,28 @@ pub unsafe fn vmlsq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
-    simd_sub(a, simd_mul(b, c))
+pub unsafe fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umax.v16i8"
+        )]
+        fn _vmaxq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vmaxq_u8(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s8)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(umax)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19013,20 +21049,28 @@ pub unsafe fn vmls_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
-    simd_sub(a, simd_mul(b, c))
+pub unsafe fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umax.v4i16"
+        )]
+        fn _vmax_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    _vmax_u16(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s16)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(umax)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19036,20 +21080,28 @@ pub unsafe fn vmlsq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
-    simd_sub(a, simd_mul(b, c))
+pub unsafe fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umax.v8i16"
+        )]
+        fn _vmaxq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vmaxq_u16(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s16)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(umax)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19059,20 +21111,28 @@ pub unsafe fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
-    simd_sub(a, simd_mul(b, c))
+pub unsafe fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umax.v2i32"
+        )]
+        fn _vmax_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    _vmax_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s32)"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(umax)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19082,20 +21142,28 @@ pub unsafe fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
-    simd_sub(a, simd_mul(b, c))
+pub unsafe fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umax.v4i32"
+        )]
+        fn _vmaxq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vmaxq_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s32)"]
+#[doc = "Floating-point Maximum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(fmaxnm)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19105,20 +21173,28 @@ pub unsafe fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
-    simd_sub(a, simd_mul(b, c))
+pub unsafe fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnm.v2f32"
+        )]
+        fn _vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    _vmaxnm_f32(a, b)
 }
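// Hypothetical usage sketch: vmaxnm_f32 follows IEEE 754 maxNum semantics, so
// a quiet NaN loses to a number instead of propagating as it would with vmax.
#[cfg(target_arch = "aarch64")]
unsafe fn vmaxnm_f32_usage() {
    use core::arch::aarch64::{vdup_n_f32, vget_lane_f32, vmaxnm_f32};
    let max = vmaxnm_f32(vdup_n_f32(f32::NAN), vdup_n_f32(3.0));
    assert_eq!(vget_lane_f32::<0>(max), 3.0);
}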
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u8)"]
+#[doc = "Floating-point Maximum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(fmaxnm)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19128,20 +21204,28 @@ pub unsafe fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
-    simd_sub(a, simd_mul(b, c))
+pub unsafe fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnm.v4f32"
+        )]
+        fn _vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    _vmaxnmq_f32(a, b)
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u8)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(fmin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19151,20 +21235,28 @@ pub unsafe fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
-    simd_sub(a, simd_mul(b, c))
+pub unsafe fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmin.v2f32"
+        )]
+        fn _vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    _vmin_f32(a, b)
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u16)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(fmin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19174,20 +21266,28 @@ pub unsafe fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
-    simd_sub(a, simd_mul(b, c))
+pub unsafe fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmin.v4f32"
+        )]
+        fn _vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    _vminq_f32(a, b)
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u16)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(smin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19197,20 +21297,28 @@ pub unsafe fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
-    simd_sub(a, simd_mul(b, c))
+pub unsafe fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smin.v8i8"
+        )]
+        fn _vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    _vmin_s8(a, b)
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u32)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(smin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19220,20 +21328,28 @@ pub unsafe fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
-    simd_sub(a, simd_mul(b, c))
+pub unsafe fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smin.v16i8"
+        )]
+        fn _vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vminq_s8(a, b)
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u32)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(smin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19243,22 +21359,29 @@ pub unsafe fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
-    simd_sub(a, simd_mul(b, c))
+pub unsafe fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smin.v4i16"
+        )]
+        fn _vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    _vmin_s16(a, b)
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s16)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl, LANE = 1)
+    assert_instr(smin)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19267,31 +21390,29 @@ pub unsafe fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsl_lane_s16<const LANE: i32>(
-    a: int32x4_t,
-    b: int16x4_t,
-    c: int16x4_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlsl_s16(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smin.v8i16"
+        )]
+        fn _vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vminq_s16(a, b)
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s16)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl, LANE = 1)
+    assert_instr(smin)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19300,31 +21421,29 @@ pub unsafe fn vmlsl_lane_s16<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsl_laneq_s16<const LANE: i32>(
-    a: int32x4_t,
-    b: int16x4_t,
-    c: int16x8_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmlsl_s16(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smin.v2i32"
+        )]
+        fn _vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    _vmin_s32(a, b)
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s32)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl, LANE = 1)
+    assert_instr(smin)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19333,27 +21452,29 @@ pub unsafe fn vmlsl_laneq_s16<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsl_lane_s32<const LANE: i32>(
-    a: int64x2_t,
-    b: int32x2_t,
-    c: int32x2_t,
-) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlsl_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+pub unsafe fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smin.v4i32"
+        )]
+        fn _vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vminq_s32(a, b)
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s32)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl, LANE = 1)
+    assert_instr(umin)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19362,27 +21483,29 @@ pub unsafe fn vmlsl_lane_s32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsl_laneq_s32<const LANE: i32>(
-    a: int64x2_t,
-    b: int32x2_t,
-    c: int32x4_t,
-) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlsl_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+pub unsafe fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umin.v8i8"
+        )]
+        fn _vmin_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    _vmin_u8(a.as_signed(), b.as_signed()).as_unsigned()
 }
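The unsigned forms reuse the signed LLVM vector types, so the wrappers convert at the boundary with as_signed()/as_unsigned(); the unsigned semantics come from selecting the umin/vminu intrinsic, not from the conversion. Those helpers are crate-internal bit reinterpretations, conceptually like this sketch (not the crate's actual implementation):

    #[cfg(target_arch = "aarch64")]
    unsafe fn as_signed_sketch(
        x: core::arch::aarch64::uint8x8_t,
    ) -> core::arch::aarch64::int8x8_t {
        // Same 64 bits, lanes reinterpreted; no value conversion happens here.
        core::mem::transmute(x)
    }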
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u16)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl, LANE = 1)
+    assert_instr(umin)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19391,31 +21514,29 @@ pub unsafe fn vmlsl_laneq_s32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsl_lane_u16<const LANE: i32>(
-    a: uint32x4_t,
-    b: uint16x4_t,
-    c: uint16x4_t,
-) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlsl_u16(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umin.v16i8"
+        )]
+        fn _vminq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vminq_u8(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u16)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl, LANE = 1)
+    assert_instr(umin)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19424,31 +21545,29 @@ pub unsafe fn vmlsl_lane_u16<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsl_laneq_u16<const LANE: i32>(
-    a: uint32x4_t,
-    b: uint16x4_t,
-    c: uint16x8_t,
-) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmlsl_u16(
-        a,
-        b,
-        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umin.v4i16"
+        )]
+        fn _vmin_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    _vmin_u16(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u32)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl, LANE = 1)
+    assert_instr(umin)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19457,27 +21576,29 @@ pub unsafe fn vmlsl_laneq_u16<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsl_lane_u32<const LANE: i32>(
-    a: uint64x2_t,
-    b: uint32x2_t,
-    c: uint32x2_t,
-) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmlsl_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+pub unsafe fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umin.v8i16"
+        )]
+        fn _vminq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vminq_u16(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u32)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl, LANE = 1)
+    assert_instr(umin)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19486,25 +21607,28 @@ pub unsafe fn vmlsl_lane_u32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsl_laneq_u32<const LANE: i32>(
-    a: uint64x2_t,
-    b: uint32x2_t,
-    c: uint32x4_t,
-) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmlsl_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
+pub unsafe fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umin.v2i32"
+        )]
+        fn _vmin_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    _vmin_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s16)"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl)
+    assert_instr(umin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19514,20 +21638,28 @@ pub unsafe fn vmlsl_laneq_u32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
-    vmlsl_s16(a, b, vdup_n_s16(c))
+pub unsafe fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umin.v4i32"
+        )]
+        fn _vminq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vminq_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s32)"]
+#[doc = "Floating-point Minimum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl)
+    assert_instr(fminnm)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19537,20 +21669,28 @@ pub unsafe fn vmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
-    vmlsl_s32(a, b, vdup_n_s32(c))
+pub unsafe fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.v2f32"
+        )]
+        fn _vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    _vminnm_f32(a, b)
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u16)"]
+#[doc = "Floating-point Minimum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl)
+    assert_instr(fminnm)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19560,20 +21700,28 @@ pub unsafe fn vmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsl_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
-    vmlsl_u16(a, b, vdup_n_u16(c))
+pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.v4f32"
+        )]
+        fn _vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    _vminnmq_f32(a, b)
 }
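vmin/fmin and vminnm/fminnm differ only in NaN handling: fmin propagates a NaN operand, while fminnm implements IEEE 754 minNum and returns the numeric operand when exactly one input is a quiet NaN (hence the extra fp-armv8,v8 feature gate on the Arm side). A hypothetical aarch64 check of that difference:

    #[cfg(target_arch = "aarch64")]
    unsafe fn demo_minnm() {
        use core::arch::aarch64::*;
        let a = vdup_n_f32(f32::NAN);
        let b = vdup_n_f32(1.0);
        // fminnm drops the quiet NaN ...
        assert_eq!(vget_lane_f32::<0>(vminnm_f32(a, b)), 1.0);
        // ... while fmin propagates it.
        assert!(vget_lane_f32::<0>(vmin_f32(a, b)).is_nan());
    }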
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u32)"]
+#[doc = "Floating-point multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19583,20 +21731,20 @@ pub unsafe fn vmlsl_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
-    vmlsl_u32(a, b, vdup_n_u32(c))
+pub unsafe fn vmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    simd_add(a, simd_mul(b, c))
 }
-#[doc = "Signed multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s8)"]
+#[doc = "Floating-point multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19606,21 +21754,22 @@ pub unsafe fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
-    simd_sub(a, vmull_s8(b, c))
+pub unsafe fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    simd_add(a, simd_mul(b, c))
 }
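vmla_f32/vmlaq_f32 stay a plain multiply-then-add (simd_add of a simd_mul), i.e. two rounding steps, which is why the aarch64 assertion expects fmul (followed by a separate fadd) rather than one fused instruction; callers wanting fusion use the vfma family instead. The per-lane contract, as a scalar sketch:

    // r[i] = a[i] + (b[i] * c[i]), rounded after the multiply and after the add.
    fn vmla_reference(a: f32, b: f32, c: f32) -> f32 {
        a + b * c
    }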
-#[doc = "Signed multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19629,21 +21778,27 @@ pub unsafe fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    simd_sub(a, vmull_s16(b, c))
+pub unsafe fn vmla_lane_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x2_t,
+) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmla_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
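The _lane_/_laneq_ forms below all follow one recipe: static_assert_uimm_bits! bounds LANE at compile time, simd_shuffle! splats that lane of c, and the plain vector form does the arithmetic; rustc_legacy_const_generics(3) keeps the pre-const-generics call syntax compiling. A usage sketch (hypothetical values, aarch64 only):

    #[cfg(target_arch = "aarch64")]
    unsafe fn demo_vmla_lane() {
        use core::arch::aarch64::*;
        let a = vdup_n_f32(1.0);
        let b = vdup_n_f32(2.0);
        let c = vld1_f32([10.0f32, 20.0].as_ptr());
        // Lane 1 of c (20.0) is broadcast: each lane becomes 1.0 + 2.0 * 20.0.
        let r = vmla_lane_f32::<1>(a, b, c);
        assert_eq!(vget_lane_f32::<0>(r), 41.0);
    }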
-#[doc = "Signed multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19652,21 +21807,27 @@ pub unsafe fn vmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    simd_sub(a, vmull_s32(b, c))
+pub unsafe fn vmla_laneq_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x4_t,
+) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmla_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
-#[doc = "Unsigned multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u8)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19675,21 +21836,31 @@ pub unsafe fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
-    simd_sub(a, vmull_u8(b, c))
+pub unsafe fn vmlaq_lane_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x2_t,
+) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlaq_f32(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Unsigned multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19698,21 +21869,31 @@ pub unsafe fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
-    simd_sub(a, vmull_u16(b, c))
+pub unsafe fn vmlaq_laneq_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x4_t,
+) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlaq_f32(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Unsigned multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19721,21 +21902,31 @@ pub unsafe fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmlsl_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
-    simd_sub(a, vmull_u32(b, c))
+pub unsafe fn vmla_lane_s16<const LANE: i32>(
+    a: int16x4_t,
+    b: int16x4_t,
+    c: int16x4_t,
+) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmla_s16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19744,21 +21935,31 @@ pub unsafe fn vmlsl_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    simd_mul(a, b)
+pub unsafe fn vmla_lane_u16<const LANE: i32>(
+    a: uint16x4_t,
+    b: uint16x4_t,
+    c: uint16x4_t,
+) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmla_u16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19767,22 +21968,31 @@ pub unsafe fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    simd_mul(a, b)
+pub unsafe fn vmla_laneq_s16<const LANE: i32>(
+    a: int16x4_t,
+    b: int16x4_t,
+    c: int16x8_t,
+) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmla_s16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Floating-point multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 0)
+    assert_instr(mla, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19791,23 +22001,31 @@ pub unsafe fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
+pub unsafe fn vmla_laneq_u16<const LANE: i32>(
+    a: uint16x4_t,
+    b: uint16x4_t,
+    c: uint16x8_t,
+) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmla_u16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Floating-point multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 0)
+    assert_instr(mla, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19816,23 +22034,44 @@ pub unsafe fn vmul_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float32x2_t {
+pub unsafe fn vmlaq_lane_s16<const LANE: i32>(
+    a: int16x8_t,
+    b: int16x8_t,
+    c: int16x4_t,
+) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
+    vmlaq_s16(
+        a,
+        b,
+        simd_shuffle!(
+            c,
+            c,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
+            ]
+        ),
+    )
 }
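The eight-entry index list above is simply a single-lane splat written out by the generator. A hypothetical hand-written equivalent that makes the splat explicit via a dup (a sketch, not what the generator emits):

    #[cfg(target_arch = "aarch64")]
    mod sketch {
        use core::arch::aarch64::*;

        // Same semantics as vmlaq_lane_s16, with the broadcast made explicit.
        pub unsafe fn vmlaq_lane_s16_alt<const LANE: i32>(
            a: int16x8_t,
            b: int16x8_t,
            c: int16x4_t,
        ) -> int16x8_t {
            // LANE must be in 0..=3; the real intrinsic asserts this at compile time.
            vmlaq_s16(a, b, vdupq_lane_s16::<LANE>(c))
        }
    }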
-#[doc = "Floating-point multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 0)
+    assert_instr(mla, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19841,54 +22080,44 @@ pub unsafe fn vmul_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    simd_mul(
+pub unsafe fn vmlaq_lane_u16<const LANE: i32>(
+    a: uint16x8_t,
+    b: uint16x8_t,
+    c: uint16x4_t,
+) -> uint16x8_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlaq_u16(
         a,
-        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
-}
-#[doc = "Floating-point multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmulq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    simd_mul(
-        a,
-        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+        b,
+        simd_shuffle!(
+            c,
+            c,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
+            ]
+        ),
     )
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mla, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19897,26 +22126,44 @@ pub unsafe fn vmulq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    simd_mul(
+pub unsafe fn vmlaq_laneq_s16<const LANE: i32>(
+    a: int16x8_t,
+    b: int16x8_t,
+    c: int16x8_t,
+) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlaq_s16(
         a,
-        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+        b,
+        simd_shuffle!(
+            c,
+            c,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
+            ]
+        ),
     )
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mla, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19925,13 +22172,18 @@ pub unsafe fn vmul_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int1
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    simd_mul(
+pub unsafe fn vmlaq_laneq_u16<const LANE: i32>(
+    a: uint16x8_t,
+    b: uint16x8_t,
+    c: uint16x8_t,
+) -> uint16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlaq_u16(
         a,
+        b,
         simd_shuffle!(
-            b,
-            b,
+            c,
+            c,
             [
                 LANE as u32,
                 LANE as u32,
@@ -19945,19 +22197,19 @@ pub unsafe fn vmulq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int
         ),
     )
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mla, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19966,23 +22218,27 @@ pub unsafe fn vmulq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+pub unsafe fn vmla_lane_s32<const LANE: i32>(
+    a: int32x2_t,
+    b: int32x2_t,
+    c: int32x2_t,
+) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
+    vmla_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mla, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -19991,26 +22247,27 @@ pub unsafe fn vmul_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int3
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
+pub unsafe fn vmla_lane_u32<const LANE: i32>(
+    a: uint32x2_t,
+    b: uint32x2_t,
+    c: uint32x2_t,
+) -> uint32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    simd_mul(
-        a,
-        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+    vmla_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mla, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20019,26 +22276,27 @@ pub unsafe fn vmulq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+pub unsafe fn vmla_laneq_s32<const LANE: i32>(
+    a: int32x2_t,
+    b: int32x2_t,
+    c: int32x4_t,
+) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    simd_mul(
-        a,
-        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+    vmla_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mla, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20047,39 +22305,27 @@ pub unsafe fn vmul_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> ui
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t {
+pub unsafe fn vmla_laneq_u32<const LANE: i32>(
+    a: uint32x2_t,
+    b: uint32x2_t,
+    c: uint32x4_t,
+) -> uint32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    simd_mul(
-        a,
-        simd_shuffle!(
-            b,
-            b,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
-    )
+    vmla_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mla, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20088,23 +22334,31 @@ pub unsafe fn vmulq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> u
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+pub unsafe fn vmlaq_lane_s32<const LANE: i32>(
+    a: int32x4_t,
+    b: int32x4_t,
+    c: int32x2_t,
+) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
+    vmlaq_s32(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mla, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20113,26 +22367,31 @@ pub unsafe fn vmul_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> ui
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t {
+pub unsafe fn vmlaq_lane_u32<const LANE: i32>(
+    a: uint32x4_t,
+    b: uint32x4_t,
+    c: uint32x2_t,
+) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    simd_mul(
+    vmlaq_u32(
         a,
-        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
     )
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mla, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20141,26 +22400,31 @@ pub unsafe fn vmulq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> u
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    simd_mul(
+pub unsafe fn vmlaq_laneq_s32<const LANE: i32>(
+    a: int32x4_t,
+    b: int32x4_t,
+    c: int32x4_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlaq_s32(
         a,
-        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
     )
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mla, LANE = 1)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20169,39 +22433,30 @@ pub unsafe fn vmul_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    simd_mul(
+pub unsafe fn vmlaq_laneq_u32<const LANE: i32>(
+    a: uint32x4_t,
+    b: uint32x4_t,
+    c: uint32x4_t,
+) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlaq_u32(
         a,
-        simd_shuffle!(
-            b,
-            b,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
     )
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(fmul)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20210,23 +22465,21 @@ pub unsafe fn vmulq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> in
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
+pub unsafe fn vmla_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
+    vmla_f32(a, b, vdup_n_f32(c))
 }
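+// Illustrative sketch (editorial, not part of the generated file): the `_n_`
+// variants splat the scalar with vdup and defer to the vector form. Note the
+// aarch64 assert_instr above checks fmul, i.e. this is expected to lower to a
+// separate multiply and add rather than a fused operation:
+//
+//     let r = unsafe { vmla_n_f32(acc, x, 2.0) };
+//     // per lane: r[i] == acc[i] + x[i] * 2.0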
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(fmul)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20235,26 +22488,21 @@ pub unsafe fn vmul_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    simd_mul(
-        a,
-        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vmlaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
+    vmlaq_f32(a, b, vdupq_n_f32(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mla)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20263,26 +22511,21 @@ pub unsafe fn vmulq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> in
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    simd_mul(
-        a,
-        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vmla_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
+    vmla_s16(a, b, vdup_n_s16(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mla)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20291,39 +22534,21 @@ pub unsafe fn vmul_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> u
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    simd_mul(
-        a,
-        simd_shuffle!(
-            b,
-            b,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        ),
-    )
+pub unsafe fn vmlaq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t {
+    vmlaq_s16(a, b, vdupq_n_s16(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mla)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20332,23 +22557,21 @@ pub unsafe fn vmulq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
+pub unsafe fn vmla_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t {
+    vmla_u16(a, b, vdup_n_u16(c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mla)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20357,24 +22580,20 @@ pub unsafe fn vmul_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> u
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    simd_mul(
-        a,
-        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vmlaq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t {
+    vmlaq_u16(a, b, vdupq_n_u16(c))
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20384,20 +22603,20 @@ pub unsafe fn vmulq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t {
-    simd_mul(a, vdup_n_f32(b))
+pub unsafe fn vmla_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t {
+    vmla_s32(a, b, vdup_n_s32(c))
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20407,20 +22626,20 @@ pub unsafe fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t {
-    simd_mul(a, vdupq_n_f32(b))
+pub unsafe fn vmlaq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t {
+    vmlaq_s32(a, b, vdupq_n_s32(c))
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20430,20 +22649,20 @@ pub unsafe fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
-    simd_mul(a, vdup_n_s16(b))
+pub unsafe fn vmla_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t {
+    vmla_u32(a, b, vdup_n_u32(c))
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20453,20 +22672,20 @@ pub unsafe fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
-    simd_mul(a, vdupq_n_s16(b))
+pub unsafe fn vmlaq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t {
+    vmlaq_u32(a, b, vdupq_n_u32(c))
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s32)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20476,20 +22695,20 @@ pub unsafe fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
-    simd_mul(a, vdup_n_s32(b))
+pub unsafe fn vmla_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
+    simd_add(a, simd_mul(b, c))
 }
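+// Illustrative sketch (editorial, not part of the generated file): the plain
+// vmla_* forms are just `a + b * c` via simd_add/simd_mul, with wrapping
+// integer semantics per lane:
+//
+//     let r = unsafe { vmla_s8(a, b, c) };
+//     // per lane: r[i] == a[i].wrapping_add(b[i].wrapping_mul(c[i]))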
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s32)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20499,20 +22718,20 @@ pub unsafe fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
-    simd_mul(a, vdupq_n_s32(b))
+pub unsafe fn vmlaq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
+    simd_add(a, simd_mul(b, c))
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u16)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20522,20 +22741,20 @@ pub unsafe fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t {
-    simd_mul(a, vdup_n_u16(b))
+pub unsafe fn vmla_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+    simd_add(a, simd_mul(b, c))
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u16)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20545,20 +22764,20 @@ pub unsafe fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t {
-    simd_mul(a, vdupq_n_u16(b))
+pub unsafe fn vmlaq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+    simd_add(a, simd_mul(b, c))
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u32)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20568,20 +22787,20 @@ pub unsafe fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t {
-    simd_mul(a, vdup_n_u32(b))
+pub unsafe fn vmla_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+    simd_add(a, simd_mul(b, c))
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u32)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20591,20 +22810,20 @@ pub unsafe fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t {
-    simd_mul(a, vdupq_n_u32(b))
+pub unsafe fn vmlaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    simd_add(a, simd_mul(b, c))
 }
-#[doc = "Polynomial multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_p8)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(pmul)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20614,28 +22833,20 @@ pub unsafe fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v8i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.pmul.v8i8"
-        )]
-        fn _vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t;
-    }
-    _vmul_p8(a, b)
+pub unsafe fn vmla_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
+    simd_add(a, simd_mul(b, c))
 }
-#[doc = "Polynomial multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_p8)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(pmul)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20645,28 +22856,20 @@ pub unsafe fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v16i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.pmul.v16i8"
-        )]
-        fn _vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t;
-    }
-    _vmulq_p8(a, b)
+pub unsafe fn vmlaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
+    simd_add(a, simd_mul(b, c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s16)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20676,20 +22879,20 @@ pub unsafe fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    simd_mul(a, b)
+pub unsafe fn vmla_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
+    simd_add(a, simd_mul(b, c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s16)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20699,20 +22902,20 @@ pub unsafe fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    simd_mul(a, b)
+pub unsafe fn vmlaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+    simd_add(a, simd_mul(b, c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u16)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20722,20 +22925,20 @@ pub unsafe fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    simd_mul(a, b)
+pub unsafe fn vmla_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
+    simd_add(a, simd_mul(b, c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u16)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20745,21 +22948,22 @@ pub unsafe fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    simd_mul(a, b)
+pub unsafe fn vmlaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    simd_add(a, simd_mul(b, c))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s32)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(smlal, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20768,21 +22972,31 @@ pub unsafe fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    simd_mul(a, b)
+pub unsafe fn vmlal_lane_s16<const LANE: i32>(
+    a: int32x4_t,
+    b: int16x4_t,
+    c: int16x4_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlal_s16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
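+// Illustrative sketch (editorial, not part of the generated file): the
+// widening `_lane_` forms shuffle the chosen lane across the narrow vector
+// and defer to vmlal_*, so the accumulator is twice the element width:
+//
+//     let r: int32x4_t = unsafe { vmlal_lane_s16::<3>(acc, x, y) };
+//     // per lane: r[i] == acc[i] + (x[i] as i32) * (y[3] as i32)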
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s32)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(smlal, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20791,21 +23005,31 @@ pub unsafe fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    simd_mul(a, b)
+pub unsafe fn vmlal_laneq_s16<const LANE: i32>(
+    a: int32x4_t,
+    b: int16x4_t,
+    c: int16x8_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlal_s16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u32)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(smlal, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20814,21 +23038,27 @@ pub unsafe fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    simd_mul(a, b)
+pub unsafe fn vmlal_lane_s32<const LANE: i32>(
+    a: int64x2_t,
+    b: int32x2_t,
+    c: int32x2_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlal_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u32)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(smlal, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20837,21 +23067,27 @@ pub unsafe fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    simd_mul(a, b)
+pub unsafe fn vmlal_laneq_s32<const LANE: i32>(
+    a: int64x2_t,
+    b: int32x2_t,
+    c: int32x4_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlal_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
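+// Editorial note: `_laneq_` differs from `_lane_` only in taking a 128-bit
+// lane source, which is why the static assert allows two LANE bits here
+// (0..=3) versus one for vmlal_lane_s32. A hypothetical call:
+//
+//     let r = unsafe { vmlal_laneq_s32::<3>(acc, x, y_q) };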
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s8)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(umlal, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20860,21 +23096,31 @@ pub unsafe fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    simd_mul(a, b)
+pub unsafe fn vmlal_lane_u16<const LANE: i32>(
+    a: uint32x4_t,
+    b: uint16x4_t,
+    c: uint16x4_t,
+) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlal_u16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s8)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(umlal, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20883,21 +23129,31 @@ pub unsafe fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    simd_mul(a, b)
+pub unsafe fn vmlal_laneq_u16<const LANE: i32>(
+    a: uint32x4_t,
+    b: uint16x4_t,
+    c: uint16x8_t,
+) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlal_u16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u8)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(umlal, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20906,21 +23162,27 @@ pub unsafe fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    simd_mul(a, b)
+pub unsafe fn vmlal_lane_u32<const LANE: i32>(
+    a: uint64x2_t,
+    b: uint32x2_t,
+    c: uint32x2_t,
+) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlal_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u8)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(umlal, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20929,22 +23191,26 @@ pub unsafe fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    simd_mul(a, b)
+pub unsafe fn vmlal_laneq_u32<const LANE: i32>(
+    a: uint64x2_t,
+    b: uint32x2_t,
+    c: uint32x4_t,
+) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlal_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s16)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull, LANE = 1)
+    assert_instr(smlal)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20953,26 +23219,21 @@ pub unsafe fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmull_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmull_s16(
-        a,
-        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
+    vmlal_s16(a, b, vdup_n_s16(c))
 }
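+// Illustrative sketch (editorial, not part of the generated file): widening
+// `_n_` form; the scalar is splatted at the narrow width and each product is
+// accumulated at double width:
+//
+//     let r = unsafe { vmlal_n_s16(acc, x, 7) };
+//     // per lane: r[i] == acc[i] + (x[i] as i32) * 7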
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s16)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull, LANE = 1)
+    assert_instr(smlal)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20981,26 +23242,21 @@ pub unsafe fn vmull_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmull_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmull_s16(
-        a,
-        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
+    vmlal_s32(a, b, vdup_n_s32(c))
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s32)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull, LANE = 1)
+    assert_instr(umlal)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21009,23 +23265,21 @@ pub unsafe fn vmull_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> in
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmull_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmull_s32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
+pub unsafe fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
+    vmlal_u16(a, b, vdup_n_u16(c))
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s32)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull, LANE = 1)
+    assert_instr(umlal)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21034,23 +23288,21 @@ pub unsafe fn vmull_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmull_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmull_s32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
+pub unsafe fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
+    vmlal_u32(a, b, vdup_n_u32(c))
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u16)"]
+#[doc = "Signed multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull, LANE = 1)
+    assert_instr(smlal)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21059,26 +23311,21 @@ pub unsafe fn vmull_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> in
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmull_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmull_u16(
-        a,
-        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
+    simd_add(a, vmull_s8(b, c))
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u16)"]
+#[doc = "Signed multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull, LANE = 1)
+    assert_instr(smlal)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21087,26 +23334,21 @@ pub unsafe fn vmull_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> u
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmull_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vmull_u16(
-        a,
-        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-    )
+pub unsafe fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    simd_add(a, vmull_s16(b, c))
 }
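+// Illustrative sketch (editorial, not part of the generated file): vmlal_* is
+// simd_add over the widening product from vmull_*, so the multiply itself
+// cannot overflow at the source width:
+//
+//     let r = unsafe { vmlal_s16(acc, x, y) };
+//     // per lane: r[i] == acc[i].wrapping_add((x[i] as i32) * (y[i] as i32))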
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u32)"]
+#[doc = "Signed multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull, LANE = 1)
+    assert_instr(smlal)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21115,23 +23357,21 @@ pub unsafe fn vmull_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmull_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    vmull_u32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
+pub unsafe fn vmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    simd_add(a, vmull_s32(b, c))
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u32)"]
+#[doc = "Unsigned multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull, LANE = 1)
+    assert_instr(umlal)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21140,21 +23380,20 @@ pub unsafe fn vmull_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> u
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmull_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    vmull_u32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
+pub unsafe fn vmlal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
+    simd_add(a, vmull_u8(b, c))
 }
-#[doc = "Vector long multiply with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s16)"]
+#[doc = "Unsigned multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull)
+    assert_instr(umlal)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -21164,20 +23403,20 @@ pub unsafe fn vmull_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
-    vmull_s16(a, vdup_n_s16(b))
+pub unsafe fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
+    simd_add(a, vmull_u16(b, c))
 }
-#[doc = "Vector long multiply with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s32)"]
+#[doc = "Unsigned multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull)
+    assert_instr(umlal)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -21187,20 +23426,20 @@ pub unsafe fn vmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
-    vmull_s32(a, vdup_n_s32(b))
+pub unsafe fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
+    simd_add(a, vmull_u32(b, c))
 }
-#[doc = "Vector long multiply with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u16)"]
+#[doc = "Floating-point multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -21210,20 +23449,20 @@ pub unsafe fn vmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmull_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t {
-    vmull_u16(a, vdup_n_u16(b))
+pub unsafe fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    simd_sub(a, simd_mul(b, c))
 }
-#[doc = "Vector long multiply with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u32)"]
+#[doc = "Floating-point multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -21233,21 +23472,22 @@ pub unsafe fn vmull_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmull_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t {
-    vmull_u32(a, vdup_n_u32(b))
+pub unsafe fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    simd_sub(a, simd_mul(b, c))
 }
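
The assert_instr(fmul) expectations above reflect that vmls_f32 and vmlsq_f32 lower to a separate multiply and subtract, matching the simd_sub(a, simd_mul(b, c)) bodies: the intermediate product is rounded, unlike the fused vfms_f32 family. A lane-wise sketch under the same aarch64 assumption:

#[cfg(target_arch = "aarch64")]
fn vmls_f32_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_f32(10.0);
        let b = vdup_n_f32(2.0);
        let c = vdup_n_f32(3.0);
        // Each lane computes a - b * c with an ordinary (rounded) multiply.
        let r = vmls_f32(a, b, c);
        assert_eq!(vget_lane_f32::<0>(r), 10.0 - 2.0 * 3.0);
    }
}
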
-#[doc = "Polynomial multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_p8)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.p8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(pmull)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21256,29 +23496,27 @@ pub unsafe fn vmull_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.pmull.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullp.v8i8")]
-        fn _vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t;
-    }
-    _vmull_p8(a, b)
+pub unsafe fn vmls_lane_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x2_t,
+) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmls_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
-#[doc = "Signed multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21287,29 +23525,27 @@ pub unsafe fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smull.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v4i16")]
-        fn _vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t;
-    }
-    _vmull_s16(a, b)
+pub unsafe fn vmls_laneq_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x4_t,
+) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmls_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
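
In the lane variants, the LANE const generic selects which element of c is broadcast before the multiply; the laneq forms take a quad-register c, so the static assert admits indices 0..=3 rather than 0..=1. A usage sketch (aarch64 assumed, demo name hypothetical):

#[cfg(target_arch = "aarch64")]
fn vmls_laneq_f32_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_f32(1.0);
        let b = vdup_n_f32(2.0);
        let c = vld1q_f32([10.0f32, 20.0, 30.0, 40.0].as_ptr());
        // Lane 3 of c is splatted: every result lane is 1.0 - 2.0 * 40.0.
        let r = vmls_laneq_f32::<3>(a, b, c);
        assert_eq!(vget_lane_f32::<1>(r), -79.0);
    }
}
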
-#[doc = "Signed multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21318,29 +23554,31 @@ pub unsafe fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smull.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v2i32")]
-        fn _vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t;
-    }
-    _vmull_s32(a, b)
+pub unsafe fn vmlsq_lane_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x2_t,
+) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlsq_f32(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Signed multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s8)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21349,29 +23587,31 @@ pub unsafe fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smull.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v8i8")]
-        fn _vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t;
-    }
-    _vmull_s8(a, b)
+pub unsafe fn vmlsq_laneq_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x4_t,
+) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsq_f32(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Unsigned multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u8)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21380,29 +23620,31 @@ pub unsafe fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umull.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v8i8")]
-        fn _vmull_u8(a: int8x8_t, b: int8x8_t) -> int16x8_t;
-    }
-    _vmull_u8(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vmls_lane_s16<const LANE: i32>(
+    a: int16x4_t,
+    b: int16x4_t,
+    c: int16x4_t,
+) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmls_s16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
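
The integer lane variants all share this shape: result lane i is a[i] - b[i] * c[LANE], with wrapping arithmetic. A portable scalar model (a hypothetical helper, not part of the API) makes the per-lane computation explicit:

// Scalar model of vmls_lane_s16: wrapping i16 arithmetic per lane.
fn vmls_lane_s16_model(a: [i16; 4], b: [i16; 4], c: [i16; 4], lane: usize) -> [i16; 4] {
    let mut r = [0i16; 4];
    for i in 0..4 {
        r[i] = a[i].wrapping_sub(b[i].wrapping_mul(c[lane]));
    }
    r
}
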
-#[doc = "Unsigned multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21411,29 +23653,31 @@ pub unsafe fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umull.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v4i16")]
-        fn _vmull_u16(a: int16x4_t, b: int16x4_t) -> int32x4_t;
-    }
-    _vmull_u16(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vmls_lane_u16<const LANE: i32>(
+    a: uint16x4_t,
+    b: uint16x4_t,
+    c: uint16x4_t,
+) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmls_u16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Unsigned multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21442,29 +23686,31 @@ pub unsafe fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umull.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v2i32")]
-        fn _vmull_u32(a: int32x2_t, b: int32x2_t) -> int64x2_t;
-    }
-    _vmull_u32(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vmls_laneq_s16<const LANE: i32>(
+    a: int16x4_t,
+    b: int16x4_t,
+    c: int16x8_t,
+) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmls_s16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fneg)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21473,21 +23719,31 @@ pub unsafe fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vneg_f32(a: float32x2_t) -> float32x2_t {
-    simd_neg(a)
+pub unsafe fn vmls_laneq_u16<const LANE: i32>(
+    a: uint16x4_t,
+    b: uint16x4_t,
+    c: uint16x8_t,
+) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmls_u16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fneg)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21496,21 +23752,44 @@ pub unsafe fn vneg_f32(a: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vnegq_f32(a: float32x4_t) -> float32x4_t {
-    simd_neg(a)
+pub unsafe fn vmlsq_lane_s16<const LANE: i32>(
+    a: int16x8_t,
+    b: int16x8_t,
+    c: int16x4_t,
+) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsq_s16(
+        a,
+        b,
+        simd_shuffle!(
+            c,
+            c,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
+            ]
+        ),
+    )
 }
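
The eight identical indices in the shuffle above amount to a lane broadcast: lane LANE of the double-register c is splatted across all eight output lanes. An equivalent formulation, sketched for aarch64 with a hypothetical name, extracts the lane and duplicates it:

#[cfg(target_arch = "aarch64")]
unsafe fn vmlsq_lane_s16_alt<const LANE: i32>(
    a: core::arch::aarch64::int16x8_t,
    b: core::arch::aarch64::int16x8_t,
    c: core::arch::aarch64::int16x4_t,
) -> core::arch::aarch64::int16x8_t {
    use core::arch::aarch64::*;
    // Same result as vmlsq_lane_s16::<LANE>(a, b, c): splat c[LANE], then mls.
    vmlsq_s16(a, b, vdupq_n_s16(vget_lane_s16::<LANE>(c)))
}
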
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s8)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(neg)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21519,21 +23798,44 @@ pub unsafe fn vnegq_f32(a: float32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vneg_s8(a: int8x8_t) -> int8x8_t {
-    simd_neg(a)
+pub unsafe fn vmlsq_lane_u16<const LANE: i32>(
+    a: uint16x8_t,
+    b: uint16x8_t,
+    c: uint16x4_t,
+) -> uint16x8_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsq_u16(
+        a,
+        b,
+        simd_shuffle!(
+            c,
+            c,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
+            ]
+        ),
+    )
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s8)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(neg)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21542,21 +23844,44 @@ pub unsafe fn vneg_s8(a: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vnegq_s8(a: int8x16_t) -> int8x16_t {
-    simd_neg(a)
+pub unsafe fn vmlsq_laneq_s16<const LANE: i32>(
+    a: int16x8_t,
+    b: int16x8_t,
+    c: int16x8_t,
+) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlsq_s16(
+        a,
+        b,
+        simd_shuffle!(
+            c,
+            c,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
+            ]
+        ),
+    )
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(neg)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21565,21 +23890,44 @@ pub unsafe fn vnegq_s8(a: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vneg_s16(a: int16x4_t) -> int16x4_t {
-    simd_neg(a)
+pub unsafe fn vmlsq_laneq_u16<const LANE: i32>(
+    a: uint16x8_t,
+    b: uint16x8_t,
+    c: uint16x8_t,
+) -> uint16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlsq_u16(
+        a,
+        b,
+        simd_shuffle!(
+            c,
+            c,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
+            ]
+        ),
+    )
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(neg)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21588,21 +23936,27 @@ pub unsafe fn vneg_s16(a: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vnegq_s16(a: int16x8_t) -> int16x8_t {
-    simd_neg(a)
+pub unsafe fn vmls_lane_s32<const LANE: i32>(
+    a: int32x2_t,
+    b: int32x2_t,
+    c: int32x2_t,
+) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmls_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(neg)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21611,21 +23965,27 @@ pub unsafe fn vnegq_s16(a: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vneg_s32(a: int32x2_t) -> int32x2_t {
-    simd_neg(a)
+pub unsafe fn vmls_lane_u32<const LANE: i32>(
+    a: uint32x2_t,
+    b: uint32x2_t,
+    c: uint32x2_t,
+) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmls_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(neg)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21634,21 +23994,27 @@ pub unsafe fn vneg_s32(a: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vnegq_s32(a: int32x4_t) -> int32x4_t {
-    simd_neg(a)
+pub unsafe fn vmls_laneq_s32<const LANE: i32>(
+    a: int32x2_t,
+    b: int32x2_t,
+    c: int32x4_t,
+) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmls_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s8)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21657,21 +24023,27 @@ pub unsafe fn vnegq_s32(a: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorr_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    simd_or(a, b)
+pub unsafe fn vmls_laneq_u32<const LANE: i32>(
+    a: uint32x2_t,
+    b: uint32x2_t,
+    c: uint32x4_t,
+) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmls_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s8)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21680,21 +24052,31 @@ pub unsafe fn vorr_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    simd_or(a, b)
+pub unsafe fn vmlsq_lane_s32<const LANE: i32>(
+    a: int32x4_t,
+    b: int32x4_t,
+    c: int32x2_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlsq_s32(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21703,21 +24085,31 @@ pub unsafe fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorr_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    simd_or(a, b)
+pub unsafe fn vmlsq_lane_u32<const LANE: i32>(
+    a: uint32x4_t,
+    b: uint32x4_t,
+    c: uint32x2_t,
+) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlsq_u32(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21726,21 +24118,31 @@ pub unsafe fn vorr_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorrq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    simd_or(a, b)
+pub unsafe fn vmlsq_laneq_s32<const LANE: i32>(
+    a: int32x4_t,
+    b: int32x4_t,
+    c: int32x4_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsq_s32(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21749,20 +24151,29 @@ pub unsafe fn vorrq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorr_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    simd_or(a, b)
+pub unsafe fn vmlsq_laneq_u32<const LANE: i32>(
+    a: uint32x4_t,
+    b: uint32x4_t,
+    c: uint32x4_t,
+) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsq_u32(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -21772,20 +24183,20 @@ pub unsafe fn vorr_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorrq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    simd_or(a, b)
+pub unsafe fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
+    vmls_f32(a, b, vdup_n_f32(c))
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s64)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -21795,20 +24206,20 @@ pub unsafe fn vorrq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorr_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    simd_or(a, b)
+pub unsafe fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
+    vmlsq_f32(a, b, vdupq_n_f32(c))
 }
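
The _n_ forms are thin wrappers that splat the scalar operand and defer to the vector form, so vmlsq_n_f32(a, b, 2.0) and vmlsq_f32(a, b, vdupq_n_f32(2.0)) are interchangeable. A short sketch, again assuming an aarch64 host:

#[cfg(target_arch = "aarch64")]
fn vmlsq_n_f32_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdupq_n_f32(5.0);
        let b = vdupq_n_f32(1.5);
        let r = vmlsq_n_f32(a, b, 2.0);
        let via_dup = vmlsq_f32(a, b, vdupq_n_f32(2.0));
        // Both spellings produce 5.0 - 1.5 * 2.0 in every lane.
        assert_eq!(vgetq_lane_f32::<2>(r), vgetq_lane_f32::<2>(via_dup));
        assert_eq!(vgetq_lane_f32::<2>(r), 5.0 - 1.5 * 2.0);
    }
}
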
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s64)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -21818,20 +24229,20 @@ pub unsafe fn vorr_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorrq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    simd_or(a, b)
+pub unsafe fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
+    vmls_s16(a, b, vdup_n_s16(c))
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u8)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -21841,20 +24252,20 @@ pub unsafe fn vorrq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorr_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    simd_or(a, b)
+pub unsafe fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t {
+    vmlsq_s16(a, b, vdupq_n_s16(c))
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u8)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -21864,20 +24275,20 @@ pub unsafe fn vorr_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    simd_or(a, b)
+pub unsafe fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t {
+    vmls_u16(a, b, vdup_n_u16(c))
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -21887,20 +24298,20 @@ pub unsafe fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorr_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    simd_or(a, b)
+pub unsafe fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t {
+    vmlsq_u16(a, b, vdupq_n_u16(c))
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -21910,20 +24321,20 @@ pub unsafe fn vorr_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorrq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    simd_or(a, b)
+pub unsafe fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t {
+    vmls_s32(a, b, vdup_n_s32(c))
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -21933,20 +24344,20 @@ pub unsafe fn vorrq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorr_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    simd_or(a, b)
+pub unsafe fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t {
+    vmlsq_s32(a, b, vdupq_n_s32(c))
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -21956,20 +24367,20 @@ pub unsafe fn vorr_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorrq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    simd_or(a, b)
+pub unsafe fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t {
+    vmls_u32(a, b, vdup_n_u32(c))
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u64)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -21979,20 +24390,20 @@ pub unsafe fn vorrq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorr_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    simd_or(a, b)
+pub unsafe fn vmlsq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t {
+    vmlsq_u32(a, b, vdupq_n_u32(c))
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u64)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22002,20 +24413,20 @@ pub unsafe fn vorr_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorrq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    simd_or(a, b)
+pub unsafe fn vmls_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
+    simd_sub(a, simd_mul(b, c))
 }
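
Like the other integer forms, vmls_s8 wraps on overflow rather than saturating. A one-lane scalar model (hypothetical helper):

// One lane of vmls_s8: a - b * c in wrapping i8 arithmetic.
// With a = -128, b = 2, c = 64: 2 * 64 wraps to -128, and -128 - (-128) = 0.
fn vmls_s8_lane_model(a: i8, b: i8, c: i8) -> i8 {
    a.wrapping_sub(b.wrapping_mul(c))
}
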
-#[doc = "Floating-point add pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f32)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(faddp)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22025,28 +24436,20 @@ pub unsafe fn vorrq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.faddp.v2f32"
-        )]
-        fn _vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-    }
-    _vpadd_f32(a, b)
+pub unsafe fn vmlsq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
+    simd_sub(a, simd_mul(b, c))
 }
-#[doc = "Signed saturating Absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s8)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqabs)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22056,28 +24459,20 @@ pub unsafe fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqabs_s8(a: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i8")]
-        fn _vqabs_s8(a: int8x8_t) -> int8x8_t;
-    }
-    _vqabs_s8(a)
+pub unsafe fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+    simd_sub(a, simd_mul(b, c))
 }
-#[doc = "Signed saturating Absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s8)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqabs)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22087,28 +24482,20 @@ pub unsafe fn vqabs_s8(a: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqabsq_s8(a: int8x16_t) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v16i8")]
-        fn _vqabsq_s8(a: int8x16_t) -> int8x16_t;
-    }
-    _vqabsq_s8(a)
+pub unsafe fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+    simd_sub(a, simd_mul(b, c))
 }
-#[doc = "Signed saturating Absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s16)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqabs)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22118,28 +24505,20 @@ pub unsafe fn vqabsq_s8(a: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqabs_s16(a: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i16")]
-        fn _vqabs_s16(a: int16x4_t) -> int16x4_t;
-    }
-    _vqabs_s16(a)
+pub unsafe fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+    simd_sub(a, simd_mul(b, c))
 }
-#[doc = "Signed saturating Absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s16)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqabs)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22149,28 +24528,20 @@ pub unsafe fn vqabs_s16(a: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqabsq_s16(a: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i16")]
-        fn _vqabsq_s16(a: int16x8_t) -> int16x8_t;
-    }
-    _vqabsq_s16(a)
+pub unsafe fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    simd_sub(a, simd_mul(b, c))
 }
-#[doc = "Signed saturating Absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s32)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqabs)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22180,28 +24551,20 @@ pub unsafe fn vqabsq_s16(a: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqabs_s32(a: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v2i32")]
-        fn _vqabs_s32(a: int32x2_t) -> int32x2_t;
-    }
-    _vqabs_s32(a)
+pub unsafe fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
+    simd_sub(a, simd_mul(b, c))
 }
-#[doc = "Signed saturating Absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s32)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqabs)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22211,28 +24574,20 @@ pub unsafe fn vqabs_s32(a: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqabsq_s32(a: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i32")]
-        fn _vqabsq_s32(a: int32x4_t) -> int32x4_t;
-    }
-    _vqabsq_s32(a)
+pub unsafe fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
+    simd_sub(a, simd_mul(b, c))
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s8)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22242,28 +24597,20 @@ pub unsafe fn vqabsq_s32(a: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqadd.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i8")]
-        fn _vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-    _vqadd_s8(a, b)
+pub unsafe fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
+    simd_sub(a, simd_mul(b, c))
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s8)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22273,28 +24620,20 @@ pub unsafe fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqadd.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v16i8")]
-        fn _vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    }
-    _vqaddq_s8(a, b)
+pub unsafe fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+    simd_sub(a, simd_mul(b, c))
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s16)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22304,28 +24643,20 @@ pub unsafe fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqadd.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i16")]
-        fn _vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    _vqadd_s16(a, b)
+pub unsafe fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
+    simd_sub(a, simd_mul(b, c))
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s16)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22335,29 +24666,22 @@ pub unsafe fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqadd.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i16")]
-        fn _vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-    _vqaddq_s16(a, b)
+pub unsafe fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    simd_sub(a, simd_mul(b, c))
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s32)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(smlsl, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -22366,29 +24690,31 @@ pub unsafe fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqadd.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i32")]
-        fn _vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    _vqadd_s32(a, b)
+pub unsafe fn vmlsl_lane_s16<const LANE: i32>(
+    a: int32x4_t,
+    b: int16x4_t,
+    c: int16x4_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsl_s16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
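The _lane_ variants splat lane LANE of c with simd_shuffle! and then defer to the plain widening form. Scalar model of vmlsl_lane_s16 (a sketch; the i16 products are widened to i32 before subtracting, so the multiply itself cannot overflow):

fn vmlsl_lane_s16_model<const LANE: usize>(
    a: [i32; 4],
    b: [i16; 4],
    c: [i16; 4],
) -> [i32; 4] {
    // Equivalent to vmlsl_s16(a, b, splat-of-c[LANE]).
    core::array::from_fn(|i| a[i].wrapping_sub(b[i] as i32 * c[LANE] as i32))
}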
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s32)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(smlsl, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -22397,29 +24723,31 @@ pub unsafe fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqadd.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i32")]
-        fn _vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-    _vqaddq_s32(a, b)
+pub unsafe fn vmlsl_laneq_s16<const LANE: i32>(
+    a: int32x4_t,
+    b: int16x4_t,
+    c: int16x8_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlsl_s16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s64)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(smlsl, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -22428,29 +24756,27 @@ pub unsafe fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqadd.v1i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v1i64")]
-        fn _vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
-    }
-    _vqadd_s64(a, b)
+pub unsafe fn vmlsl_lane_s32<const LANE: i32>(
+    a: int64x2_t,
+    b: int32x2_t,
+    c: int32x2_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlsl_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s64)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(smlsl, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -22459,29 +24785,27 @@ pub unsafe fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqadd.v2i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i64")]
-        fn _vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
-    }
-    _vqaddq_s64(a, b)
+pub unsafe fn vmlsl_laneq_s32<const LANE: i32>(
+    a: int64x2_t,
+    b: int32x2_t,
+    c: int32x4_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsl_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
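_lane_ and _laneq_ differ only in where the scalar lane comes from: _lane_ indexes a 64-bit vector (LANE < 2 for i32) and _laneq_ a 128-bit one (LANE < 4), which is exactly what the differing static_assert_uimm_bits! widths encode. A hypothetical equivalence, assuming the usual vget_high_s32 helper from this module:

unsafe fn laneq_is_lane_on_a_half(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
    // Lane 3 of the 128-bit vector is lane 1 of its high 64-bit half,
    // so this returns the same value as vmlsl_laneq_s32::<3>(a, b, c).
    vmlsl_lane_s32::<1>(a, b, vget_high_s32(c))
}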
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u8)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(umlsl, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -22490,29 +24814,31 @@ pub unsafe fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqadd.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v8i8")]
-        fn _vqadd_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-    _vqadd_u8(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vmlsl_lane_u16<const LANE: i32>(
+    a: uint32x4_t,
+    b: uint16x4_t,
+    c: uint16x4_t,
+) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsl_u16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u8)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(umlsl, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -22521,29 +24847,31 @@ pub unsafe fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqadd.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v16i8")]
-        fn _vqaddq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    }
-    _vqaddq_u8(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vmlsl_laneq_u16<const LANE: i32>(
+    a: uint32x4_t,
+    b: uint16x4_t,
+    c: uint16x8_t,
+) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlsl_u16(
+        a,
+        b,
+        simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u16)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(umlsl, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -22552,29 +24880,27 @@ pub unsafe fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqadd.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v4i16")]
-        fn _vqadd_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    _vqadd_u16(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vmlsl_lane_u32<const LANE: i32>(
+    a: uint64x2_t,
+    b: uint32x2_t,
+    c: uint32x2_t,
+) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlsl_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u16)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(umlsl, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -22583,28 +24909,25 @@ pub unsafe fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqadd.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v8i16")]
-        fn _vqaddq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-    _vqaddq_u16(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vmlsl_laneq_u32<const LANE: i32>(
+    a: uint64x2_t,
+    b: uint32x2_t,
+    c: uint32x4_t,
+) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsl_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32]))
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u32)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(smlsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22614,28 +24937,20 @@ pub unsafe fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqadd.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i32")]
-        fn _vqadd_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    _vqadd_u32(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
+    vmlsl_s16(a, b, vdup_n_s16(c))
 }
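The _n_ variants are shorthand for broadcasting the scalar with vdup before the widening multiply-subtract. A hypothetical use (target-feature gating elided):

unsafe fn subtract_scaled(acc: int32x4_t, v: int16x4_t) -> int32x4_t {
    // acc[i] - v[i] * 3 in each of the four lanes; identical to
    // vmlsl_s16(acc, v, vdup_n_s16(3)).
    vmlsl_n_s16(acc, v, 3)
}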
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u32)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(smlsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22645,28 +24960,20 @@ pub unsafe fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqadd.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v4i32")]
-        fn _vqaddq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-    _vqaddq_u32(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
+    vmlsl_s32(a, b, vdup_n_s32(c))
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u64)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(umlsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22676,28 +24983,20 @@ pub unsafe fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqadd.v1i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v1i64")]
-        fn _vqadd_u64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
-    }
-    _vqadd_u64(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vmlsl_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
+    vmlsl_u16(a, b, vdup_n_u16(c))
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u64)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(umlsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22707,30 +25006,21 @@ pub unsafe fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqadd.v2i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i64")]
-        fn _vqaddq_u64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
-    }
-    _vqaddq_u64(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
+    vmlsl_u32(a, b, vdup_n_u32(c))
 }
-#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s16)"]
+#[doc = "Signed multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlal, N = 2)
+    assert_instr(smlsl)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -22739,27 +25029,21 @@ pub unsafe fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmlal_lane_s16<const N: i32>(
-    a: int32x4_t,
-    b: int16x4_t,
-    c: int16x4_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    vqaddq_s32(a, vqdmull_lane_s16::<N>(b, c))
+pub unsafe fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
+    simd_sub(a, vmull_s8(b, c))
 }
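The plain widening forms now compose vmull with simd_sub rather than binding an LLVM intrinsic. Scalar model of vmlsl_s8 (a sketch; every i8 product fits in i16, so only the final subtraction can wrap):

fn vmlsl_s8_model(a: [i16; 8], b: [i8; 8], c: [i8; 8]) -> [i16; 8] {
    core::array::from_fn(|i| a[i].wrapping_sub(b[i] as i16 * c[i] as i16))
}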
-#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s32)"]
+#[doc = "Signed multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlal, N = 1)
+    assert_instr(smlsl)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -22768,25 +25052,20 @@ pub unsafe fn vqdmlal_lane_s16<const N: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmlal_lane_s32<const N: i32>(
-    a: int64x2_t,
-    b: int32x2_t,
-    c: int32x2_t,
-) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    vqaddq_s64(a, vqdmull_lane_s32::<N>(b, c))
+pub unsafe fn vmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    simd_sub(a, vmull_s16(b, c))
 }
-#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s16)"]
+#[doc = "Signed multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlal)
+    assert_instr(smlsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22796,20 +25075,20 @@ pub unsafe fn vqdmlal_lane_s32<const N: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
-    vqaddq_s32(a, vqdmull_n_s16(b, c))
+pub unsafe fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    simd_sub(a, vmull_s32(b, c))
 }
-#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s32)"]
+#[doc = "Unsigned multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlal)
+    assert_instr(umlsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22819,20 +25098,20 @@ pub unsafe fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
-    vqaddq_s64(a, vqdmull_n_s32(b, c))
+pub unsafe fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
+    simd_sub(a, vmull_u8(b, c))
 }
-#[doc = "Signed saturating doubling multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s16)"]
+#[doc = "Unsigned multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlal)
+    assert_instr(umlsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22842,20 +25121,20 @@ pub unsafe fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    vqaddq_s32(a, vqdmull_s16(b, c))
+pub unsafe fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
+    simd_sub(a, vmull_u16(b, c))
 }
-#[doc = "Signed saturating doubling multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s32)"]
+#[doc = "Unsigned multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlal)
+    assert_instr(umlsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22865,78 +25144,82 @@ pub unsafe fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    vqaddq_s64(a, vqdmull_s32(b, c))
+pub unsafe fn vmlsl_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
+    simd_sub(a, vmull_u32(b, c))
 }
-#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s16)"]
+#[doc = "8-bit integer matrix multiply-accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmmlaq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 2))]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlsl, N = 2)
+    assert_instr(smmla)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmlsl_lane_s16<const N: i32>(
-    a: int32x4_t,
-    b: int16x4_t,
-    c: int16x4_t,
-) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    vqsubq_s32(a, vqdmull_lane_s16::<N>(b, c))
+pub unsafe fn vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smmla.v4i32.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.smmla.v4i32.v16i8")]
+        fn _vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t;
+    }
+    _vmmlaq_s32(a, b, c)
 }
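SMMLA is the one genuinely two-dimensional operation in this hunk: a holds a row-major 2x2 i32 accumulator, while b and c each hold a 2x8 i8 matrix in row-major lane order, with c used transposed. A scalar model under that layout assumption (a sketch, not normative):

fn smmla_model(acc: [i32; 4], b: [i8; 16], c: [i8; 16]) -> [i32; 4] {
    let mut out = acc;
    for i in 0..2 {
        for j in 0..2 {
            for k in 0..8 {
                let p = b[8 * i + k] as i32 * c[8 * j + k] as i32;
                out[2 * i + j] = out[2 * i + j].wrapping_add(p);
            }
        }
    }
    out
}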
-#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s32)"]
+#[doc = "8-bit integer matrix multiply-accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmmlaq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 1))]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlsl, N = 1)
+    assert_instr(ummla)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmlsl_lane_s32<const N: i32>(
-    a: int64x2_t,
-    b: int32x2_t,
-    c: int32x2_t,
-) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    vqsubq_s64(a, vqdmull_lane_s32::<N>(b, c))
+pub unsafe fn vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ummla.v4i32.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.ummla.v4i32.v16i8")]
+        fn _vmmlaq_u32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t;
+    }
+    _vmmlaq_u32(a.as_signed(), b.as_signed(), c.as_signed()).as_unsigned()
 }
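The unsigned variant reuses the signed LLVM declaration: as_signed and as_unsigned are bitwise reinterpretations, not value conversions, so UMMLA still sees the original unsigned bits. Scalar analogue of that round trip (a sketch):

fn reinterpret_roundtrip(x: u8) -> u8 {
    // In as i8, back out as u8: the bit pattern is untouched (255 -> -1 -> 255).
    (x as i8) as u8
}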
-#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlsl)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22946,20 +25229,20 @@ pub unsafe fn vqdmlsl_lane_s32<const N: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
-    vqsubq_s32(a, vqdmull_n_s16(b, c))
+pub unsafe fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    simd_mul(a, b)
 }
-#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlsl)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22969,21 +25252,22 @@ pub unsafe fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
-    vqsubq_s64(a, vqdmull_n_s32(b, c))
+pub unsafe fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    simd_mul(a, b)
 }
-#[doc = "Signed saturating doubling multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s16)"]
+#[doc = "Floating-point multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlsl)
+    assert_instr(fmul, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -22992,21 +25276,23 @@ pub unsafe fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    vqsubq_s32(a, vqdmull_s16(b, c))
+pub unsafe fn vmul_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }
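vmul_lane_f32 multiplies every lane of a by the one lane of b selected at compile time; the const generic plus rustc_legacy_const_generics(2) keeps the pre-const-generics call form vmul_lane_f32(a, b, 1) working. A hypothetical use:

unsafe fn scale_by_high_lane(a: float32x2_t, b: float32x2_t) -> float32x2_t {
    // Each lane of a times b[1].
    vmul_lane_f32::<1>(a, b)
}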
-#[doc = "Signed saturating doubling multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s32)"]
+#[doc = "Floating-point multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlsl)
+    assert_instr(fmul, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23015,20 +25301,21 @@ pub unsafe fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    vqsubq_s64(a, vqdmull_s32(b, c))
+pub unsafe fn vmul_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }
-#[doc = "Vector saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s16)"]
+#[doc = "Floating-point multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh, LANE = 0)
+    assert_instr(fmul, LANE = 0)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -23039,21 +25326,24 @@ pub unsafe fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vqdmulh_s16(a, vdup_n_s16(simd_extract!(b, LANE as u32)))
+pub unsafe fn vmulq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    simd_mul(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Vector saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s16)"]
+#[doc = "Floating-point multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh, LANE = 0)
+    assert_instr(fmul, LANE = 0)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -23064,21 +25354,24 @@ pub unsafe fn vqdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    vqdmulhq_s16(a, vdupq_n_s16(simd_extract!(b, LANE as u32)))
+pub unsafe fn vmulq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    simd_mul(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Vector saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh, LANE = 0)
+    assert_instr(mul, LANE = 1)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -23089,21 +25382,24 @@ pub unsafe fn vqdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
+pub unsafe fn vmul_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    vqdmulh_s32(a, vdup_n_s32(simd_extract!(b, LANE as u32)))
+    simd_mul(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Vector saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh, LANE = 0)
+    assert_instr(mul, LANE = 1)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -23114,22 +25410,39 @@ pub unsafe fn vqdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+pub unsafe fn vmulq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    vqdmulhq_s32(a, vdupq_n_s32(simd_extract!(b, LANE as u32)))
+    simd_mul(
+        a,
+        simd_shuffle!(
+            b,
+            b,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
+            ]
+        ),
+    )
 }
-#[doc = "Vector saturating doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23138,22 +25451,23 @@ pub unsafe fn vqdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
-    let b: int16x4_t = vdup_n_s16(b);
-    vqdmulh_s16(a, b)
+pub unsafe fn vmul_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }
-#[doc = "Vector saturating doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23162,22 +25476,26 @@ pub unsafe fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
-    let b: int16x8_t = vdupq_n_s16(b);
-    vqdmulhq_s16(a, b)
+pub unsafe fn vmulq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    simd_mul(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Vector saturating doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23186,22 +25504,26 @@ pub unsafe fn vqdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
-    let b: int32x2_t = vdup_n_s32(b);
-    vqdmulh_s32(a, b)
+pub unsafe fn vmul_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    simd_mul(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Vector saturating doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23210,22 +25532,39 @@ pub unsafe fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
-    let b: int32x4_t = vdupq_n_s32(b);
-    vqdmulhq_s32(a, b)
+pub unsafe fn vmulq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t {
+    static_assert_uimm_bits!(LANE, 2);
+    simd_mul(
+        a,
+        simd_shuffle!(
+            b,
+            b,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
+            ]
+        ),
+    )
 }
-#[doc = "Signed saturating doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23234,29 +25573,23 @@ pub unsafe fn vqdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqdmulh.v4i16"
-        )]
-        fn _vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    _vqdmulh_s16(a, b)
+pub unsafe fn vmul_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }
-#[doc = "Signed saturating doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23265,29 +25598,26 @@ pub unsafe fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v8i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqdmulh.v8i16"
-        )]
-        fn _vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-    _vqdmulhq_s16(a, b)
+pub unsafe fn vmulq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    simd_mul(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Signed saturating doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23296,29 +25626,26 @@ pub unsafe fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqdmulh.v2i32"
-        )]
-        fn _vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    _vqdmulh_s32(a, b)
+pub unsafe fn vmul_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    simd_mul(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Signed saturating doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23327,28 +25654,37 @@ pub unsafe fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqdmulh.v4i32"
-        )]
-        fn _vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-    _vqdmulhq_s32(a, b)
-}
-#[doc = "Vector saturating doubling long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmull, N = 2)
+pub unsafe fn vmulq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    simd_mul(
+        a,
+        simd_shuffle!(
+            b,
+            b,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
+            ]
+        ),
+    )
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(mul, LANE = 1)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -23359,22 +25695,21 @@ pub unsafe fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmull_lane_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
-    vqdmull_s16(a, b)
+pub unsafe fn vmul_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }
-#[doc = "Vector saturating doubling long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmull, N = 1)
+    assert_instr(mul, LANE = 1)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -23385,23 +25720,26 @@ pub unsafe fn vqdmull_lane_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int3
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmull_lane_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
-    vqdmull_s32(a, b)
+pub unsafe fn vmulq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    simd_mul(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Vector saturating doubling long multiply with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmull)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23410,21 +25748,26 @@ pub unsafe fn vqdmull_lane_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int6
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
-    vqdmull_s16(a, vdup_n_s16(b))
+pub unsafe fn vmul_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    simd_mul(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Vector saturating doubling long multiply with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmull)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23433,21 +25776,39 @@ pub unsafe fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
-    vqdmull_s32(a, vdup_n_s32(b))
+pub unsafe fn vmulq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    simd_mul(
+        a,
+        simd_shuffle!(
+            b,
+            b,
+            [
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32,
+                LANE as u32
+            ]
+        ),
+    )
 }
-#[doc = "Signed saturating doubling multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmull)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23456,29 +25817,23 @@ pub unsafe fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v4i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqdmull.v4i32"
-        )]
-        fn _vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t;
-    }
-    _vqdmull_s16(a, b)
+pub unsafe fn vmul_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }
-#[doc = "Signed saturating doubling multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmull)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23487,28 +25842,24 @@ pub unsafe fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v2i64")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqdmull.v2i64"
-        )]
-        fn _vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t;
-    }
-    _vqdmull_s32(a, b)
+pub unsafe fn vmulq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    simd_mul(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
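// Illustrative sketch, not part of the patch above: a hypothetical example
// assuming an AArch64 target with NEON, using only stable std::arch::aarch64
// intrinsics. It shows that the by-lane forms are plain element-wise
// multiplies against a lane broadcast, i.e. vmulq_laneq_s16::<LANE> agrees
// with extracting the lane and broadcasting it via vdupq_n_s16 (LANE itself
// is validated at compile time by static_assert_uimm_bits!).
#[cfg(target_arch = "aarch64")]
fn demo_mul_lane() {
    use std::arch::aarch64::*;
    unsafe {
        let a = vld1q_s16([1i16, 2, 3, 4, 5, 6, 7, 8].as_ptr());
        let b = vld1q_s16([10i16, 20, 30, 40, 50, 60, 70, 80].as_ptr());
        let by_lane = vmulq_laneq_s16::<3>(a, b); // every a[i] * b[3]
        let by_dup = vmulq_s16(a, vdupq_n_s16(vgetq_lane_s16::<3>(b)));
        let (mut x, mut y) = ([0i16; 8], [0i16; 8]);
        vst1q_s16(x.as_mut_ptr(), by_lane);
        vst1q_s16(y.as_mut_ptr(), by_dup);
        assert_eq!(x, y); // both compute a[i] * 40
    }
}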
-#[doc = "Signed saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s16)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqxtn)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23518,28 +25869,20 @@ pub unsafe fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqmovn_s16(a: int16x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v8i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqxtn.v8i8"
-        )]
-        fn _vqmovn_s16(a: int16x8_t) -> int8x8_t;
-    }
-    _vqmovn_s16(a)
+pub unsafe fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t {
+    simd_mul(a, vdup_n_f32(b))
 }
-#[doc = "Signed saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s32)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqxtn)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23549,28 +25892,20 @@ pub unsafe fn vqmovn_s16(a: int16x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqmovn_s32(a: int32x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v4i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqxtn.v4i16"
-        )]
-        fn _vqmovn_s32(a: int32x4_t) -> int16x4_t;
-    }
-    _vqmovn_s32(a)
+pub unsafe fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t {
+    simd_mul(a, vdupq_n_f32(b))
 }
-#[doc = "Signed saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s64)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqxtn)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23580,28 +25915,20 @@ pub unsafe fn vqmovn_s32(a: int32x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqmovn_s64(a: int64x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqxtn.v2i32"
-        )]
-        fn _vqmovn_s64(a: int64x2_t) -> int32x2_t;
-    }
-    _vqmovn_s64(a)
+pub unsafe fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
+    simd_mul(a, vdup_n_s16(b))
 }
-#[doc = "Unsigned saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u16)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqxtn)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23611,28 +25938,20 @@ pub unsafe fn vqmovn_s64(a: int64x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v8i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqxtn.v8i8"
-        )]
-        fn _vqmovn_u16(a: int16x8_t) -> int8x8_t;
-    }
-    _vqmovn_u16(a.as_signed()).as_unsigned()
+pub unsafe fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
+    simd_mul(a, vdupq_n_s16(b))
 }
-#[doc = "Unsigned saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u32)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqxtn)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23642,28 +25961,20 @@ pub unsafe fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v4i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqxtn.v4i16"
-        )]
-        fn _vqmovn_u32(a: int32x4_t) -> int16x4_t;
-    }
-    _vqmovn_u32(a.as_signed()).as_unsigned()
+pub unsafe fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
+    simd_mul(a, vdup_n_s32(b))
 }
-#[doc = "Unsigned saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u64)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqxtn)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23673,28 +25984,20 @@ pub unsafe fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqxtn.v2i32"
-        )]
-        fn _vqmovn_u64(a: int64x2_t) -> int32x2_t;
-    }
-    _vqmovn_u64(a.as_signed()).as_unsigned()
+pub unsafe fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
+    simd_mul(a, vdupq_n_s32(b))
 }
-#[doc = "Signed saturating extract unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s16)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqxtun)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23704,28 +26007,20 @@ pub unsafe fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqmovun_s16(a: int16x8_t) -> uint8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v8i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqxtun.v8i8"
-        )]
-        fn _vqmovun_s16(a: int16x8_t) -> int8x8_t;
-    }
-    _vqmovun_s16(a).as_unsigned()
+pub unsafe fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t {
+    simd_mul(a, vdup_n_u16(b))
 }
-#[doc = "Signed saturating extract unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s32)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqxtun)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23735,28 +26030,20 @@ pub unsafe fn vqmovun_s16(a: int16x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqmovun_s32(a: int32x4_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v4i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqxtun.v4i16"
-        )]
-        fn _vqmovun_s32(a: int32x4_t) -> int16x4_t;
-    }
-    _vqmovun_s32(a).as_unsigned()
+pub unsafe fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t {
+    simd_mul(a, vdupq_n_u16(b))
 }
-#[doc = "Signed saturating extract unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s64)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqxtun)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23766,28 +26053,20 @@ pub unsafe fn vqmovun_s32(a: int32x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqmovun_s64(a: int64x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqxtun.v2i32"
-        )]
-        fn _vqmovun_s64(a: int64x2_t) -> int32x2_t;
-    }
-    _vqmovun_s64(a).as_unsigned()
+pub unsafe fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t {
+    simd_mul(a, vdup_n_u32(b))
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s8)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqneg)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23797,28 +26076,20 @@ pub unsafe fn vqmovun_s64(a: int64x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqneg_s8(a: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqneg.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i8")]
-        fn _vqneg_s8(a: int8x8_t) -> int8x8_t;
-    }
-    _vqneg_s8(a)
+pub unsafe fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t {
+    simd_mul(a, vdupq_n_u32(b))
 }
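// A minimal sketch of the _n (vector-by-scalar) forms above, under the same
// assumptions (hypothetical AArch64-only example): vmul_n_f32 is simd_mul
// against a vdup_n_f32 broadcast, so each result lane is a[i] * b.
#[cfg(target_arch = "aarch64")]
fn demo_mul_n() {
    use std::arch::aarch64::*;
    unsafe {
        let a = vld1_f32([1.5f32, -2.0].as_ptr());
        let r = vmul_n_f32(a, 4.0);
        let mut out = [0.0f32; 2];
        vst1_f32(out.as_mut_ptr(), r);
        assert_eq!(out, [6.0, -8.0]); // exact: both products are representable
    }
}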
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s8)"]
+#[doc = "Polynomial multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqneg)
+    assert_instr(pmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23828,28 +26099,28 @@ pub unsafe fn vqneg_s8(a: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqnegq_s8(a: int8x16_t) -> int8x16_t {
+pub unsafe fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v8i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqneg.v16i8"
+            link_name = "llvm.aarch64.neon.pmul.v8i8"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v16i8")]
-        fn _vqnegq_s8(a: int8x16_t) -> int8x16_t;
+        fn _vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t;
     }
-    _vqnegq_s8(a)
+    _vmul_p8(a, b)
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s16)"]
+#[doc = "Polynomial multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqneg)
+    assert_instr(pmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23859,28 +26130,28 @@ pub unsafe fn vqnegq_s8(a: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqneg_s16(a: int16x4_t) -> int16x4_t {
+pub unsafe fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v16i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqneg.v4i16"
+            link_name = "llvm.aarch64.neon.pmul.v16i8"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i16")]
-        fn _vqneg_s16(a: int16x4_t) -> int16x4_t;
+        fn _vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t;
     }
-    _vqneg_s16(a)
+    _vmulq_p8(a, b)
 }
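// Hedged illustration of vmul_p8 / vmulq_p8 above (hypothetical AArch64
// example): each lane is multiplied as a polynomial over GF(2) (a carry-less
// multiply truncated to 8 bits), so (x + 1) * (x + 1) = x^2 + 1, i.e.
// 0x03 * 0x03 gives 0x05 rather than the integer product 9.
#[cfg(target_arch = "aarch64")]
fn demo_mul_p8() {
    use std::arch::aarch64::*;
    unsafe {
        let r = vmul_p8(vdup_n_p8(0x03), vdup_n_p8(0x03));
        assert_eq!(vget_lane_p8::<0>(r), 0x05);
    }
}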
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqneg)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23890,28 +26161,20 @@ pub unsafe fn vqneg_s16(a: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqnegq_s16(a: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqneg.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i16")]
-        fn _vqnegq_s16(a: int16x8_t) -> int16x8_t;
-    }
-    _vqnegq_s16(a)
+pub unsafe fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    simd_mul(a, b)
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqneg)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23921,28 +26184,20 @@ pub unsafe fn vqnegq_s16(a: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqneg_s32(a: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqneg.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v2i32")]
-        fn _vqneg_s32(a: int32x2_t) -> int32x2_t;
-    }
-    _vqneg_s32(a)
+pub unsafe fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    simd_mul(a, b)
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqneg)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23952,30 +26207,21 @@ pub unsafe fn vqneg_s32(a: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqnegq_s32(a: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqneg.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i32")]
-        fn _vqnegq_s32(a: int32x4_t) -> int32x4_t;
-    }
-    _vqnegq_s32(a)
+pub unsafe fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    simd_mul(a, b)
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(mul)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -23984,24 +26230,21 @@ pub unsafe fn vqnegq_s32(a: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrdmulh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let b: int16x4_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vqrdmulh_s16(a, b)
+pub unsafe fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    simd_mul(a, b)
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(mul)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24010,24 +26253,21 @@ pub unsafe fn vqrdmulh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    let b: int32x2_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32]);
-    vqrdmulh_s32(a, b)
+pub unsafe fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    simd_mul(a, b)
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(mul)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24036,24 +26276,21 @@ pub unsafe fn vqrdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    let b: int16x4_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vqrdmulh_s16(a, b)
+pub unsafe fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    simd_mul(a, b)
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(mul)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24062,24 +26299,21 @@ pub unsafe fn vqrdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let b: int32x2_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32]);
-    vqrdmulh_s32(a, b)
+pub unsafe fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    simd_mul(a, b)
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(mul)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24088,37 +26322,21 @@ pub unsafe fn vqrdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let b: int16x8_t = simd_shuffle!(
-        b,
-        b,
-        [
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32
-        ]
-    );
-    vqrdmulhq_s16(a, b)
+pub unsafe fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    simd_mul(a, b)
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(mul)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24127,24 +26345,21 @@ pub unsafe fn vqrdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    let b: int32x4_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vqrdmulhq_s32(a, b)
+pub unsafe fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    simd_mul(a, b)
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(mul)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24153,37 +26368,21 @@ pub unsafe fn vqrdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    let b: int16x8_t = simd_shuffle!(
-        b,
-        b,
-        [
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32,
-            LANE as u32
-        ]
-    );
-    vqrdmulhq_s16(a, b)
+pub unsafe fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    simd_mul(a, b)
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(mul)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24192,22 +26391,20 @@ pub unsafe fn vqrdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    let b: int32x4_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-    vqrdmulhq_s32(a, b)
+pub unsafe fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    simd_mul(a, b)
 }
-#[doc = "Vector saturating rounding doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24217,21 +26414,22 @@ pub unsafe fn vqrdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
-    vqrdmulh_s16(a, vdup_n_s16(b))
+pub unsafe fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    simd_mul(a, b)
 }
-#[doc = "Vector saturating rounding doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s16)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(smull, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24240,21 +26438,26 @@ pub unsafe fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
-    vqrdmulhq_s16(a, vdupq_n_s16(b))
+pub unsafe fn vmull_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmull_s16(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
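+// A minimal sketch (hypothetical helper): `LANE = 1` broadcasts b[1] across
+// the multiplier, so every output lane is a[i] * b[1], widened i16 -> i32.
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+unsafe fn _example_vmull_lane_s16(a: int16x4_t) -> int32x4_t {
+    // b is zero everywhere except lane 1, which holds 7.
+    let b = vset_lane_s16::<1>(7, vdup_n_s16(0));
+    vmull_lane_s16::<1>(a, b) // each lane: a[i] * 7 as i32
+}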
-#[doc = "Vector saturating rounding doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s32)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(smull, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24263,21 +26466,26 @@ pub unsafe fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
-    vqrdmulh_s32(a, vdup_n_s32(b))
+pub unsafe fn vmull_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmull_s16(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
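+// Sketch of the `laneq` distinction (hypothetical helper): the only change
+// from the `lane` form is that the index selects from a 128-bit int16x8_t,
+// so LANE may be 0..=7, matching the 3-bit static assert above.
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+unsafe fn _example_vmull_laneq_s16(a: int16x4_t, b: int16x8_t) -> int32x4_t {
+    vmull_laneq_s16::<7>(a, b) // broadcast the highest of b's eight lanes
+}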
-#[doc = "Vector saturating rounding doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s32)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(smull, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24286,21 +26494,23 @@ pub unsafe fn vqrdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
-    vqrdmulhq_s32(a, vdupq_n_s32(b))
+pub unsafe fn vmull_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmull_s32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }
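+// Sketch (hypothetical helper): with a two-lane multiplier the index is a
+// single bit, which the 1-bit static assert above enforces.
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+unsafe fn _example_vmull_lane_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
+    vmull_lane_s32::<1>(a, b) // both i64 lanes use b[1]
+}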
-#[doc = "Signed saturating rounding doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s16)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(smull, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24309,29 +26519,23 @@ pub unsafe fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrdmulh.v4i16"
-        )]
-        fn _vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    _vqrdmulh_s16(a, b)
+pub unsafe fn vmull_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmull_s32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }
-#[doc = "Signed saturating rounding doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s16)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(umull, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24340,29 +26544,26 @@ pub unsafe fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v8i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrdmulh.v8i16"
-        )]
-        fn _vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-    _vqrdmulhq_s16(a, b)
+pub unsafe fn vmull_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmull_u16(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Signed saturating rounding doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s32)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(umull, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24371,29 +26572,26 @@ pub unsafe fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrdmulh.v2i32"
-        )]
-        fn _vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    _vqrdmulh_s32(a, b)
+pub unsafe fn vmull_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmull_u16(
+        a,
+        simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
+    )
 }
-#[doc = "Signed saturating rounding doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s32)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(umull, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24402,29 +26600,23 @@ pub unsafe fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrdmulh.v4i32"
-        )]
-        fn _vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-    _vqrdmulhq_s32(a, b)
+pub unsafe fn vmull_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmull_u32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }
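+// Sketch (hypothetical helper): widening to u64 means even u32::MAX squared
+// (0xFFFF_FFFE_0000_0001) cannot overflow an output lane.
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+unsafe fn _example_vmull_lane_u32() -> uint64x2_t {
+    let x = vdup_n_u32(u32::MAX);
+    vmull_lane_u32::<0>(x, x)
+}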
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s8)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(umull, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24433,28 +26625,21 @@ pub unsafe fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v8i8"
-        )]
-        fn _vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-    _vqrshl_s8(a, b)
+pub unsafe fn vmull_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmull_u32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32]))
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s8)"]
+#[doc = "Vector long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(smull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24464,28 +26649,20 @@ pub unsafe fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v16i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v16i8"
-        )]
-        fn _vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    }
-    _vqrshlq_s8(a, b)
+pub unsafe fn vmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
+    vmull_s16(a, vdup_n_s16(b))
 }
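+// Sketch (hypothetical helper): the `_n` form is shorthand for duplicating
+// the scalar first, i.e. it returns the same vector as
+// vmull_s16(a, vdup_n_s16(-300)).
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+unsafe fn _example_vmull_n_s16(a: int16x4_t) -> int32x4_t {
+    vmull_n_s16(a, -300) // each lane: a[i] as i32 * -300
+}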
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s16)"]
+#[doc = "Vector long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(smull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24495,28 +26672,20 @@ pub unsafe fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v4i16"
-        )]
-        fn _vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    _vqrshl_s16(a, b)
+pub unsafe fn vmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
+    vmull_s32(a, vdup_n_s32(b))
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s16)"]
+#[doc = "Vector long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(umull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24526,28 +26695,20 @@ pub unsafe fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v8i16"
-        )]
-        fn _vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-    _vqrshlq_s16(a, b)
+pub unsafe fn vmull_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t {
+    vmull_u16(a, vdup_n_u16(b))
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s32)"]
+#[doc = "Vector long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(umull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24557,28 +26718,20 @@ pub unsafe fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v2i32"
-        )]
-        fn _vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    _vqrshl_s32(a, b)
+pub unsafe fn vmull_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t {
+    vmull_u32(a, vdup_n_u32(b))
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s32)"]
+#[doc = "Polynomial multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.p8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(pmull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24588,28 +26741,28 @@ pub unsafe fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+pub unsafe fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v4i32"
+            link_name = "llvm.aarch64.neon.pmull.v8i16"
         )]
-        fn _vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullp.v8i16")]
+        fn _vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t;
     }
-    _vqrshlq_s32(a, b)
+    _vmull_p8(a, b)
 }
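+// Sketch (hypothetical helper): polynomial multiplication is carry-less, so
+// 0b11 * 0b11 over GF(2) is (x + 1)^2 = x^2 + 1 = 0b101, not the integer 9.
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+unsafe fn _example_vmull_p8() -> poly16x8_t {
+    vmull_p8(vdup_n_p8(0b11), vdup_n_p8(0b11)) // 0b101 in every p16 lane
+}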
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s64)"]
+#[doc = "Signed multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(smull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24619,28 +26772,28 @@ pub unsafe fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+pub unsafe fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v1i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v1i64"
+            link_name = "llvm.aarch64.neon.smull.v4i16"
         )]
-        fn _vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v4i16")]
+        fn _vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t;
     }
-    _vqrshl_s64(a, b)
+    _vmull_s16(a, b)
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s64)"]
+#[doc = "Signed multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(smull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24650,28 +26803,28 @@ pub unsafe fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+pub unsafe fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v2i64"
+            link_name = "llvm.aarch64.neon.smull.v2i32"
         )]
-        fn _vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v2i32")]
+        fn _vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t;
     }
-    _vqrshlq_s64(a, b)
+    _vmull_s32(a, b)
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u8)"]
+#[doc = "Signed multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(smull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24681,28 +26834,28 @@ pub unsafe fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
+pub unsafe fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v8i8"
+            link_name = "llvm.aarch64.neon.smull.v8i8"
         )]
-        fn _vqrshl_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v8i8")]
+        fn _vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t;
     }
-    _vqrshl_u8(a.as_signed(), b).as_unsigned()
+    _vmull_s8(a, b)
 }
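+// Sketch (hypothetical helper): the doubled output width holds any i8 * i8
+// product, e.g. i8::MAX * i8::MIN = -16256 fits comfortably in i16.
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+unsafe fn _example_vmull_s8() -> int16x8_t {
+    vmull_s8(vdup_n_s8(i8::MAX), vdup_n_s8(i8::MIN)) // [-16256; 8]
+}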
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u8)"]
+#[doc = "Unsigned multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(umull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24712,28 +26865,28 @@ pub unsafe fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
+pub unsafe fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v16i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v16i8"
+            link_name = "llvm.aarch64.neon.umull.v8i8"
         )]
-        fn _vqrshlq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v8i8")]
+        fn _vmull_u8(a: int8x8_t, b: int8x8_t) -> int16x8_t;
     }
-    _vqrshlq_u8(a.as_signed(), b).as_unsigned()
+    _vmull_u8(a.as_signed(), b.as_signed()).as_unsigned()
 }
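+// Sketch (hypothetical helper): the LLVM binding above is declared over
+// signed vectors, so the wrapper only reinterprets bits via as_signed() /
+// as_unsigned(); no value conversion happens on either side.
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+unsafe fn _example_vmull_u8() -> uint16x8_t {
+    vmull_u8(vdup_n_u8(255), vdup_n_u8(255)) // 65025 in every u16 lane
+}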
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u16)"]
+#[doc = "Unsigned multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(umull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24743,28 +26896,28 @@ pub unsafe fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
+pub unsafe fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v4i16"
+            link_name = "llvm.aarch64.neon.umull.v4i16"
         )]
-        fn _vqrshl_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v4i16")]
+        fn _vmull_u16(a: int16x4_t, b: int16x4_t) -> int32x4_t;
     }
-    _vqrshl_u16(a.as_signed(), b).as_unsigned()
+    _vmull_u16(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u16)"]
+#[doc = "Unsigned multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(umull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24774,28 +26927,28 @@ pub unsafe fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
+pub unsafe fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v8i16"
+            link_name = "llvm.aarch64.neon.umull.v2i32"
         )]
-        fn _vqrshlq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v2i32")]
+        fn _vmull_u32(a: int32x2_t, b: int32x2_t) -> int64x2_t;
     }
-    _vqrshlq_u16(a.as_signed(), b).as_unsigned()
+    _vmull_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u32)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(fneg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24805,28 +26958,20 @@ pub unsafe fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v2i32"
-        )]
-        fn _vqrshl_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    _vqrshl_u32(a.as_signed(), b).as_unsigned()
+pub unsafe fn vneg_f32(a: float32x2_t) -> float32x2_t {
+    simd_neg(a)
 }
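+// Sketch (hypothetical helper): negation only flips the IEEE sign bit, so
+// 0.0 becomes -0.0 and a NaN stays NaN with its sign inverted.
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+unsafe fn _example_vneg_f32() -> float32x2_t {
+    vneg_f32(vdup_n_f32(0.0)) // [-0.0, -0.0]
+}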
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u32)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(fneg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24836,28 +26981,20 @@ pub unsafe fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v4i32"
-        )]
-        fn _vqrshlq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-    _vqrshlq_u32(a.as_signed(), b).as_unsigned()
+pub unsafe fn vnegq_f32(a: float32x4_t) -> float32x4_t {
+    simd_neg(a)
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u64)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(neg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24867,28 +27004,20 @@ pub unsafe fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v1i64")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v1i64"
-        )]
-        fn _vqrshl_u64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
-    }
-    _vqrshl_u64(a.as_signed(), b).as_unsigned()
+pub unsafe fn vneg_s8(a: int8x8_t) -> int8x8_t {
+    simd_neg(a)
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u64)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(neg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24898,423 +27027,21 @@ pub unsafe fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i64")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v2i64"
-        )]
-        fn _vqrshlq_u64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
-    }
-    _vqrshlq_u64(a.as_signed(), b).as_unsigned()
-}
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v8i8")]
-        fn _vqrshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
-    }
-    _vqrshrn_n_s16(
-        a,
-        const {
-            int16x8_t([
-                -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16,
-                -N as i16,
-            ])
-        },
-    )
-}
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v4i16")]
-        fn _vqrshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
-    }
-    _vqrshrn_n_s32(
-        a,
-        const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) },
-    )
-}
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v2i32")]
-        fn _vqrshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
-    }
-    _vqrshrn_n_s64(a, const { int64x2_t([-N as i64, -N as i64]) })
-}
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshrn.v8i8"
-        )]
-        fn _vqrshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t;
-    }
-    _vqrshrn_n_s16(a, N)
-}
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshrn.v4i16"
-        )]
-        fn _vqrshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t;
-    }
-    _vqrshrn_n_s32(a, N)
-}
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshrn.v2i32"
-        )]
-        fn _vqrshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t;
-    }
-    _vqrshrn_n_s64(a, N)
-}
-#[doc = "Unsigned signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")]
-        fn _vqrshrn_n_u16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
-    }
-    _vqrshrn_n_u16(
-        a.as_signed(),
-        const {
-            uint16x8_t([
-                -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16,
-                -N as u16,
-            ])
-        }
-        .as_signed(),
-    )
-    .as_unsigned()
-}
-#[doc = "Unsigned signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")]
-        fn _vqrshrn_n_u32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
-    }
-    _vqrshrn_n_u32(
-        a.as_signed(),
-        const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }.as_signed(),
-    )
-    .as_unsigned()
-}
-#[doc = "Unsigned signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")]
-        fn _vqrshrn_n_u64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
-    }
-    _vqrshrn_n_u64(
-        a.as_signed(),
-        const { uint64x2_t([-N as u64, -N as u64]) }.as_signed(),
-    )
-    .as_unsigned()
-}
-#[doc = "Unsigned signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshrn.v8i8"
-        )]
-        fn _vqrshrn_n_u16(a: int16x8_t, n: i32) -> int8x8_t;
-    }
-    _vqrshrn_n_u16(a.as_signed(), N).as_unsigned()
-}
-#[doc = "Unsigned signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshrn.v4i16"
-        )]
-        fn _vqrshrn_n_u32(a: int32x4_t, n: i32) -> int16x4_t;
-    }
-    _vqrshrn_n_u32(a.as_signed(), N).as_unsigned()
-}
-#[doc = "Unsigned signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshrn.v2i32"
-        )]
-        fn _vqrshrn_n_u64(a: int64x2_t, n: i32) -> int32x2_t;
-    }
-    _vqrshrn_n_u64(a.as_signed(), N).as_unsigned()
-}
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqrshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v8i8")]
-        fn _vqrshrun_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
-    }
-    _vqrshrun_n_s16(
-        a,
-        const {
-            int16x8_t([
-                -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16,
-                -N as i16,
-            ])
-        },
-    )
-    .as_unsigned()
-}
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqrshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v4i16")]
-        fn _vqrshrun_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
-    }
-    _vqrshrun_n_s32(
-        a,
-        const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) },
-    )
-    .as_unsigned()
-}
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqrshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v2i32")]
-        fn _vqrshrun_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
-    }
-    _vqrshrun_n_s64(a, const { int64x2_t([-N as i64, -N as i64]) }).as_unsigned()
-}
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshrun.v8i8"
-        )]
-        fn _vqrshrun_n_s16(a: int16x8_t, n: i32) -> int8x8_t;
-    }
-    _vqrshrun_n_s16(a, N).as_unsigned()
-}
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshrun.v4i16"
-        )]
-        fn _vqrshrun_n_s32(a: int32x4_t, n: i32) -> int16x4_t;
-    }
-    _vqrshrun_n_s32(a, N).as_unsigned()
-}
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqrshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshrun.v2i32"
-        )]
-        fn _vqrshrun_n_s64(a: int64x2_t, n: i32) -> int32x2_t;
-    }
-    _vqrshrun_n_s64(a, N).as_unsigned()
+pub unsafe fn vnegq_s8(a: int8x16_t) -> int8x16_t {
+    simd_neg(a)
 }
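+// Sketch (hypothetical helper): integer negation is two's-complement and
+// wrapping, so negating i8::MIN yields i8::MIN again rather than trapping.
+#[cfg(test)]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+unsafe fn _example_vnegq_s8() -> int8x16_t {
+    vnegq_s8(vdupq_n_s8(i8::MIN)) // [i8::MIN; 16]
+}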
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s8)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
+    assert_instr(neg)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25323,23 +27050,21 @@ pub unsafe fn vqrshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshl_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    vqshl_s8(a, vdup_n_s8(N as _))
+pub unsafe fn vneg_s16(a: int16x4_t) -> int16x4_t {
+    simd_neg(a)
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s8)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
+    assert_instr(neg)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25348,23 +27073,21 @@ pub unsafe fn vqshl_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshlq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    vqshlq_s8(a, vdupq_n_s8(N as _))
+pub unsafe fn vnegq_s16(a: int16x8_t) -> int16x8_t {
+    simd_neg(a)
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s16)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
+    assert_instr(neg)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25373,23 +27096,21 @@ pub unsafe fn vqshlq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshl_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    vqshl_s16(a, vdup_n_s16(N as _))
+pub unsafe fn vneg_s32(a: int32x2_t) -> int32x2_t {
+    simd_neg(a)
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s16)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
+    assert_instr(neg)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25398,23 +27119,21 @@ pub unsafe fn vqshl_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshlq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    vqshlq_s16(a, vdupq_n_s16(N as _))
+pub unsafe fn vnegq_s32(a: int32x4_t) -> int32x4_t {
+    simd_neg(a)
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s32)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
+    assert_instr(orr)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25423,23 +27142,21 @@ pub unsafe fn vqshlq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshl_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
-    static_assert_uimm_bits!(N, 5);
-    vqshl_s32(a, vdup_n_s32(N as _))
+pub unsafe fn vorr_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    simd_or(a, b)
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s32)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
+    assert_instr(orr)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25448,23 +27165,21 @@ pub unsafe fn vqshl_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshlq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 5);
-    vqshlq_s32(a, vdupq_n_s32(N as _))
+pub unsafe fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    simd_or(a, b)
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s64)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
+    assert_instr(orr)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25473,23 +27188,21 @@ pub unsafe fn vqshlq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshl_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
-    static_assert_uimm_bits!(N, 6);
-    vqshl_s64(a, vdup_n_s64(N as _))
+pub unsafe fn vorr_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    simd_or(a, b)
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s64)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
+    assert_instr(orr)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25498,23 +27211,21 @@ pub unsafe fn vqshl_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshlq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 6);
-    vqshlq_s64(a, vdupq_n_s64(N as _))
+pub unsafe fn vorrq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    simd_or(a, b)
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u8)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
+    assert_instr(orr)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25523,23 +27234,21 @@ pub unsafe fn vqshlq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshl_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    vqshl_u8(a, vdup_n_s8(N as _))
+pub unsafe fn vorr_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    simd_or(a, b)
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u8)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
+    assert_instr(orr)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25548,23 +27257,21 @@ pub unsafe fn vqshl_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshlq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    vqshlq_u8(a, vdupq_n_s8(N as _))
+pub unsafe fn vorrq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    simd_or(a, b)
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u16)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
+    assert_instr(orr)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25573,23 +27280,21 @@ pub unsafe fn vqshlq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshl_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    vqshl_u16(a, vdup_n_s16(N as _))
+pub unsafe fn vorr_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    simd_or(a, b)
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u16)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
+    assert_instr(orr)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25598,23 +27303,21 @@ pub unsafe fn vqshl_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshlq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    vqshlq_u16(a, vdupq_n_s16(N as _))
+pub unsafe fn vorrq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    simd_or(a, b)
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u32)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
+    assert_instr(orr)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25623,23 +27326,21 @@ pub unsafe fn vqshlq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshl_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(N, 5);
-    vqshl_u32(a, vdup_n_s32(N as _))
+pub unsafe fn vorr_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    simd_or(a, b)
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u32)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
+    assert_instr(orr)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25648,23 +27349,21 @@ pub unsafe fn vqshl_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshlq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(N, 5);
-    vqshlq_u32(a, vdupq_n_s32(N as _))
+pub unsafe fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    simd_or(a, b)
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u64)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
+    assert_instr(orr)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25673,23 +27372,21 @@ pub unsafe fn vqshlq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshl_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
-    static_assert_uimm_bits!(N, 6);
-    vqshl_u64(a, vdup_n_s64(N as _))
+pub unsafe fn vorr_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    simd_or(a, b)
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u64)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
+    assert_instr(orr)
 )]
-#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25698,21 +27395,20 @@ pub unsafe fn vqshl_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshlq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(N, 6);
-    vqshlq_u64(a, vdupq_n_s64(N as _))
+pub unsafe fn vorrq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    simd_or(a, b)
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s8)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25722,28 +27418,20 @@ pub unsafe fn vqshlq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v8i8"
-        )]
-        fn _vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-    _vqshl_s8(a, b)
+pub unsafe fn vorr_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    simd_or(a, b)
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s8)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25753,28 +27441,98 @@ pub unsafe fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v16i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v16i8"
-        )]
-        fn _vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+pub unsafe fn vorrq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    simd_or(a, b)
+}
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(orr)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vorr_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    simd_or(a, b)
+}
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(orr)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vorrq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    simd_or(a, b)
+}
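
The whole `vorr*` family now funnels through `simd_or`, a lane-wise inclusive OR; despite the "(immediate, inclusive)" doc string, no immediate form is exposed here — both operands are vectors. A small usage sketch (assumes aarch64; not part of the patch):

#[cfg(target_arch = "aarch64")]
fn vorr_combines_masks() {
    use core::arch::aarch64::*;
    unsafe {
        let lo = vdup_n_u8(0x0f);
        let hi = vdup_n_u8(0xf0);
        // Lane-wise inclusive OR: 0x0f | 0xf0 == 0xff in every lane.
        assert_eq!(vget_lane_u8::<0>(vorr_u8(lo, hi)), 0xff);
    }
}
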
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sadalp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t {
+    let x: int16x4_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadal_s8(a, b);
     }
-    _vqshlq_s8(a, b)
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    {
+        x = simd_add(vpaddl_s8(b), a);
+    };
+    x
 }
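
`vpadal_s8` dispatches per architecture: on arm it calls the private LLVM binding `priv_vpadal_s8`, while on aarch64/arm64ec it is composed from `vpaddl_s8(b)` (pairwise widening add) plus `simd_add` with the accumulator. Either way the lane semantics are `r[i] = a[i] + (b[2*i] as i16) + (b[2*i+1] as i16)`. A quick check of that model (assumes aarch64; not part of the patch):

#[cfg(target_arch = "aarch64")]
fn vpadal_semantics() {
    use core::arch::aarch64::*;
    unsafe {
        let acc = vdup_n_s16(100);
        let b = vdup_n_s8(-2);
        // Each i16 lane gains the sum of two adjacent i8 lanes: 100 + (-2) + (-2).
        assert_eq!(vget_lane_s16::<0>(vpadal_s8(acc, b)), 96);
    }
}
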
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s16)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
+    assert_instr(sadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25784,28 +27542,29 @@ pub unsafe fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v4i16"
-        )]
-        fn _vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+pub unsafe fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
+    let x: int16x8_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadalq_s8(a, b);
     }
-    _vqshl_s16(a, b)
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    {
+        x = simd_add(vpaddlq_s8(b), a);
+    };
+    x
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s16)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
+    assert_instr(sadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25815,28 +27574,29 @@ pub unsafe fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v8i16"
-        )]
-        fn _vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+pub unsafe fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t {
+    let x: int32x2_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadal_s16(a, b);
     }
-    _vqshlq_s16(a, b)
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    {
+        x = simd_add(vpaddl_s16(b), a);
+    };
+    x
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s32)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
+    assert_instr(sadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25846,28 +27606,29 @@ pub unsafe fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v2i32"
-        )]
-        fn _vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+pub unsafe fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
+    let x: int32x4_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadalq_s16(a, b);
     }
-    _vqshl_s32(a, b)
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    {
+        x = simd_add(vpaddlq_s16(b), a);
+    };
+    x
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s32)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
+    assert_instr(sadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25877,28 +27638,29 @@ pub unsafe fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v4i32"
-        )]
-        fn _vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+pub unsafe fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t {
+    let x: int64x1_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadal_s32(a, b);
     }
-    _vqshlq_s32(a, b)
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    {
+        x = simd_add(vpaddl_s32(b), a);
+    };
+    x
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s64)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
+    assert_instr(sadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25908,28 +27670,29 @@ pub unsafe fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v1i64")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v1i64"
-        )]
-        fn _vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+pub unsafe fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
+    let x: int64x2_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadalq_s32(a, b);
     }
-    _vqshl_s64(a, b)
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    {
+        x = simd_add(vpaddlq_s32(b), a);
+    };
+    x
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s64)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
+    assert_instr(uadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25939,28 +27702,29 @@ pub unsafe fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i64")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v2i64"
-        )]
-        fn _vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+pub unsafe fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t {
+    let x: uint16x4_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadal_u8(a, b);
     }
-    _vqshlq_s64(a, b)
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    {
+        x = simd_add(vpaddl_u8(b), a);
+    };
+    x
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u8)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
+    assert_instr(uadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25970,28 +27734,29 @@ pub unsafe fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v8i8"
-        )]
-        fn _vqshl_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+pub unsafe fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
+    let x: uint16x8_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadalq_u8(a, b);
     }
-    _vqshl_u8(a.as_signed(), b).as_unsigned()
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    {
+        x = simd_add(vpaddlq_u8(b), a);
+    };
+    x
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u8)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
+    assert_instr(uadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26001,28 +27766,29 @@ pub unsafe fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v16i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v16i8"
-        )]
-        fn _vqshlq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+pub unsafe fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t {
+    let x: uint32x2_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadal_u16(a, b);
     }
-    _vqshlq_u8(a.as_signed(), b).as_unsigned()
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    {
+        x = simd_add(vpaddl_u16(b), a);
+    };
+    x
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u16)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
+    assert_instr(uadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26032,28 +27798,29 @@ pub unsafe fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v4i16"
-        )]
-        fn _vqshl_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+pub unsafe fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
+    let x: uint32x4_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadalq_u16(a, b);
     }
-    _vqshl_u16(a.as_signed(), b).as_unsigned()
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    {
+        x = simd_add(vpaddlq_u16(b), a);
+    };
+    x
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u16)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
+    assert_instr(uadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26063,28 +27830,29 @@ pub unsafe fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v8i16"
-        )]
-        fn _vqshlq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+pub unsafe fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t {
+    let x: uint64x1_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadal_u32(a, b);
     }
-    _vqshlq_u16(a.as_signed(), b).as_unsigned()
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    {
+        x = simd_add(vpaddl_u32(b), a);
+    };
+    x
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u32)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
+    assert_instr(uadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26094,28 +27862,29 @@ pub unsafe fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v2i32"
-        )]
-        fn _vqshl_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+pub unsafe fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
+    let x: uint64x2_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadalq_u32(a, b);
     }
-    _vqshl_u32(a.as_signed(), b).as_unsigned()
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    {
+        x = simd_add(vpaddlq_u32(b), a);
+    };
+    x
 }
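
These widening accumulates chain naturally: each `vpadal*` step doubles the lane width, so a vector of bytes can be reduced to a `u64` total with no intermediate overflow (the usual popcount-accumulation idiom). A sketch under the same assumptions as above (aarch64, public API; `bytewise_sum` is a hypothetical helper, not part of this patch):

#[cfg(target_arch = "aarch64")]
mod reduce {
    use core::arch::aarch64::*;

    /// Sums all 16 byte lanes of `v` without overflow.
    pub unsafe fn bytewise_sum(v: uint8x16_t) -> u64 {
        let s16 = vpaddlq_u8(v); // 16 x u8 -> 8 x u16, pairwise widening add
        let s32 = vpadalq_u16(vdupq_n_u32(0), s16); // 8 x u16 -> 4 x u32
        let s64 = vpadalq_u32(vdupq_n_u64(0), s32); // 4 x u32 -> 2 x u64
        vaddvq_u64(s64) // horizontal add of the two u64 lanes
    }
}
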
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u32)"]
+#[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
+    assert_instr(faddp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26125,28 +27894,28 @@ pub unsafe fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
+pub unsafe fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2f32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v4i32"
+            link_name = "llvm.aarch64.neon.faddp.v2f32"
         )]
-        fn _vqshlq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+        fn _vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
     }
-    _vqshlq_u32(a.as_signed(), b).as_unsigned()
+    _vpadd_f32(a, b)
 }
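
Unlike the integer cases above, `vpadd_f32` keeps an `extern "unadjusted"` binding on both architectures, linking `llvm.arm.neon.vpadd.v2f32` on arm and `llvm.aarch64.neon.faddp.v2f32` on aarch64. For two-lane vectors the result holds the pairwise sum of `a` in lane 0 and of `b` in lane 1, as this sketch checks (assumes aarch64; not part of the patch):

#[cfg(target_arch = "aarch64")]
fn vpadd_f32_semantics() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vld1_f32([1.0f32, 2.0].as_ptr());
        let b = vld1_f32([10.0f32, 20.0].as_ptr());
        let r = vpadd_f32(a, b); // [1.0 + 2.0, 10.0 + 20.0]
        assert_eq!(vget_lane_f32::<0>(r), 3.0);
        assert_eq!(vget_lane_f32::<1>(r), 30.0);
    }
}
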
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u64)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
+    assert_instr(addp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26156,28 +27925,28 @@ pub unsafe fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
+pub unsafe fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v1i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v1i64"
+            link_name = "llvm.aarch64.neon.addp.v8i8"
         )]
-        fn _vqshl_u64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v8i8")]
+        fn _vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     }
-    _vqshl_u64(a.as_signed(), b).as_unsigned()
+    _vpadd_s8(a, b)
 }
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u64)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
+    assert_instr(addp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26187,799 +27956,13140 @@ pub unsafe fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
+pub unsafe fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v2i64"
+            link_name = "llvm.aarch64.neon.addp.v4i16"
         )]
-        fn _vqshlq_u64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")]
+        fn _vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     }
-    _vqshlq_u64(a.as_signed(), b).as_unsigned()
+    _vpadd_s16(a, b)
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(addp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i8")]
-        fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> int8x8_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.addp.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2i32")]
+        fn _vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     }
-    _vqshlu_n_s8(
-        a,
-        const {
-            int8x8_t([
-                N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
-            ])
-        },
-    )
-    .as_unsigned()
+    _vpadd_s32(a, b)
 }
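
The integer `vpadd_*` variants share one lane model: the low half of the result holds the pairwise sums of `a`, the high half those of `b`. A plain-Rust reference model for the four-lane case (`vpadd_s16_model` is a hypothetical name, for illustration only):

fn vpadd_s16_model(a: [i16; 4], b: [i16; 4]) -> [i16; 4] {
    [
        a[0].wrapping_add(a[1]), // low half: pairs of a
        a[2].wrapping_add(a[3]),
        b[0].wrapping_add(b[1]), // high half: pairs of b
        b[2].wrapping_add(b[3]),
    ]
}
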
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v16i8")]
-        fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> int8x16_t;
-    }
-    _vqshluq_n_s8(
-        a,
-        const {
-            int8x16_t([
-                N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
-                N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
-            ])
-        },
-    )
-    .as_unsigned()
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(addp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    transmute(vpadd_s8(transmute(a), transmute(b)))
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i16")]
-        fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> int16x4_t;
-    }
-    _vqshlu_n_s16(
-        a,
-        const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) },
-    )
-    .as_unsigned()
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(addp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vpadd_s8(transmute(a), transmute(b)));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
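
The unsigned variants are now emitted twice, once per `target_endian`. As I read the generated code, the big-endian copy reverses the lanes of each operand and of the result around the `transmute`-based call so that the observable lane numbering matches the little-endian path. The check below should therefore hold regardless of target byte order (assumes aarch64; not part of the patch):

#[cfg(target_arch = "aarch64")]
fn vpadd_u8_lanes() {
    use core::arch::aarch64::*;
    unsafe {
        let v = vld1_u8([1u8, 2, 3, 4, 5, 6, 7, 8].as_ptr());
        let mut out = [0u8; 8];
        vst1_u8(out.as_mut_ptr(), vpadd_u8(v, v));
        // Pairwise sums of the first operand fill the low half,
        // those of the second operand the high half.
        assert_eq!(out, [3, 7, 11, 15, 3, 7, 11, 15]);
    }
}
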
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i16")]
-        fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> int16x8_t;
-    }
-    _vqshluq_n_s16(
-        a,
-        const {
-            int16x8_t([
-                N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16,
-            ])
-        },
-    )
-    .as_unsigned()
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(addp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    transmute(vpadd_s16(transmute(a), transmute(b)))
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(N, 5);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i32")]
-        fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> int32x2_t;
-    }
-    _vqshlu_n_s32(a, const { int32x2_t([N as i32, N as i32]) }).as_unsigned()
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(addp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let b: uint16x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+    let ret_val: uint16x4_t = transmute(vpadd_s16(transmute(a), transmute(b)));
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(N, 5);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i32")]
-        fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> int32x4_t;
-    }
-    _vqshluq_n_s32(
-        a,
-        const { int32x4_t([N as i32, N as i32, N as i32, N as i32]) },
-    )
-    .as_unsigned()
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(addp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    transmute(vpadd_s32(transmute(a), transmute(b)))
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshlu_n_s64<const N: i32>(a: int64x1_t) -> uint64x1_t {
-    static_assert_uimm_bits!(N, 6);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v1i64")]
-        fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> int64x1_t;
-    }
-    _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }).as_unsigned()
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(addp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let b: uint32x2_t = simd_shuffle!(b, b, [1, 0]);
+    let ret_val: uint32x2_t = transmute(vpadd_s32(transmute(a), transmute(b)));
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(N, 6);
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(saddlp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpaddl_s8(a: int8x8_t) -> int16x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i64")]
-        fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> int64x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.saddlp.v4i16.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i16.v8i8")]
+        fn _vpaddl_s8(a: int8x8_t) -> int16x4_t;
     }
-    _vqshluq_n_s64(a, const { int64x2_t([N as i64, N as i64]) }).as_unsigned()
+    _vpaddl_s8(a)
 }
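+// Illustrative values: `vpaddl` widens before adding, so each pair of i8
+// lanes yields one i16 lane and the sum cannot wrap, e.g.
+//   vpaddl_s8([-1, 2, 3, 4, 5, 6, 7, 8]) == [1i16, 7, 11, 15]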
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(saddlp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpaddlq_s8(a: int8x16_t) -> int16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v8i8"
+            link_name = "llvm.aarch64.neon.saddlp.v8i16.v16i8"
         )]
-        fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> int8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v8i16.v16i8")]
+        fn _vpaddlq_s8(a: int8x16_t) -> int16x8_t;
     }
-    _vqshlu_n_s8(
-        a,
-        const {
-            int8x8_t([
-                N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
-            ])
-        },
-    )
-    .as_unsigned()
+    _vpaddlq_s8(a)
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 3);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(saddlp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpaddl_s16(a: int16x4_t) -> int32x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v16i8"
+            link_name = "llvm.aarch64.neon.saddlp.v2i32.v4i16"
         )]
-        fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> int8x16_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i32.v4i16")]
+        fn _vpaddl_s16(a: int16x4_t) -> int32x2_t;
     }
-    _vqshluq_n_s8(
-        a,
-        const {
-            int8x16_t([
-                N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
-                N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
-            ])
-        },
-    )
-    .as_unsigned()
+    _vpaddl_s16(a)
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(N, 4);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(saddlp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpaddlq_s16(a: int16x8_t) -> int32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v4i16"
+            link_name = "llvm.aarch64.neon.saddlp.v4i32.v8i16"
         )]
-        fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> int16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i32.v8i16")]
+        fn _vpaddlq_s16(a: int16x8_t) -> int32x4_t;
     }
-    _vqshlu_n_s16(
-        a,
-        const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) },
-    )
-    .as_unsigned()
+    _vpaddlq_s16(a)
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 4);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(saddlp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpaddl_s32(a: int32x2_t) -> int64x1_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v8i16"
+            link_name = "llvm.aarch64.neon.saddlp.v1i64.v2i32"
         )]
-        fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> int16x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v1i64.v2i32")]
+        fn _vpaddl_s32(a: int32x2_t) -> int64x1_t;
     }
-    _vqshluq_n_s16(
-        a,
-        const {
-            int16x8_t([
-                N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16,
-            ])
-        },
-    )
-    .as_unsigned()
+    _vpaddl_s32(a)
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(N, 5);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(saddlp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpaddlq_s32(a: int32x4_t) -> int64x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v2i32"
+            link_name = "llvm.aarch64.neon.saddlp.v2i64.v4i32"
         )]
-        fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> int32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i64.v4i32")]
+        fn _vpaddlq_s32(a: int32x4_t) -> int64x2_t;
     }
-    _vqshlu_n_s32(a, const { int32x2_t([N as i32, N as i32]) }).as_unsigned()
+    _vpaddlq_s32(a)
 }
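+// These `unsafe extern "unadjusted"` blocks bind straight to LLVM intrinsics:
+// `link_name` picks the backend symbol (`llvm.aarch64.neon.saddlp.*` on
+// AArch64, `llvm.arm.neon.vpaddls.*` on 32-bit Arm) and the Rust wrapper just
+// forwards its argument; the "unadjusted" ABI keeps the vector types in the
+// layout the intrinsic expects rather than applying Rust's usual ABI
+// adjustments.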
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(N, 5);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uaddlp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpaddl_u8(a: uint8x8_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v4i32"
+            link_name = "llvm.aarch64.neon.uaddlp.v4i16.v8i8"
         )]
-        fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> int32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i16.v8i8")]
+        fn _vpaddl_u8(a: int8x8_t) -> int16x4_t;
     }
-    _vqshluq_n_s32(
-        a,
-        const { int32x4_t([N as i32, N as i32, N as i32, N as i32]) },
-    )
-    .as_unsigned()
+    _vpaddl_u8(a.as_signed()).as_unsigned()
 }
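+// The LLVM declaration is typed on signed vectors, so the unsigned wrappers
+// round-trip through `as_signed()`/`as_unsigned()`; both are lane-preserving
+// bitcasts. Widening still prevents overflow, e.g.
+//   vpaddl_u8([255, 255, 0, 1, 2, 3, 4, 5]) == [510u16, 1, 5, 9]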
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshlu_n_s64<const N: i32>(a: int64x1_t) -> uint64x1_t {
-    static_assert_uimm_bits!(N, 6);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uaddlp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpaddlq_u8(a: uint8x16_t) -> uint16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v1i64"
+            link_name = "llvm.aarch64.neon.uaddlp.v8i16.v16i8"
         )]
-        fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> int64x1_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v8i16.v16i8")]
+        fn _vpaddlq_u8(a: int8x16_t) -> int16x8_t;
     }
-    _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }).as_unsigned()
+    _vpaddlq_u8(a.as_signed()).as_unsigned()
 }
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(N, 6);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uaddlp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpaddl_u16(a: uint16x4_t) -> uint32x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v2i64"
+            link_name = "llvm.aarch64.neon.uaddlp.v2i32.v4i16"
         )]
-        fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> int64x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i32.v4i16")]
+        fn _vpaddl_u16(a: int16x4_t) -> int32x2_t;
     }
-    _vqshluq_n_s64(a, const { int64x2_t([N as i64, N as i64]) }).as_unsigned()
+    _vpaddl_u16(a.as_signed()).as_unsigned()
 }
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uaddlp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpaddlq_u16(a: uint16x8_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v8i8")]
-        fn _vqshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uaddlp.v4i32.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i32.v8i16")]
+        fn _vpaddlq_u16(a: int16x8_t) -> int32x4_t;
     }
-    _vqshrn_n_s16(
-        a,
-        const {
-            int16x8_t([
-                -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16,
-                -N as i16,
-            ])
-        },
-    )
+    _vpaddlq_u16(a.as_signed()).as_unsigned()
 }
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uaddlp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpaddl_u32(a: uint32x2_t) -> uint64x1_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v4i16")]
-        fn _vqshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uaddlp.v1i64.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v1i64.v2i32")]
+        fn _vpaddl_u32(a: int32x2_t) -> int64x1_t;
     }
-    _vqshrn_n_s32(
-        a,
-        const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) },
-    )
+    _vpaddl_u32(a.as_signed()).as_unsigned()
 }
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uaddlp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpaddlq_u32(a: uint32x4_t) -> uint64x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v2i32")]
-        fn _vqshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uaddlp.v2i64.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i64.v4i32")]
+        fn _vpaddlq_u32(a: int32x4_t) -> int64x2_t;
     }
-    _vqshrn_n_s64(a, const { int64x2_t([-N as i64, -N as i64]) })
+    _vpaddlq_u32(a.as_signed()).as_unsigned()
 }
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fmaxp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshrn.v8i8"
+            link_name = "llvm.aarch64.neon.fmaxp.v2f32"
         )]
-        fn _vqshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")]
+        fn _vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
     }
-    _vqshrn_n_s16(a, N)
+    _vpmax_f32(a, b)
 }
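+// Illustrative values: the result holds the pairwise maxima of `a` followed
+// by those of `b`, e.g. vpmax_f32([1.0, 5.0], [3.0, 2.0]) == [5.0, 3.0]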
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smaxp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshrn.v4i16"
+            link_name = "llvm.aarch64.neon.smaxp.v8i8"
         )]
-        fn _vqshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v8i8")]
+        fn _vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     }
-    _vqshrn_n_s32(a, N)
+    _vpmax_s8(a, b)
 }
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smaxp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshrn.v2i32"
+            link_name = "llvm.aarch64.neon.smaxp.v4i16"
         )]
-        fn _vqshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v4i16")]
+        fn _vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     }
-    _vqshrn_n_s64(a, N)
+    _vpmax_s16(a, b)
 }
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smaxp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")]
-        fn _vqshrn_n_u16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smaxp.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2i32")]
+        fn _vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     }
-    _vqshrn_n_u16(
-        a.as_signed(),
-        const {
-            uint16x8_t([
-                -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16,
-                -N as u16,
-            ])
-        }
-        .as_signed(),
-    )
-    .as_unsigned()
+    _vpmax_s32(a, b)
 }
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umaxp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")]
-        fn _vqshrn_n_u32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umaxp.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v8i8")]
+        fn _vpmax_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     }
-    _vqshrn_n_u32(
-        a.as_signed(),
-        const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }.as_signed(),
-    )
-    .as_unsigned()
+    _vpmax_u8(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umaxp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")]
-        fn _vqshrn_n_u64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umaxp.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v4i16")]
+        fn _vpmax_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     }
-    _vqshrn_n_u64(
-        a.as_signed(),
-        const { uint64x2_t([-N as u64, -N as u64]) }.as_signed(),
-    )
-    .as_unsigned()
+    _vpmax_u16(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umaxp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshrn.v8i8"
+            link_name = "llvm.aarch64.neon.umaxp.v2i32"
         )]
-        fn _vqshrn_n_u16(a: int16x8_t, n: i32) -> int8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v2i32")]
+        fn _vpmax_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     }
-    _vqshrn_n_u16(a.as_signed(), N).as_unsigned()
+    _vpmax_u32(a.as_signed(), b.as_signed()).as_unsigned()
 }
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fminp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshrn.v4i16"
+            link_name = "llvm.aarch64.neon.fminp.v2f32"
         )]
-        fn _vqshrn_n_u32(a: int32x4_t, n: i32) -> int16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2f32")]
+        fn _vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
     }
-    _vqshrn_n_u32(a.as_signed(), N).as_unsigned()
+    _vpmin_f32(a, b)
 }
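+// Illustrative values: `vpmin` mirrors `vpmax` with minima, e.g.
+//   vpmin_f32([1.0, 5.0], [3.0, 2.0]) == [1.0, 2.0]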
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sminp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminp.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v8i8")]
+        fn _vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    _vpmin_s8(a, b)
+}
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sminp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminp.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v4i16")]
+        fn _vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    _vpmin_s16(a, b)
+}
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sminp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminp.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2i32")]
+        fn _vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    _vpmin_s32(a, b)
+}
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uminp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminp.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v8i8")]
+        fn _vpmin_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    _vpmin_u8(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uminp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminp.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v4i16")]
+        fn _vpmin_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    _vpmin_u16(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uminp)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminp.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v2i32")]
+        fn _vpmin_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    _vpmin_u32(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Signed saturating Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqabs)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqabs_s8(a: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqabs.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i8")]
+        fn _vqabs_s8(a: int8x8_t) -> int8x8_t;
+    }
+    _vqabs_s8(a)
+}
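+// Saturation is the point of `vqabs`: a wrapping `abs` of i8::MIN would give
+// -128 back, whereas the saturating form clamps, e.g.
+//   vqabs_s8([-128, -1, 0, 1, -128, 7, -7, 127])
+//     == [127, 1, 0, 1, 127, 7, 7, 127]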
+#[doc = "Signed saturating Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqabs)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqabsq_s8(a: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqabs.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v16i8")]
+        fn _vqabsq_s8(a: int8x16_t) -> int8x16_t;
+    }
+    _vqabsq_s8(a)
+}
+#[doc = "Signed saturating Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqabs)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqabs_s16(a: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqabs.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i16")]
+        fn _vqabs_s16(a: int16x4_t) -> int16x4_t;
+    }
+    _vqabs_s16(a)
+}
+#[doc = "Signed saturating Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqabs)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqabsq_s16(a: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqabs.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i16")]
+        fn _vqabsq_s16(a: int16x8_t) -> int16x8_t;
+    }
+    _vqabsq_s16(a)
+}
+#[doc = "Signed saturating Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqabs)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqabs_s32(a: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqabs.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v2i32")]
+        fn _vqabs_s32(a: int32x2_t) -> int32x2_t;
+    }
+    _vqabs_s32(a)
+}
+#[doc = "Signed saturating Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqabs)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqabsq_s32(a: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqabs.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i32")]
+        fn _vqabsq_s32(a: int32x4_t) -> int32x4_t;
+    }
+    _vqabsq_s32(a)
+}
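+// Editor's sketch (hypothetical, not generated code; uses the existing
+// vdup_n_s32/vget_lane_s32 helpers): saturating absolute value clamps the one
+// input plain negation cannot represent, instead of overflowing:
+//
+//     let r = vqabs_s32(vdup_n_s32(i32::MIN));
+//     assert_eq!(vget_lane_s32::<0>(r), i32::MAX);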
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqadd.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i8")]
+        fn _vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    _vqadd_s8(a, b)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqadd.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v16i8")]
+        fn _vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vqaddq_s8(a, b)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqadd.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i16")]
+        fn _vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    _vqadd_s16(a, b)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqadd.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i16")]
+        fn _vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vqaddq_s16(a, b)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqadd.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i32")]
+        fn _vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    _vqadd_s32(a, b)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqadd.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i32")]
+        fn _vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vqaddq_s32(a, b)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqadd.v1i64"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v1i64")]
+        fn _vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+    }
+    _vqadd_s64(a, b)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqadd.v2i64"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i64")]
+        fn _vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+    _vqaddq_s64(a, b)
+}
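+// Hypothetical usage sketch (not generated code): the signed vqadd family
+// clamps at the lane bounds rather than wrapping:
+//
+//     let r = vqadd_s8(vdup_n_s8(i8::MAX), vdup_n_s8(1));
+//     assert_eq!(vget_lane_s8::<0>(r), i8::MAX); // stays at 127, no wrap to -128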
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqadd.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v8i8")]
+        fn _vqadd_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    _vqadd_u8(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqadd.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v16i8")]
+        fn _vqaddq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vqaddq_u8(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqadd.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v4i16")]
+        fn _vqadd_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    _vqadd_u16(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqadd.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v8i16")]
+        fn _vqaddq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vqaddq_u16(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqadd.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i32")]
+        fn _vqadd_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    _vqadd_u32(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqadd.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v4i32")]
+        fn _vqaddq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vqaddq_u32(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqadd.v1i64"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v1i64")]
+        fn _vqadd_u64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+    }
+    _vqadd_u64(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqadd)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqadd.v2i64"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i64")]
+        fn _vqaddq_u64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+    _vqaddq_u64(a.as_signed(), b.as_signed()).as_unsigned()
+}
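+// Note on the unsigned wrappers above: the `unadjusted` extern declarations
+// are typed over signed vectors, so each wrapper reinterprets bits with
+// `as_signed()`/`as_unsigned()` around the call; the linked uqadd/uadd.sat
+// intrinsics still saturate at the unsigned bounds. Hypothetical sketch:
+//
+//     let r = vqadd_u8(vdup_n_u8(200), vdup_n_u8(100));
+//     assert_eq!(vget_lane_u8::<0>(r), u8::MAX); // 300 clamps to 255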
+#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlal, N = 2)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmlal_lane_s16<const N: i32>(
+    a: int32x4_t,
+    b: int16x4_t,
+    c: int16x4_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    vqaddq_s32(a, vqdmull_lane_s16::<N>(b, c))
+}
+#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlal, N = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmlal_lane_s32<const N: i32>(
+    a: int64x2_t,
+    b: int32x2_t,
+    c: int32x2_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    vqaddq_s64(a, vqdmull_lane_s32::<N>(b, c))
+}
+#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlal)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
+    vqaddq_s32(a, vqdmull_n_s16(b, c))
+}
+#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlal)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
+    vqaddq_s64(a, vqdmull_n_s32(b, c))
+}
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlal)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    vqaddq_s32(a, vqdmull_s16(b, c))
+}
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlal)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    vqaddq_s64(a, vqdmull_s32(b, c))
+}
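+// Worked sketch for the vqdmlal family (hypothetical, not generated code):
+// vqdmlal_s16 is composed above as vqaddq_s32(a, vqdmull_s16(b, c)), i.e.
+// a + saturate(2 * b * c) with the product widened to 32-bit lanes:
+//
+//     let acc = vdupq_n_s32(1);
+//     let r = vqdmlal_s16(acc, vdup_n_s16(1000), vdup_n_s16(1000));
+//     assert_eq!(vgetq_lane_s32::<0>(r), 1 + 2 * 1_000_000);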
+#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlsl, N = 2)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmlsl_lane_s16<const N: i32>(
+    a: int32x4_t,
+    b: int16x4_t,
+    c: int16x4_t,
+) -> int32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    vqsubq_s32(a, vqdmull_lane_s16::<N>(b, c))
+}
+#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlsl, N = 1)
+)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmlsl_lane_s32<const N: i32>(
+    a: int64x2_t,
+    b: int32x2_t,
+    c: int32x2_t,
+) -> int64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    vqsubq_s64(a, vqdmull_lane_s32::<N>(b, c))
+}
+#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlsl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
+    vqsubq_s32(a, vqdmull_n_s16(b, c))
+}
+#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlsl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
+    vqsubq_s64(a, vqdmull_n_s32(b, c))
+}
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlsl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    vqsubq_s32(a, vqdmull_s16(b, c))
+}
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmlsl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    vqsubq_s64(a, vqdmull_s32(b, c))
+}
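+// The vqdmlsl family mirrors vqdmlal with a saturating subtract: as composed
+// above, vqdmlsl_s16(a, b, c) == vqsubq_s32(a, vqdmull_s16(b, c)), i.e.
+// a - saturate(2 * b * c) per widened lane.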
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmulh, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vqdmulh_s16(a, vdup_n_s16(simd_extract!(b, LANE as u32)))
+}
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmulh, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vqdmulhq_s16(a, vdupq_n_s16(simd_extract!(b, LANE as u32)))
+}
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmulh, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vqdmulh_s32(a, vdup_n_s32(simd_extract!(b, LANE as u32)))
+}
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmulh, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vqdmulhq_s32(a, vdupq_n_s32(simd_extract!(b, LANE as u32)))
+}
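+// Lane-selection sketch (hypothetical values, not generated code): the LANE
+// const generic picks which lane of `b` is broadcast before the multiply, and
+// static_assert_uimm_bits!(LANE, 3) rejects lanes outside 0..=7 at compile
+// time:
+//
+//     let r = vqdmulh_laneq_s16::<7>(a, b); // multiplies every lane of `a` by b[7]
+//     // vqdmulh_laneq_s16::<8>(a, b)       // rejected: 8 does not fit in 3 bits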
+#[doc = "Vector saturating doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
+    let b: int16x4_t = vdup_n_s16(b);
+    vqdmulh_s16(a, b)
+}
+#[doc = "Vector saturating doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
+    let b: int16x8_t = vdupq_n_s16(b);
+    vqdmulhq_s16(a, b)
+}
+#[doc = "Vector saturating doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
+    let b: int32x2_t = vdup_n_s32(b);
+    vqdmulh_s32(a, b)
+}
+#[doc = "Vector saturating doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
+    let b: int32x4_t = vdupq_n_s32(b);
+    vqdmulhq_s32(a, b)
+}
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqdmulh.v4i16"
+        )]
+        fn _vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    _vqdmulh_s16(a, b)
+}
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqdmulh.v8i16"
+        )]
+        fn _vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vqdmulhq_s16(a, b)
+}
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqdmulh.v2i32"
+        )]
+        fn _vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    _vqdmulh_s32(a, b)
+}
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqdmulh.v4i32"
+        )]
+        fn _vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vqdmulhq_s32(a, b)
+}
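+// vqdmulh is the classic Q15/Q31 fixed-point multiply: it keeps the high half
+// of 2*a*b, saturating only for i16::MIN * i16::MIN. Hypothetical sketch in
+// Q15, where 16384 encodes 0.5:
+//
+//     let r = vqdmulh_s16(vdup_n_s16(16384), vdup_n_s16(16384));
+//     assert_eq!(vget_lane_s16::<0>(r), 8192); // 0.5 * 0.5 == 0.25 in Q15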
+#[doc = "Vector saturating doubling long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmull, N = 2)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmull_lane_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
+    vqdmull_s16(a, b)
+}
+#[doc = "Vector saturating doubling long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmull, N = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmull_lane_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
+    vqdmull_s32(a, b)
+}
+#[doc = "Vector saturating doubling long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmull)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
+    vqdmull_s16(a, vdup_n_s16(b))
+}
+#[doc = "Vector saturating doubling long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmull)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
+    vqdmull_s32(a, vdup_n_s32(b))
+}
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmull)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqdmull.v4i32"
+        )]
+        fn _vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t;
+    }
+    _vqdmull_s16(a, b)
+}
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqdmull)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v2i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqdmull.v2i64"
+        )]
+        fn _vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t;
+    }
+    _vqdmull_s32(a, b)
+}
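+// vqdmull widens as it doubles: i16 lanes yield i32 lanes holding 2*a*b, so
+// the only saturating case is i16::MIN * i16::MIN (2 * 2^30 overflows i32).
+// Hypothetical sketch:
+//
+//     let r = vqdmull_s16(vdup_n_s16(i16::MIN), vdup_n_s16(i16::MIN));
+//     assert_eq!(vgetq_lane_s32::<0>(r), i32::MAX); // saturated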
+#[doc = "Signed saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqxtn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqmovn_s16(a: int16x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqxtn.v8i8"
+        )]
+        fn _vqmovn_s16(a: int16x8_t) -> int8x8_t;
+    }
+    _vqmovn_s16(a)
+}
+#[doc = "Signed saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqxtn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqmovn_s32(a: int32x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqxtn.v4i16"
+        )]
+        fn _vqmovn_s32(a: int32x4_t) -> int16x4_t;
+    }
+    _vqmovn_s32(a)
+}
+#[doc = "Signed saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqxtn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqmovn_s64(a: int64x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqxtn.v2i32"
+        )]
+        fn _vqmovn_s64(a: int64x2_t) -> int32x2_t;
+    }
+    _vqmovn_s64(a)
+}
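+// Narrowing sketch (hypothetical, not generated code): vqmovn_* halves the
+// lane width and clamps anything outside the narrower type's range:
+//
+//     let r = vqmovn_s16(vdupq_n_s16(1000));
+//     assert_eq!(vget_lane_s8::<0>(r), i8::MAX); // 1000 clamps to 127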
+#[doc = "Unsigned saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqxtn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqxtn.v8i8"
+        )]
+        fn _vqmovn_u16(a: int16x8_t) -> int8x8_t;
+    }
+    _vqmovn_u16(a.as_signed()).as_unsigned()
+}
+#[doc = "Unsigned saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqxtn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqxtn.v4i16"
+        )]
+        fn _vqmovn_u32(a: int32x4_t) -> int16x4_t;
+    }
+    _vqmovn_u32(a.as_signed()).as_unsigned()
+}
+#[doc = "Unsigned saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqxtn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqxtn.v2i32"
+        )]
+        fn _vqmovn_u64(a: int64x2_t) -> int32x2_t;
+    }
+    _vqmovn_u64(a.as_signed()).as_unsigned()
+}
+#[doc = "Signed saturating extract unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqxtun)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqmovun_s16(a: int16x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqxtun.v8i8"
+        )]
+        fn _vqmovun_s16(a: int16x8_t) -> int8x8_t;
+    }
+    _vqmovun_s16(a).as_unsigned()
+}
+#[doc = "Signed saturating extract unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqxtun)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqmovun_s32(a: int32x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqxtun.v4i16"
+        )]
+        fn _vqmovun_s32(a: int32x4_t) -> int16x4_t;
+    }
+    _vqmovun_s32(a).as_unsigned()
+}
+#[doc = "Signed saturating extract unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqxtun)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqmovun_s64(a: int64x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqxtun.v2i32"
+        )]
+        fn _vqmovun_s64(a: int64x2_t) -> int32x2_t;
+    }
+    _vqmovun_s64(a).as_unsigned()
+}
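+// The `vqmovun_*` family narrows *signed* input to an *unsigned* result, so
+// saturation is two-sided: negative lanes clamp to 0 and lanes above the
+// unsigned maximum clamp to it. A sketch (aarch64 + NEON assumed):
+//
+//     let signed: int16x8_t = vdupq_n_s16(-5);
+//     // Negative values cannot be represented in u8 and saturate to 0.
+//     assert_eq!(vget_lane_u8::<0>(vqmovun_s16(signed)), 0);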
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqneg)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqneg_s8(a: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqneg.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i8")]
+        fn _vqneg_s8(a: int8x8_t) -> int8x8_t;
+    }
+    _vqneg_s8(a)
+}
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqneg)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqnegq_s8(a: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqneg.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v16i8")]
+        fn _vqnegq_s8(a: int8x16_t) -> int8x16_t;
+    }
+    _vqnegq_s8(a)
+}
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqneg)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqneg_s16(a: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqneg.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i16")]
+        fn _vqneg_s16(a: int16x4_t) -> int16x4_t;
+    }
+    _vqneg_s16(a)
+}
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqneg)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqnegq_s16(a: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqneg.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i16")]
+        fn _vqnegq_s16(a: int16x8_t) -> int16x8_t;
+    }
+    _vqnegq_s16(a)
+}
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqneg)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqneg_s32(a: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqneg.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v2i32")]
+        fn _vqneg_s32(a: int32x2_t) -> int32x2_t;
+    }
+    _vqneg_s32(a)
+}
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqneg)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqnegq_s32(a: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqneg.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i32")]
+        fn _vqnegq_s32(a: int32x4_t) -> int32x4_t;
+    }
+    _vqnegq_s32(a)
+}
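+// Saturating negation differs from plain negation only at the type minimum,
+// where two's-complement wrap-around would otherwise return the input
+// unchanged. A sketch (aarch64 + NEON assumed):
+//
+//     let v: int8x8_t = vdup_n_s8(i8::MIN);
+//     // -(-128) does not fit in i8; SQNEG saturates to i8::MAX instead of
+//     // wrapping back to -128 the way a plain NEG would.
+//     assert_eq!(vget_lane_s8::<0>(vqneg_s8(v)), i8::MAX);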
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let b: int16x4_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmulh_s16(a, b)
+}
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let b: int32x2_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32]);
+    vqrdmulh_s32(a, b)
+}
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    let b: int16x4_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmulh_s16(a, b)
+}
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let b: int32x2_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32]);
+    vqrdmulh_s32(a, b)
+}
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let b: int16x8_t = simd_shuffle!(
+        b,
+        b,
+        [
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32
+        ]
+    );
+    vqrdmulhq_s16(a, b)
+}
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let b: int32x4_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmulhq_s32(a, b)
+}
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    let b: int16x8_t = simd_shuffle!(
+        b,
+        b,
+        [
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32,
+            LANE as u32
+        ]
+    );
+    vqrdmulhq_s16(a, b)
+}
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh, LANE = 1)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let b: int32x4_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmulhq_s32(a, b)
+}
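+// In the `_lane`/`_laneq` forms above, `LANE` is a const generic selecting
+// which element of `b` is broadcast before the multiply, and
+// `static_assert_uimm_bits!` rejects out-of-range indices at compile time
+// (2 bits for a 4-lane vector, 3 bits for an 8-lane one). A sketch:
+//
+//     // Multiply every lane of `a` by lane 7 of the 128-bit vector `b`.
+//     let r: int16x4_t = vqrdmulh_laneq_s16::<7>(a, b);
+//     // vqrdmulh_laneq_s16::<8>(a, b) fails to compile: 8 needs 4 bits.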
+#[doc = "Vector saturating rounding doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
+    vqrdmulh_s16(a, vdup_n_s16(b))
+}
+#[doc = "Vector saturating rounding doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
+    vqrdmulhq_s16(a, vdupq_n_s16(b))
+}
+#[doc = "Vector saturating rounding doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
+    vqrdmulh_s32(a, vdup_n_s32(b))
+}
+#[doc = "Vector saturating rounding doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
+    vqrdmulhq_s32(a, vdupq_n_s32(b))
+}
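+// The `_n` forms are convenience wrappers: they splat the scalar operand
+// with `vdup` and defer to the vector-by-vector intrinsic, so the following
+// two calls are equivalent (sketch):
+//
+//     let r1 = vqrdmulhq_n_s16(a, 3);
+//     let r2 = vqrdmulhq_s16(a, vdupq_n_s16(3));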
+#[doc = "Signed saturating rounding doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrdmulh.v4i16"
+        )]
+        fn _vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    _vqrdmulh_s16(a, b)
+}
+#[doc = "Signed saturating rounding doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrdmulh.v8i16"
+        )]
+        fn _vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vqrdmulhq_s16(a, b)
+}
+#[doc = "Signed saturating rounding doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrdmulh.v2i32"
+        )]
+        fn _vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    _vqrdmulh_s32(a, b)
+}
+#[doc = "Signed saturating rounding doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrdmulh)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrdmulh.v4i32"
+        )]
+        fn _vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vqrdmulhq_s32(a, b)
+}
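+// Per lane, SQRDMULH/VQRDMULH computes the rounded high half of the doubled
+// product, i.e. for 16-bit lanes sat_i16((2*a*b + 2^15) >> 16) -- the Q15
+// fixed-point multiply. A scalar model of one lane (a sketch, not how the
+// intrinsic is implemented):
+//
+//     fn sqrdmulh_lane(a: i16, b: i16) -> i16 {
+//         let p = 2 * (a as i64) * (b as i64) + (1 << 15);
+//         (p >> 16).clamp(i16::MIN as i64, i16::MAX as i64) as i16
+//     }
+//
+// Saturation only triggers for a == b == i16::MIN, where the exact result
+// (+1.0 in Q15) is unrepresentable and clamps to i16::MAX.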
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.v8i8"
+        )]
+        fn _vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    _vqrshl_s8(a, b)
+}
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.v16i8"
+        )]
+        fn _vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vqrshlq_s8(a, b)
+}
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.v4i16"
+        )]
+        fn _vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    _vqrshl_s16(a, b)
+}
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.v8i16"
+        )]
+        fn _vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vqrshlq_s16(a, b)
+}
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.v2i32"
+        )]
+        fn _vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    _vqrshl_s32(a, b)
+}
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.v4i32"
+        )]
+        fn _vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vqrshlq_s32(a, b)
+}
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v1i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.v1i64"
+        )]
+        fn _vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+    }
+    _vqrshl_s64(a, b)
+}
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqrshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.v2i64"
+        )]
+        fn _vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+    _vqrshlq_s64(a, b)
+}
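+// For `vqrshl`, the per-lane shift count in `b` is signed: non-negative
+// counts shift left with saturation, negative counts become rounding right
+// shifts. A scalar model of one 8-bit lane for counts within the element
+// width (a sketch):
+//
+//     fn sqrshl_lane(a: i8, shift: i8) -> i8 {
+//         if shift >= 0 {
+//             ((a as i32) << shift).clamp(i8::MIN as i32, i8::MAX as i32) as i8
+//         } else {
+//             let s = -(shift as i32); // rounding right shift by s
+//             (((a as i32) + (1 << (s - 1))) >> s) as i8
+//         }
+//     }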
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqrshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshl.v8i8"
+        )]
+        fn _vqrshl_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    _vqrshl_u8(a.as_signed(), b).as_unsigned()
+}
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqrshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshl.v16i8"
+        )]
+        fn _vqrshlq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vqrshlq_u8(a.as_signed(), b).as_unsigned()
+}
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqrshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshl.v4i16"
+        )]
+        fn _vqrshl_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    _vqrshl_u16(a.as_signed(), b).as_unsigned()
+}
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqrshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshl.v8i16"
+        )]
+        fn _vqrshlq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vqrshlq_u16(a.as_signed(), b).as_unsigned()
+}
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqrshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshl.v2i32"
+        )]
+        fn _vqrshl_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    _vqrshl_u32(a.as_signed(), b).as_unsigned()
+}
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqrshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshl.v4i32"
+        )]
+        fn _vqrshlq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vqrshlq_u32(a.as_signed(), b).as_unsigned()
+}
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqrshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v1i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshl.v1i64"
+        )]
+        fn _vqrshl_u64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+    }
+    _vqrshl_u64(a.as_signed(), b).as_unsigned()
+}
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqrshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshl.v2i64"
+        )]
+        fn _vqrshlq_u64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+    _vqrshlq_u64(a.as_signed(), b).as_unsigned()
+}
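+// The unsigned forms keep the asymmetric signature seen above: the value
+// lanes are unsigned but the shift-count vector stays signed (e.g.
+// `vqrshl_u8(a: uint8x8_t, b: int8x8_t)`), since a negative count still
+// means a rounding shift right. Left shifts saturate to the unsigned
+// maximum. A sketch:
+//
+//     // 200 << 1 = 400 > u8::MAX, so the lane saturates to 255.
+//     let r = vqrshl_u8(vdup_n_u8(200), vdup_n_s8(1));
+//     assert_eq!(vget_lane_u8::<0>(r), 255);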
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v8i8")]
+        fn _vqrshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
+    }
+    _vqrshrn_n_s16(
+        a,
+        const {
+            int16x8_t([
+                -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16,
+                -N as i16,
+            ])
+        },
+    )
+}
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v4i16")]
+        fn _vqrshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+    }
+    _vqrshrn_n_s32(
+        a,
+        const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) },
+    )
+}
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v2i32")]
+        fn _vqrshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+    }
+    _vqrshrn_n_s64(a, const { int64x2_t([-N as i64, -N as i64]) })
+}
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshrn.v8i8"
+        )]
+        fn _vqrshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t;
+    }
+    _vqrshrn_n_s16(a, N)
+}
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshrn.v4i16"
+        )]
+        fn _vqrshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t;
+    }
+    _vqrshrn_n_s32(a, N)
+}
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshrn.v2i32"
+        )]
+        fn _vqrshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t;
+    }
+    _vqrshrn_n_s64(a, N)
+}
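+// Both definitions of `vqrshrn_n_*` above compute the same per-lane
+// operation, sat_narrow((a + (1 << (N - 1))) >> N); they differ only in how
+// the immediate reaches LLVM. The Arm path materialises `-N` as a constant
+// shift vector (negative meaning shift right for `vqrshiftns`), while the
+// AArch64 intrinsic takes `N` directly. `static_assert!` bounds `N` to the
+// width of the narrowed element. A sketch:
+//
+//     // Rounding-shift each i16 lane right by 2, saturating into i8.
+//     let r: int8x8_t = vqrshrn_n_s16::<2>(a);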
+#[doc = "Unsigned signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")]
+        fn _vqrshrn_n_u16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
+    }
+    _vqrshrn_n_u16(
+        a.as_signed(),
+        const {
+            uint16x8_t([
+                -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16,
+                -N as u16,
+            ])
+        }
+        .as_signed(),
+    )
+    .as_unsigned()
+}
+#[doc = "Unsigned signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")]
+        fn _vqrshrn_n_u32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+    }
+    _vqrshrn_n_u32(
+        a.as_signed(),
+        const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }.as_signed(),
+    )
+    .as_unsigned()
+}
+#[doc = "Unsigned signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")]
+        fn _vqrshrn_n_u64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+    }
+    _vqrshrn_n_u64(
+        a.as_signed(),
+        const { uint64x2_t([-N as u64, -N as u64]) }.as_signed(),
+    )
+    .as_unsigned()
+}
+#[doc = "Unsigned signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshrn.v8i8"
+        )]
+        fn _vqrshrn_n_u16(a: int16x8_t, n: i32) -> int8x8_t;
+    }
+    _vqrshrn_n_u16(a.as_signed(), N).as_unsigned()
+}
+#[doc = "Unsigned signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshrn.v4i16"
+        )]
+        fn _vqrshrn_n_u32(a: int32x4_t, n: i32) -> int16x4_t;
+    }
+    _vqrshrn_n_u32(a.as_signed(), N).as_unsigned()
+}
+#[doc = "Unsigned signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqrshrn.v2i32"
+        )]
+        fn _vqrshrn_n_u64(a: int64x2_t, n: i32) -> int32x2_t;
+    }
+    _vqrshrn_n_u64(a.as_signed(), N).as_unsigned()
+}
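+// A minimal usage sketch for the narrowing intrinsics above (illustrative
+// only, not generated code), assuming an AArch64 target with NEON enabled:
+//
+//     let a = vdupq_n_u32(u32::MAX);
+//     // (0xFFFF_FFFF + rounding) >> 2 still exceeds u16::MAX, so every
+//     // lane saturates to 0xFFFF.
+//     let r: uint16x4_t = vqrshrn_n_u32::<2>(a);
+//     assert_eq!(vget_lane_u16::<0>(r), u16::MAX);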
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqrshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v8i8")]
+        fn _vqrshrun_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
+    }
+    _vqrshrun_n_s16(
+        a,
+        const {
+            int16x8_t([
+                -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16,
+                -N as i16,
+            ])
+        },
+    )
+    .as_unsigned()
+}
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqrshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v4i16")]
+        fn _vqrshrun_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+    }
+    _vqrshrun_n_s32(
+        a,
+        const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) },
+    )
+    .as_unsigned()
+}
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqrshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v2i32")]
+        fn _vqrshrun_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+    }
+    _vqrshrun_n_s64(a, const { int64x2_t([-N as i64, -N as i64]) }).as_unsigned()
+}
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshrun.v8i8"
+        )]
+        fn _vqrshrun_n_s16(a: int16x8_t, n: i32) -> int8x8_t;
+    }
+    _vqrshrun_n_s16(a, N).as_unsigned()
+}
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshrun.v4i16"
+        )]
+        fn _vqrshrun_n_s32(a: int32x4_t, n: i32) -> int16x4_t;
+    }
+    _vqrshrun_n_s32(a, N).as_unsigned()
+}
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshrun.v2i32"
+        )]
+        fn _vqrshrun_n_s64(a: int64x2_t, n: i32) -> int32x2_t;
+    }
+    _vqrshrun_n_s64(a, N).as_unsigned()
+}
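+// A hedged usage sketch for vqrshrun_n_s16 (illustrative only): the result
+// lanes are unsigned, so negative inputs clamp to zero.
+//
+//     let a = vdupq_n_s16(-100);
+//     let r: uint8x8_t = vqrshrun_n_s16::<2>(a);
+//     assert_eq!(vget_lane_u8::<0>(r), 0);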
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    vqshl_s8(a, vdup_n_s8(N as _))
+}
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    vqshlq_s8(a, vdupq_n_s8(N as _))
+}
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    vqshl_s16(a, vdup_n_s16(N as _))
+}
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    vqshlq_s16(a, vdupq_n_s16(N as _))
+}
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(N, 5);
+    vqshl_s32(a, vdup_n_s32(N as _))
+}
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 5);
+    vqshlq_s32(a, vdupq_n_s32(N as _))
+}
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
+    static_assert_uimm_bits!(N, 6);
+    vqshl_s64(a, vdup_n_s64(N as _))
+}
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 6);
+    vqshlq_s64(a, vdupq_n_s64(N as _))
+}
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    vqshl_u8(a, vdup_n_s8(N as _))
+}
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    vqshlq_u8(a, vdupq_n_s8(N as _))
+}
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    vqshl_u16(a, vdup_n_s16(N as _))
+}
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    vqshlq_u16(a, vdupq_n_s16(N as _))
+}
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 5);
+    vqshl_u32(a, vdup_n_s32(N as _))
+}
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 5);
+    vqshlq_u32(a, vdupq_n_s32(N as _))
+}
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
+    static_assert_uimm_bits!(N, 6);
+    vqshl_u64(a, vdup_n_s64(N as _))
+}
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl, N = 2)
+)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(N, 6);
+    vqshlq_u64(a, vdupq_n_s64(N as _))
+}
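+// A minimal sketch of the immediate saturating left shifts above
+// (illustrative only): shifting 100 left by 2 overflows i8, so the lane
+// saturates to i8::MAX instead of wrapping.
+//
+//     let a = vdup_n_s8(100);
+//     let r: int8x8_t = vqshl_n_s8::<2>(a);
+//     assert_eq!(vget_lane_s8::<0>(r), i8::MAX);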
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v8i8"
+        )]
+        fn _vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    _vqshl_s8(a, b)
+}
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v16i8"
+        )]
+        fn _vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vqshlq_s8(a, b)
+}
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v4i16"
+        )]
+        fn _vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    _vqshl_s16(a, b)
+}
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v8i16"
+        )]
+        fn _vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vqshlq_s16(a, b)
+}
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v2i32"
+        )]
+        fn _vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    _vqshl_s32(a, b)
+}
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v4i32"
+        )]
+        fn _vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vqshlq_s32(a, b)
+}
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v1i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v1i64"
+        )]
+        fn _vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+    }
+    _vqshl_s64(a, b)
+}
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshl.v2i64"
+        )]
+        fn _vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+    _vqshlq_s64(a, b)
+}
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v8i8"
+        )]
+        fn _vqshl_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    _vqshl_u8(a.as_signed(), b).as_unsigned()
+}
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v16i8"
+        )]
+        fn _vqshlq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vqshlq_u8(a.as_signed(), b).as_unsigned()
+}
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v4i16"
+        )]
+        fn _vqshl_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    _vqshl_u16(a.as_signed(), b).as_unsigned()
+}
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v8i16"
+        )]
+        fn _vqshlq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vqshlq_u16(a.as_signed(), b).as_unsigned()
+}
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v2i32"
+        )]
+        fn _vqshl_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    _vqshl_u32(a.as_signed(), b).as_unsigned()
+}
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v4i32"
+        )]
+        fn _vqshlq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vqshlq_u32(a.as_signed(), b).as_unsigned()
+}
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v1i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v1i64"
+        )]
+        fn _vqshl_u64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+    }
+    _vqshl_u64(a.as_signed(), b).as_unsigned()
+}
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqshl)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i64")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshl.v2i64"
+        )]
+        fn _vqshlq_u64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+    _vqshlq_u64(a.as_signed(), b).as_unsigned()
+}
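+// The register forms above take per-lane shift amounts in a *signed* vector
+// even for unsigned data (negative per-lane values select right shifts, per
+// the VQSHL register semantics). A hedged sketch, illustrative only:
+//
+//     let a = vdup_n_u8(200);
+//     let r: uint8x8_t = vqshl_u8(a, vdup_n_s8(1)); // 200 << 1 saturates to 255
+//     assert_eq!(vget_lane_u8::<0>(r), u8::MAX);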
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i8")]
+        fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> int8x8_t;
+    }
+    _vqshlu_n_s8(
+        a,
+        const {
+            int8x8_t([
+                N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
+            ])
+        },
+    )
+    .as_unsigned()
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v16i8")]
+        fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> int8x16_t;
+    }
+    _vqshluq_n_s8(
+        a,
+        const {
+            int8x16_t([
+                N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
+                N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
+            ])
+        },
+    )
+    .as_unsigned()
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i16")]
+        fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> int16x4_t;
+    }
+    _vqshlu_n_s16(
+        a,
+        const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) },
+    )
+    .as_unsigned()
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i16")]
+        fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> int16x8_t;
+    }
+    _vqshluq_n_s16(
+        a,
+        const {
+            int16x8_t([
+                N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16,
+            ])
+        },
+    )
+    .as_unsigned()
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 5);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i32")]
+        fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> int32x2_t;
+    }
+    _vqshlu_n_s32(a, const { int32x2_t([N as i32, N as i32]) }).as_unsigned()
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 5);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i32")]
+        fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> int32x4_t;
+    }
+    _vqshluq_n_s32(
+        a,
+        const { int32x4_t([N as i32, N as i32, N as i32, N as i32]) },
+    )
+    .as_unsigned()
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshlu_n_s64<const N: i32>(a: int64x1_t) -> uint64x1_t {
+    static_assert_uimm_bits!(N, 6);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v1i64")]
+        fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> int64x1_t;
+    }
+    _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }).as_unsigned()
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(N, 6);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i64")]
+        fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> int64x2_t;
+    }
+    _vqshluq_n_s64(a, const { int64x2_t([N as i64, N as i64]) }).as_unsigned()
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v8i8"
+        )]
+        fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> int8x8_t;
+    }
+    _vqshlu_n_s8(
+        a,
+        const {
+            int8x8_t([
+                N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
+            ])
+        },
+    )
+    .as_unsigned()
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v16i8"
+        )]
+        fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> int8x16_t;
+    }
+    _vqshluq_n_s8(
+        a,
+        const {
+            int8x16_t([
+                N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
+                N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8,
+            ])
+        },
+    )
+    .as_unsigned()
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v4i16"
+        )]
+        fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> int16x4_t;
+    }
+    _vqshlu_n_s16(
+        a,
+        const { int16x4_t([N as i16, N as i16, N as i16, N as i16]) },
+    )
+    .as_unsigned()
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v8i16"
+        )]
+        fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> int16x8_t;
+    }
+    _vqshluq_n_s16(
+        a,
+        const {
+            int16x8_t([
+                N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16,
+            ])
+        },
+    )
+    .as_unsigned()
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 5);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v2i32"
+        )]
+        fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> int32x2_t;
+    }
+    _vqshlu_n_s32(a, const { int32x2_t([N as i32, N as i32]) }).as_unsigned()
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 5);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v4i32"
+        )]
+        fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> int32x4_t;
+    }
+    _vqshluq_n_s32(
+        a,
+        const { int32x4_t([N as i32, N as i32, N as i32, N as i32]) },
+    )
+    .as_unsigned()
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlu_n_s64<const N: i32>(a: int64x1_t) -> uint64x1_t {
+    static_assert_uimm_bits!(N, 6);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v1i64"
+        )]
+        fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> int64x1_t;
+    }
+    _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }).as_unsigned()
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(N, 6);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v2i64"
+        )]
+        fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> int64x2_t;
+    }
+    _vqshluq_n_s64(a, const { int64x2_t([N as i64, N as i64]) }).as_unsigned()
+}
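+// A hedged sketch for the "shift left unsigned" family above (illustrative
+// only): the input is signed but the result is unsigned, so negative lanes
+// saturate to zero while overflowing positive results clamp to the unsigned
+// maximum.
+//
+//     let a = vdup_n_s8(-1);
+//     let r: uint8x8_t = vqshlu_n_s8::<3>(a);
+//     assert_eq!(vget_lane_u8::<0>(r), 0);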
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v8i8")]
+        fn _vqshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
+    }
+    _vqshrn_n_s16(
+        a,
+        const {
+            int16x8_t([
+                -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16,
+                -N as i16,
+            ])
+        },
+    )
+}
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v4i16")]
+        fn _vqshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+    }
+    _vqshrn_n_s32(
+        a,
+        const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) },
+    )
+}
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v2i32")]
+        fn _vqshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+    }
+    _vqshrn_n_s64(a, const { int64x2_t([-N as i64, -N as i64]) })
+}
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshrn.v8i8"
+        )]
+        fn _vqshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t;
+    }
+    _vqshrn_n_s16(a, N)
+}
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshrn.v4i16"
+        )]
+        fn _vqshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t;
+    }
+    _vqshrn_n_s32(a, N)
+}
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshrn.v2i32"
+        )]
+        fn _vqshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t;
+    }
+    _vqshrn_n_s64(a, N)
+}
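+// Illustrative sketch (editor's note): `vqshrn_n_*` arithmetically shifts
+// each wide lane right by N and then saturates into the half-width signed
+// type. With N = 2 a lane of 1000i16 becomes 1000 >> 2 = 250, which
+// saturates to the i8 maximum of 127:
+//
+//     unsafe {
+//         let r = vqshrn_n_s16::<2>(vdupq_n_s16(1000));
+//         assert_eq!(vget_lane_s8::<0>(r), 127);
+//     }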
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")]
+        fn _vqshrn_n_u16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
+    }
+    _vqshrn_n_u16(
+        a.as_signed(),
+        const {
+            uint16x8_t([
+                -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16,
+                -N as u16,
+            ])
+        }
+        .as_signed(),
+    )
+    .as_unsigned()
+}
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")]
+        fn _vqshrn_n_u32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+    }
+    _vqshrn_n_u32(
+        a.as_signed(),
+        const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }.as_signed(),
+    )
+    .as_unsigned()
+}
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")]
+        fn _vqshrn_n_u64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+    }
+    _vqshrn_n_u64(
+        a.as_signed(),
+        const { uint64x2_t([-N as u64, -N as u64]) }.as_signed(),
+    )
+    .as_unsigned()
+}
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshrn.v8i8"
+        )]
+        fn _vqshrn_n_u16(a: int16x8_t, n: i32) -> int8x8_t;
+    }
+    _vqshrn_n_u16(a.as_signed(), N).as_unsigned()
+}
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshrn.v4i16"
+        )]
+        fn _vqshrn_n_u32(a: int32x4_t, n: i32) -> int16x4_t;
+    }
+    _vqshrn_n_u32(a.as_signed(), N).as_unsigned()
+}
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqshrn.v2i32"
+        )]
+        fn _vqshrn_n_u64(a: int64x2_t, n: i32) -> int32x2_t;
+    }
+    _vqshrn_n_u64(a.as_signed(), N).as_unsigned()
+}
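+// Illustrative sketch (editor's note): the unsigned variants behave the same
+// way but saturate into the half-width unsigned range. 4096u16 >> 4 = 256,
+// one past u8::MAX, so the lane clamps to 255:
+//
+//     unsafe {
+//         let r = vqshrn_n_u16::<4>(vdupq_n_u16(4096));
+//         assert_eq!(vget_lane_u8::<0>(r), 255);
+//     }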
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v8i8")]
+        fn _vqshrun_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
+    }
+    _vqshrun_n_s16(
+        a,
+        const {
+            int16x8_t([
+                -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16,
+                -N as i16,
+            ])
+        },
+    )
+    .as_unsigned()
+}
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v4i16")]
+        fn _vqshrun_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+    }
+    _vqshrun_n_s32(
+        a,
+        const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) },
+    )
+    .as_unsigned()
+}
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")]
+        fn _vqshrun_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+    }
+    _vqshrun_n_s64(a, const { int64x2_t([-N as i64, -N as i64]) }).as_unsigned()
+}
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshrun.v8i8"
+        )]
+        fn _vqshrun_n_s16(a: int16x8_t, n: i32) -> int8x8_t;
+    }
+    _vqshrun_n_s16(a, N).as_unsigned()
+}
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshrun.v4i16"
+        )]
+        fn _vqshrun_n_s32(a: int32x4_t, n: i32) -> int16x4_t;
+    }
+    _vqshrun_n_s32(a, N).as_unsigned()
+}
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshrun.v2i32"
+        )]
+        fn _vqshrun_n_s64(a: int64x2_t, n: i32) -> int32x2_t;
+    }
+    _vqshrun_n_s64(a, N).as_unsigned()
+}
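+// Illustrative sketch (editor's note): `vqshrun_n_*` takes *signed* input
+// but narrows into an *unsigned* type, so a shift that lands on a negative
+// value clamps to 0 while large positives clamp to the unsigned maximum:
+//
+//     unsafe {
+//         // -8 >> 2 = -2, clamps to 0; 2000 >> 2 = 500, clamps to 255.
+//         assert_eq!(vget_lane_u8::<0>(vqshrun_n_s16::<2>(vdupq_n_s16(-8))), 0);
+//         assert_eq!(vget_lane_u8::<0>(vqshrun_n_s16::<2>(vdupq_n_s16(2000))), 255);
+//     }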
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqsub.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v8i8")]
+        fn _vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    _vqsub_s8(a, b)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqsub.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v16i8")]
+        fn _vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vqsubq_s8(a, b)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqsub.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v4i16")]
+        fn _vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    _vqsub_s16(a, b)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqsub.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v8i16")]
+        fn _vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vqsubq_s16(a, b)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqsub.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v2i32")]
+        fn _vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    _vqsub_s32(a, b)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqsub.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v4i32")]
+        fn _vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vqsubq_s32(a, b)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqsub.v1i64"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v1i64")]
+        fn _vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+    }
+    _vqsub_s64(a, b)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sqsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqsub.v2i64"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v2i64")]
+        fn _vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+    _vqsubq_s64(a, b)
+}
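+// Illustrative sketch (editor's note): signed saturating subtraction clamps
+// at the type bounds instead of wrapping, which is why the arm target lowers
+// straight to `llvm.ssub.sat.*`:
+//
+//     unsafe {
+//         // -100 - 100 = -200 clamps to i8::MIN.
+//         let r = vqsub_s8(vdup_n_s8(-100), vdup_n_s8(100));
+//         assert_eq!(vget_lane_s8::<0>(r), -128);
+//     }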
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqsub.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v8i8")]
+        fn _vqsub_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    _vqsub_u8(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqsub.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v16i8")]
+        fn _vqsubq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+    _vqsubq_u8(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqsub.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v4i16")]
+        fn _vqsub_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    _vqsub_u16(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqsub.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v8i16")]
+        fn _vqsubq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    _vqsubq_u16(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqsub.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v2i32")]
+        fn _vqsub_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    _vqsub_u32(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqsub.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v4i32")]
+        fn _vqsubq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+    _vqsubq_u32(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqsub.v1i64"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v1i64")]
+        fn _vqsub_u64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+    }
+    _vqsub_u64(a.as_signed(), b.as_signed()).as_unsigned()
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(uqsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqsub.v2i64"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v2i64")]
+        fn _vqsubq_u64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+    _vqsubq_u64(a.as_signed(), b.as_signed()).as_unsigned()
+}
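+// Illustrative sketch (editor's note): the unsigned forms clamp at zero, so
+// a subtraction that would wrap instead floors out:
+//
+//     unsafe {
+//         // 10 - 20 saturates to 0 rather than wrapping to 246.
+//         let r = vqsub_u8(vdup_n_u8(10), vdup_n_u8(20));
+//         assert_eq!(vget_lane_u8::<0>(r), 0);
+//     }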
+#[doc = "Rounding Add returning High Narrow (high half)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(raddhn2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vraddhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
+    let x = vraddhn_s16(b, c);
+    simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+#[doc = "Rounding Add returning High Narrow (high half)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(raddhn2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vraddhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
+    let x = vraddhn_s32(b, c);
+    simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7])
+}
+#[doc = "Rounding Add returning High Narrow (high half)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(raddhn2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vraddhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
+    let x = vraddhn_s64(b, c);
+    simd_shuffle!(a, x, [0, 1, 2, 3])
+}
+#[doc = "Rounding Add returning High Narrow (high half)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(raddhn2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vraddhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
+    let x: uint8x8_t = transmute(vraddhn_s16(transmute(b), transmute(c)));
+    simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+#[doc = "Rounding Add returning High Narrow (high half)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(raddhn2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vraddhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
+    let x: uint16x4_t = transmute(vraddhn_s32(transmute(b), transmute(c)));
+    simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7])
+}
+#[doc = "Rounding Add returning High Narrow (high half)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(raddhn2)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vraddhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
+    let x: uint32x2_t = transmute(vraddhn_s64(transmute(b), transmute(c)));
+    simd_shuffle!(a, x, [0, 1, 2, 3])
+}
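+// Illustrative sketch (editor's note): the `_high` forms narrow `b + c`
+// exactly as `vraddhn_*` does, then append that narrow result to the
+// existing low half `a`; the `simd_shuffle!` above is the concatenation
+// step. Assuming a little-endian NEON target:
+//
+//     unsafe {
+//         let low = vdup_n_s8(7);
+//         let r = vraddhn_high_s16(low, vdupq_n_s16(256), vdupq_n_s16(256));
+//         assert_eq!(vgetq_lane_s8::<0>(r), 7); // low half is `a`, untouched
+//         assert_eq!(vgetq_lane_s8::<8>(r), 2); // (256 + 256 + 128) >> 8
+//     }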
+#[doc = "Rounding Add returning High Narrow."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(raddhn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.raddhn.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v8i8")]
+        fn _vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t;
+    }
+    _vraddhn_s16(a, b)
+}
+#[doc = "Rounding Add returning High Narrow."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(raddhn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.raddhn.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v4i16")]
+        fn _vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t;
+    }
+    _vraddhn_s32(a, b)
+}
+#[doc = "Rounding Add returning High Narrow."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(raddhn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.raddhn.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v2i32")]
+        fn _vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t;
+    }
+    _vraddhn_s64(a, b)
+}
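+// Illustrative sketch (editor's note): "rounding" here means a constant of
+// 1 << (half_width - 1) is added before the high half is extracted, so the
+// i16 -> i8 narrow computes (a + b + 128) >> 8. A non-rounding `addhn` of
+// 128 + 0 would give 0; the rounded form gives 1:
+//
+//     unsafe {
+//         let r = vraddhn_s16(vdupq_n_s16(128), vdupq_n_s16(0));
+//         assert_eq!(vget_lane_s8::<0>(r), 1); // (128 + 0 + 128) >> 8
+//     }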
+#[doc = "Rounding Add returning High Narrow."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(raddhn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
+    transmute(vraddhn_s16(transmute(a), transmute(b)))
+}
+#[doc = "Rounding Add returning High Narrow."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(raddhn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
+    let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vraddhn_s16(transmute(a), transmute(b)));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Rounding Add returning High Narrow."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(raddhn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
+    transmute(vraddhn_s32(transmute(a), transmute(b)))
+}
+#[doc = "Rounding Add returning High Narrow."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(raddhn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
+    let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+    let ret_val: uint16x4_t = transmute(vraddhn_s32(transmute(a), transmute(b)));
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Rounding Add returning High Narrow."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(raddhn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
+    transmute(vraddhn_s64(transmute(a), transmute(b)))
+}
+#[doc = "Rounding Add returning High Narrow."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(raddhn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
+    let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]);
+    let ret_val: uint32x2_t = transmute(vraddhn_s64(transmute(a), transmute(b)));
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
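+// Editor's note on the `target_endian = "big"` duplicates above: the
+// `transmute`-based bodies assume little-endian lane numbering, so the
+// big-endian variants reverse the lanes of every operand on the way in and
+// reverse the result on the way out. This keeps lane 0 meaning
+// "lowest-indexed element" on both endiannesses; the pairs are otherwise
+// identical.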
+#[doc = "Reciprocal estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(frecpe)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrecpe_f32(a: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecpe.v2f32"
+        )]
+        fn _vrecpe_f32(a: float32x2_t) -> float32x2_t;
+    }
+    _vrecpe_f32(a)
+}
+#[doc = "Reciprocal estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(frecpe)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrecpeq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecpe.v4f32"
+        )]
+        fn _vrecpeq_f32(a: float32x4_t) -> float32x4_t;
+    }
+    _vrecpeq_f32(a)
+}
+#[doc = "Unsigned reciprocal estimate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(urecpe)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urecpe.v2i32"
+        )]
+        fn _vrecpe_u32(a: int32x2_t) -> int32x2_t;
+    }
+    _vrecpe_u32(a.as_signed()).as_unsigned()
+}
+#[doc = "Unsigned reciprocal estimate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(urecpe)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrecpeq_u32(a: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urecpe.v4i32"
+        )]
+        fn _vrecpeq_u32(a: int32x4_t) -> int32x4_t;
+    }
+    _vrecpeq_u32(a.as_signed()).as_unsigned()
+}
+#[doc = "Floating-point reciprocal step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(frecps)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecps.v2f32"
+        )]
+        fn _vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    _vrecps_f32(a, b)
+}
+#[doc = "Floating-point reciprocal step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(frecps)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecps.v4f32"
+        )]
+        fn _vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    _vrecpsq_f32(a, b)
+}
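+// Illustrative sketch (editor's note): `vrecpe` alone gives only a rough
+// (~8-bit) estimate; `vrecps` computes the Newton-Raphson correction term
+// (2 - a * x), so one refinement step is `x = x * vrecps(a, x)`:
+//
+//     unsafe {
+//         let a = vdup_n_f32(3.0);
+//         let mut x = vrecpe_f32(a);         // rough 1/3
+//         x = vmul_f32(x, vrecps_f32(a, x)); // first refinement
+//         x = vmul_f32(x, vrecps_f32(a, x)); // ~full f32 precision
+//         assert!((vget_lane_f32::<0>(x) - 1.0 / 3.0).abs() < 1e-6);
+//     }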
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
+    let ret_val: float32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
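+// Big-endian variants of the reinterpret casts cannot be a bare `transmute`:
+// when the element size changes (or the cast crosses between a scalar such
+// as `p128` and a vector), a raw bit cast on a big-endian target maps bytes
+// to different lanes than it would on little-endian. The `simd_shuffle!`
+// reversals on the input and/or result compensate for this so both
+// endiannesses observe the same lane values; the `assert_instr(nop)`
+// attributes reflect that the cast itself performs no computation.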
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
+    let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
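+// A reinterpret is purely a view change; no bits are modified. For example
+// (a hypothetical caller, little-endian lane semantics):
+//
+//     let ones = vdup_n_f32(1.0);                    // each lane = 0x3F80_0000
+//     let bytes: int8x8_t = vreinterpret_s8_f32(ones);
+//     // lanes 0..4 of `bytes` are [0x00, 0x00, 0x80, 0x3F] on little-endian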
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
+    let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t {
+    let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
+    let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+    transmute(a)
+}
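+// Casts whose result has a single lane (`int64x1_t`, `uint64x1_t`, and the
+// scalar `p128`) only need the input-side reversal: with one output lane
+// there is no lane order left to restore.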
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
+    let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
+    let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t {
+    let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
+    let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
+    let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: poly8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
+    let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: poly16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
+    let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
+    let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: int8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
+    let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: int16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t {
+    let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: int32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
+    let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: int64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
+    let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: uint8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
+    let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: uint16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
+    let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: uint32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
+    let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: uint64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
+    let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: poly8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
+    let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: poly16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
+    let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: float32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
+    let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
+    let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
+    let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
+    let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
+    let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
+    let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
+    let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
+    let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
+    let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
+    let a: int8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: float32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
+    let a: int8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
+    let a: int8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
+    let a: int8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
+    let a: int8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
+    let a: int8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
+    let a: int8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
+    let a: int8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
+    let a: int8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
+    let a: int8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
+    let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: float32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
+    let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: int8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
+    let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: int32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
+    let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
+    let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
+    let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: uint16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
+    let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: uint32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
+    let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
+    let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
+    let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: poly16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
+    let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: float32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
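+    // The lane reversal happens at each type's own granularity: the eight
+    // 16-bit input lanes are reversed before the cast, and the sixteen
+    // 8-bit output lanes are reversed after it.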
+    let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
+    let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
+    let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
+    let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
+    let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
+    let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
+    let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
+    let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
+    let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
+    let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: float32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
+    let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
+    let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
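+    // int64x1_t has a single lane, so only the two input lanes need
+    // reversing; no shuffle of the result is required.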
+    let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
+    let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
+    let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
+    let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
+    let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
+    let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: poly8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
+    let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: poly16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
+    let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: float32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
+    let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: int8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
+    let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: int16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
+    let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: int64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
+    let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: uint8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
+    let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: uint16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
+    let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: uint32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
+    let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: uint64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
+    let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: poly8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
+    let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: poly16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
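+    // The single-lane int64x1_t input needs no reversal; only the two result
+    // lanes are swapped.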
+    let ret_val: float32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
+    let ret_val: int8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
+    let ret_val: int16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
+    let ret_val: int32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
+    let ret_val: uint8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
+    let ret_val: uint16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
+    let ret_val: uint32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t {
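+    // Both types have a single 64-bit lane, so one definition serves either
+    // endianness and no shuffles are needed.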
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
+    let ret_val: poly8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
+    let ret_val: poly16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
+    let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: float32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
+    let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
+    let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
+    let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
+    let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
+    let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
+    let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
+    let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
+    let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: poly8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
+    let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: poly16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: float32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    transmute(a)
+}
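+// NOTE: `int64x1_t` has a single lane, so there is no lane order to restore;
+// the big-endian variant above only reverses the eight lanes of the
+// `uint8x8_t` argument and returns the `transmute` result directly.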
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
+    let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: float32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
+    let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
+    let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
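+// NOTE: the index arrays always match the lane count of the value being
+// shuffled: the `uint8x16_t` argument above is reversed with 16 indices while
+// the `int16x8_t` result is reversed with 8, even though both shuffles cover
+// the same 128 bits.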
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
+    let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
+    let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
+    let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
+    transmute(a)
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshrn.v2i32"
-        )]
-        fn _vqshrn_n_u64(a: int64x2_t, n: i32) -> int32x2_t;
-    }
-    _vqshrn_n_u64(a.as_signed(), N).as_unsigned()
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
+    let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v8i8")]
-        fn _vqshrun_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
-    }
-    _vqshrun_n_s16(
-        a,
-        const {
-            int16x8_t([
-                -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16,
-                -N as i16,
-            ])
-        },
-    )
-    .as_unsigned()
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
+    transmute(a)
 }
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v4i16")]
-        fn _vqshrun_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
-    }
-    _vqshrun_n_s32(
-        a,
-        const { int32x4_t([-N as i32, -N as i32, -N as i32, -N as i32]) },
-    )
-    .as_unsigned()
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
+    let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")]
-        fn _vqshrun_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
-    }
-    _vqshrun_n_s64(a, const { int64x2_t([-N as i64, -N as i64]) }).as_unsigned()
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
+    transmute(a)
 }
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshrun.v8i8"
-        )]
-        fn _vqshrun_n_s16(a: int16x8_t, n: i32) -> int8x8_t;
-    }
-    _vqshrun_n_s16(a, N).as_unsigned()
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
+    let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshrun.v4i16"
-        )]
-        fn _vqshrun_n_s32(a: int32x4_t, n: i32) -> int16x4_t;
-    }
-    _vqshrun_n_s32(a, N).as_unsigned()
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
+    transmute(a)
 }
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshrun.v2i32"
-        )]
-        fn _vqshrun_n_s64(a: int64x2_t, n: i32) -> int32x2_t;
-    }
-    _vqshrun_n_s64(a, N).as_unsigned()
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
+    let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s8)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26989,28 +41099,21 @@ pub unsafe fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqsub.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.8i8")]
-        fn _vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-    _vqsub_s8(a, b)
+pub unsafe fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
+    transmute(a)
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s8)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27020,28 +41123,23 @@ pub unsafe fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqsub.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.16i8")]
-        fn _vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    }
-    _vqsubq_s8(a, b)
+pub unsafe fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
+    let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: float32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27051,28 +41149,21 @@ pub unsafe fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqsub.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.4i16")]
-        fn _vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    _vqsub_s16(a, b)
+pub unsafe fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
+    transmute(a)
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27082,28 +41173,23 @@ pub unsafe fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqsub.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.8i16")]
-        fn _vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-    _vqsubq_s16(a, b)
+pub unsafe fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
+    let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: int8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27113,28 +41199,21 @@ pub unsafe fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqsub.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.2i32")]
-        fn _vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    _vqsub_s32(a, b)
+pub unsafe fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
+    transmute(a)
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27144,28 +41223,23 @@ pub unsafe fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqsub.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.4i32")]
-        fn _vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-    _vqsubq_s32(a, b)
+pub unsafe fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
+    let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: int16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27175,28 +41249,21 @@ pub unsafe fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqsub.v1i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.1i64")]
-        fn _vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
-    }
-    _vqsub_s64(a, b)
+pub unsafe fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
+    transmute(a)
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27206,28 +41273,23 @@ pub unsafe fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqsub.v2i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.2i64")]
-        fn _vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
-    }
-    _vqsubq_s64(a, b)
+pub unsafe fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
+    let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: int32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u8)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27237,28 +41299,21 @@ pub unsafe fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqsub.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.8i8")]
-        fn _vqsub_u8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-    _vqsub_u8(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
+    transmute(a)
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u8)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27268,28 +41323,22 @@ pub unsafe fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqsub.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.16i8")]
-        fn _vqsubq_u8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    }
-    _vqsubq_u8(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
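+    // The destination has a single 64-bit lane, so only the four 16-bit
+    // input lanes need reordering; no result shuffle is required.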
+    let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    transmute(a)
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27299,28 +41348,21 @@ pub unsafe fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqsub.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.4i16")]
-        fn _vqsub_u16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    _vqsub_u16(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
+    transmute(a)
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u16)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27330,28 +41372,23 @@ pub unsafe fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqsub.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.8i16")]
-        fn _vqsubq_u16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-    _vqsubq_u16(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
+    let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27361,28 +41398,21 @@ pub unsafe fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqsub.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.2i32")]
-        fn _vqsub_u32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    _vqsub_u32(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
+    transmute(a)
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27392,28 +41422,23 @@ pub unsafe fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqsub.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.4i32")]
-        fn _vqsubq_u32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-    _vqsubq_u32(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
+    let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: uint32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27423,28 +41448,21 @@ pub unsafe fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqsub.v1i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.1i64")]
-        fn _vqsub_u64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
-    }
-    _vqsub_u64(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
+    transmute(a)
 }
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u64)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27454,28 +41472,22 @@ pub unsafe fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqsub.v2i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.2i64")]
-        fn _vqsubq_u64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
-    }
-    _vqsubq_u64(a.as_signed(), b.as_signed()).as_unsigned()
+pub unsafe fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
+    let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    transmute(a)
 }
-#[doc = "Reciprocal estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frecpe)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27485,28 +41497,21 @@ pub unsafe fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vrecpe_f32(a: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecpe.v2f32"
-        )]
-        fn _vrecpe_f32(a: float32x2_t) -> float32x2_t;
-    }
-    _vrecpe_f32(a)
+pub unsafe fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
+    transmute(a)
 }
-#[doc = "Reciprocal estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frecpe)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27516,28 +41521,23 @@ pub unsafe fn vrecpe_f32(a: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vrecpeq_f32(a: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecpe.v4f32"
-        )]
-        fn _vrecpeq_f32(a: float32x4_t) -> float32x4_t;
-    }
-    _vrecpeq_f32(a)
+pub unsafe fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
+    let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
-#[doc = "Unsigned reciprocal estimate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_u32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urecpe)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27547,28 +41547,21 @@ pub unsafe fn vrecpeq_f32(a: float32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urecpe.v2i32"
-        )]
-        fn _vrecpe_u32(a: int32x2_t) -> int32x2_t;
-    }
-    _vrecpe_u32(a.as_signed()).as_unsigned()
+pub unsafe fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t {
+    transmute(a)
 }
-#[doc = "Unsigned reciprocal estimate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_u32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urecpe)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27578,28 +41571,23 @@ pub unsafe fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vrecpeq_u32(a: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urecpe.v4i32"
-        )]
-        fn _vrecpeq_u32(a: int32x4_t) -> int32x4_t;
-    }
-    _vrecpeq_u32(a.as_signed()).as_unsigned()
+pub unsafe fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t {
+    let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: poly16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
-#[doc = "Floating-point reciprocal step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frecps)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27609,28 +41597,21 @@ pub unsafe fn vrecpeq_u32(a: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v2f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecps.v2f32"
-        )]
-        fn _vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-    }
-    _vrecps_f32(a, b)
+pub unsafe fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
+    transmute(a)
 }
-#[doc = "Floating-point reciprocal step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f32)"]
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frecps)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27640,22 +41621,17 @@ pub unsafe fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecps.v4f32"
-        )]
-        fn _vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
-    }
-    _vrecpsq_f32(a, b)
+pub unsafe fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
+    let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: float32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -27671,14 +41647,15 @@ pub unsafe fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
+pub unsafe fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -27694,14 +41671,21 @@ pub unsafe fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
+    let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -27717,14 +41701,15 @@ pub unsafe fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
+pub unsafe fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -27740,14 +41725,17 @@ pub unsafe fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
+    let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -27763,14 +41751,15 @@ pub unsafe fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
+pub unsafe fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -27786,14 +41775,17 @@ pub unsafe fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
+    let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -27809,14 +41801,15 @@ pub unsafe fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
+pub unsafe fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -27832,14 +41825,17 @@ pub unsafe fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
+    let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -27855,14 +41851,15 @@ pub unsafe fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
+pub unsafe fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -27878,14 +41875,21 @@ pub unsafe fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
+    let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -27901,14 +41905,15 @@ pub unsafe fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
+pub unsafe fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -27924,14 +41929,17 @@ pub unsafe fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
-    transmute(a)
+pub unsafe fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
+    let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -27947,14 +41955,15 @@ pub unsafe fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
+pub unsafe fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -27970,14 +41979,17 @@ pub unsafe fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
+    let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -27993,14 +42005,15 @@ pub unsafe fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t {
+pub unsafe fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28016,14 +42029,21 @@ pub unsafe fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
+    let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28039,14 +42059,15 @@ pub unsafe fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
+pub unsafe fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28062,14 +42083,17 @@ pub unsafe fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
+    let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28085,14 +42109,15 @@ pub unsafe fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
+pub unsafe fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28108,14 +42133,17 @@ pub unsafe fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
-    transmute(a)
+pub unsafe fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
+    let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: float32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28131,14 +42159,15 @@ pub unsafe fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
+pub unsafe fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28154,14 +42183,17 @@ pub unsafe fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
-    transmute(a)
+pub unsafe fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
+    let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28177,14 +42209,15 @@ pub unsafe fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
+pub unsafe fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28200,14 +42233,17 @@ pub unsafe fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
-    transmute(a)
+pub unsafe fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
+    let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28223,14 +42259,15 @@ pub unsafe fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
+pub unsafe fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28246,14 +42283,17 @@ pub unsafe fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
-    transmute(a)
+pub unsafe fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
+    let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28269,14 +42309,15 @@ pub unsafe fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
+pub unsafe fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28292,14 +42333,16 @@ pub unsafe fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
+pub unsafe fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
+    let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
     transmute(a)
 }
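
// For illustration only — when the result type has a single lane
// (int64x1_t), there is nothing to reorder on the output side, so only the
// two u32 input lanes are swapped. Scalar sketch with a hypothetical name:
fn vreinterpret_s64_u32_model(a: [u32; 2]) -> [i64; 1] {
    let mut a = a;
    a.reverse(); // mirrors simd_shuffle!(a, a, [1, 0])
    unsafe { core::mem::transmute(a) } // no trailing shuffle needed
}
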
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28315,14 +42358,15 @@ pub unsafe fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
+pub unsafe fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28338,14 +42382,17 @@ pub unsafe fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
-    transmute(a)
+pub unsafe fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
+    let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28361,14 +42408,15 @@ pub unsafe fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
+pub unsafe fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28384,14 +42432,17 @@ pub unsafe fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
-    transmute(a)
+pub unsafe fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
+    let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28407,14 +42458,15 @@ pub unsafe fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
+pub unsafe fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28430,14 +42482,16 @@ pub unsafe fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
+pub unsafe fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
+    let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28453,14 +42507,15 @@ pub unsafe fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
+pub unsafe fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28476,14 +42531,17 @@ pub unsafe fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
-    transmute(a)
+pub unsafe fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
+    let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: poly8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28499,14 +42557,15 @@ pub unsafe fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
+pub unsafe fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28522,14 +42581,17 @@ pub unsafe fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
-    transmute(a)
+pub unsafe fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
+    let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: poly16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28545,14 +42607,15 @@ pub unsafe fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
+pub unsafe fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28568,14 +42631,17 @@ pub unsafe fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
+    let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: float32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28591,14 +42657,15 @@ pub unsafe fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
+pub unsafe fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28614,14 +42681,21 @@ pub unsafe fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
+    let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: int8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28637,14 +42711,15 @@ pub unsafe fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
+pub unsafe fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28660,14 +42735,17 @@ pub unsafe fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
+    let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: int16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28683,14 +42761,15 @@ pub unsafe fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
+pub unsafe fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28706,14 +42785,17 @@ pub unsafe fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
+    let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: int32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28729,14 +42811,15 @@ pub unsafe fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
+pub unsafe fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28752,14 +42835,17 @@ pub unsafe fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
+    let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: int64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28775,14 +42861,15 @@ pub unsafe fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
+pub unsafe fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28798,14 +42885,21 @@ pub unsafe fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
+    let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: uint8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28821,14 +42915,15 @@ pub unsafe fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
+pub unsafe fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28844,14 +42939,17 @@ pub unsafe fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
+    let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: uint16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28867,14 +42965,15 @@ pub unsafe fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
+pub unsafe fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28890,14 +42989,17 @@ pub unsafe fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
+    let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: uint64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28913,14 +43015,15 @@ pub unsafe fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
+pub unsafe fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28936,14 +43039,21 @@ pub unsafe fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
+    let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: poly8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
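
// For illustration only — a property these casts are meant to preserve
// (hypothetical check, not part of this patch): reinterpreting to another
// lane type and back must restore the original lanes, and the endian
// shuffles above exist precisely to keep this identity true on big-endian.
// Scalar stand-in:
fn roundtrip_u32x4(a: [u32; 4]) -> [u32; 4] {
    let bytes: [u8; 16] = unsafe { core::mem::transmute(a) };
    unsafe { core::mem::transmute(bytes) } // always equals `a`
}
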
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28959,14 +43069,15 @@ pub unsafe fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
+pub unsafe fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -28982,14 +43093,17 @@ pub unsafe fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
+    let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: poly16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29005,14 +43119,15 @@ pub unsafe fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
+pub unsafe fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29028,14 +43143,16 @@ pub unsafe fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
-    transmute(a)
+pub unsafe fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
+    let ret_val: float32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
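
// For illustration only — the mirror of the single-lane-result case: here
// the input (uint64x1_t) has one lane, so no input shuffle is emitted and
// only the two f32 result lanes are swapped. Hypothetical scalar model:
fn vreinterpret_f32_u64_model(a: [u64; 1]) -> [f32; 2] {
    let mut ret_val: [f32; 2] = unsafe { core::mem::transmute(a) };
    ret_val.reverse(); // mirrors simd_shuffle!(ret_val, ret_val, [1, 0])
    ret_val
}
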
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29051,14 +43168,15 @@ pub unsafe fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
+pub unsafe fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29074,14 +43192,16 @@ pub unsafe fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
-    transmute(a)
+pub unsafe fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
+    let ret_val: int8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29097,14 +43217,15 @@ pub unsafe fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
+pub unsafe fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29120,14 +43241,16 @@ pub unsafe fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
-    transmute(a)
+pub unsafe fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
+    let ret_val: int16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29143,14 +43266,15 @@ pub unsafe fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
+pub unsafe fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29166,11 +43290,12 @@ pub unsafe fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
-    transmute(a)
+pub unsafe fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
+    let ret_val: int32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
@@ -29189,14 +43314,15 @@ pub unsafe fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
+pub unsafe fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29212,14 +43338,15 @@ pub unsafe fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
+pub unsafe fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29235,14 +43362,16 @@ pub unsafe fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
-    transmute(a)
+pub unsafe fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
+    let ret_val: uint8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29258,14 +43387,15 @@ pub unsafe fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
+pub unsafe fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29281,14 +43411,16 @@ pub unsafe fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
-    transmute(a)
+pub unsafe fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
+    let ret_val: uint16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29304,14 +43436,15 @@ pub unsafe fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
+pub unsafe fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29327,14 +43460,16 @@ pub unsafe fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
-    transmute(a)
+pub unsafe fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
+    let ret_val: uint32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29350,14 +43485,15 @@ pub unsafe fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
+pub unsafe fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29373,14 +43509,16 @@ pub unsafe fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
-    transmute(a)
+pub unsafe fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
+    let ret_val: poly8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29396,14 +43534,15 @@ pub unsafe fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
+pub unsafe fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29419,14 +43558,16 @@ pub unsafe fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
-    transmute(a)
+pub unsafe fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
+    let ret_val: poly16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29442,14 +43583,15 @@ pub unsafe fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
+pub unsafe fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29465,14 +43607,17 @@ pub unsafe fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
+    let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: float32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
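
For the 128-bit q-forms with a multi-lane source, the big-endian variant shuffles twice: the source is first put back into raw lane order, then the transmuted result is re-numbered for the wider element count. A hedged model of the vreinterpretq_f32_u64 shape (u32 stands in for f32 so the example stays bit-exact; the name and array model are illustrative, not stdarch API):

fn reinterpretq_u32x4(a: [u64; 2]) -> [u32; 4] {
    // Step 1: undo the public lane numbering of the source on big-endian
    // (the `simd_shuffle!(a, a, [1, 0])` line above).
    let raw = if cfg!(target_endian = "big") { [a[1], a[0]] } else { a };
    // Step 2: the bitwise reinterpret (`transmute`), modelled byte-wise.
    let mut lanes = [0u32; 4];
    for (i, d) in raw.iter().enumerate() {
        let b = d.to_ne_bytes();
        lanes[2 * i] = u32::from_ne_bytes(b[0..4].try_into().unwrap());
        lanes[2 * i + 1] = u32::from_ne_bytes(b[4..8].try_into().unwrap());
    }
    // Step 3: re-number the four-lane result (the `[3, 2, 1, 0]` shuffle).
    if cfg!(target_endian = "big") {
        [lanes[3], lanes[2], lanes[1], lanes[0]]
    } else {
        lanes
    }
}

Both branches agree lane-for-lane, so callers observe identical semantics on aarch64 and aarch64_be.
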
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29488,14 +43633,15 @@ pub unsafe fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
+pub unsafe fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29511,14 +43657,21 @@ pub unsafe fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
+    let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29534,14 +43687,15 @@ pub unsafe fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
+pub unsafe fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29557,14 +43711,17 @@ pub unsafe fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
+    let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29580,14 +43737,15 @@ pub unsafe fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
+pub unsafe fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29603,14 +43761,17 @@ pub unsafe fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
+    let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29626,14 +43787,15 @@ pub unsafe fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
+pub unsafe fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29649,14 +43811,17 @@ pub unsafe fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
+    let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29672,14 +43837,15 @@ pub unsafe fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
+pub unsafe fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29695,14 +43861,21 @@ pub unsafe fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
+    let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29718,14 +43891,15 @@ pub unsafe fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t {
+pub unsafe fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29741,14 +43915,17 @@ pub unsafe fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
+    let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29764,14 +43941,15 @@ pub unsafe fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
+pub unsafe fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29787,14 +43965,17 @@ pub unsafe fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
+    let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29810,14 +43991,15 @@ pub unsafe fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
+pub unsafe fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29833,14 +44015,21 @@ pub unsafe fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
+    let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: poly8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29856,14 +44045,15 @@ pub unsafe fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
+pub unsafe fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29879,14 +44069,17 @@ pub unsafe fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
+    let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: poly16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29902,14 +44095,15 @@ pub unsafe fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
+pub unsafe fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29925,14 +44119,17 @@ pub unsafe fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
-    transmute(a)
+pub unsafe fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: float32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29948,14 +44145,15 @@ pub unsafe fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
+pub unsafe fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29971,14 +44169,17 @@ pub unsafe fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
-    transmute(a)
+pub unsafe fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -29994,14 +44195,15 @@ pub unsafe fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
+pub unsafe fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30017,14 +44219,17 @@ pub unsafe fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
-    transmute(a)
+pub unsafe fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30040,14 +44245,15 @@ pub unsafe fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
+pub unsafe fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30063,14 +44269,17 @@ pub unsafe fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
-    transmute(a)
+pub unsafe fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30086,14 +44295,15 @@ pub unsafe fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
+pub unsafe fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30109,14 +44319,16 @@ pub unsafe fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
+pub unsafe fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
     transmute(a)
 }
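
vreinterpret_s64_p8 above is the asymmetric case: the eight-lane source still needs its order restored, but a one-element result has no lane order of its own, so the output shuffle disappears. A small model under the same assumptions as the earlier sketches (illustrative name, plain arrays in place of the NEON types):

fn reinterpret_u64_from_u8x8(a: [u8; 8]) -> u64 {
    // Undo the public lane numbering of the source on big-endian
    // (the `[7, 6, 5, 4, 3, 2, 1, 0]` shuffle above).
    let mut raw = a;
    if cfg!(target_endian = "big") {
        raw.reverse();
    }
    // The bitwise reinterpret: lane 0 of the source becomes the low byte
    // of the result on both targets.
    u64::from_ne_bytes(raw)
}
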
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30132,14 +44344,15 @@ pub unsafe fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
+pub unsafe fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30155,14 +44368,17 @@ pub unsafe fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
-    transmute(a)
+pub unsafe fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30178,14 +44394,15 @@ pub unsafe fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
+pub unsafe fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30201,14 +44418,17 @@ pub unsafe fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
-    transmute(a)
+pub unsafe fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30224,14 +44444,15 @@ pub unsafe fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
+pub unsafe fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30247,14 +44468,17 @@ pub unsafe fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
-    transmute(a)
+pub unsafe fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30270,14 +44494,15 @@ pub unsafe fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
+pub unsafe fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30293,14 +44518,16 @@ pub unsafe fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
+pub unsafe fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30316,14 +44543,15 @@ pub unsafe fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
+pub unsafe fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30339,14 +44567,17 @@ pub unsafe fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
-    transmute(a)
+pub unsafe fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30362,14 +44593,15 @@ pub unsafe fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
+pub unsafe fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30385,14 +44617,17 @@ pub unsafe fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
+    let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: float32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30408,14 +44643,15 @@ pub unsafe fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
+pub unsafe fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30431,14 +44667,21 @@ pub unsafe fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
+    let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30454,14 +44697,15 @@ pub unsafe fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
+pub unsafe fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30477,14 +44721,17 @@ pub unsafe fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
+    let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30500,14 +44747,15 @@ pub unsafe fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
+pub unsafe fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30523,14 +44771,17 @@ pub unsafe fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
+    let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30546,14 +44797,15 @@ pub unsafe fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
+pub unsafe fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30569,14 +44821,17 @@ pub unsafe fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
+    let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30592,14 +44847,15 @@ pub unsafe fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
+pub unsafe fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30615,14 +44871,21 @@ pub unsafe fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
+    let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30638,14 +44901,15 @@ pub unsafe fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
+pub unsafe fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30661,14 +44925,17 @@ pub unsafe fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
+    let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30684,14 +44951,15 @@ pub unsafe fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t {
+pub unsafe fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30707,14 +44975,17 @@ pub unsafe fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
+    let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30730,14 +45001,15 @@ pub unsafe fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
+pub unsafe fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30753,14 +45025,17 @@ pub unsafe fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
+    let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30776,14 +45051,15 @@ pub unsafe fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
+pub unsafe fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30799,14 +45075,17 @@ pub unsafe fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
+    let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30822,14 +45101,15 @@ pub unsafe fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
+pub unsafe fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30845,14 +45125,17 @@ pub unsafe fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
-    transmute(a)
+pub unsafe fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
+    let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: float32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30868,14 +45151,15 @@ pub unsafe fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
+pub unsafe fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30891,14 +45175,17 @@ pub unsafe fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
-    transmute(a)
+pub unsafe fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
+    let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: int8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30914,14 +45201,15 @@ pub unsafe fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
+pub unsafe fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30937,14 +45225,17 @@ pub unsafe fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
-    transmute(a)
+pub unsafe fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
+    let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: int16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30960,14 +45251,15 @@ pub unsafe fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
+pub unsafe fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -30983,14 +45275,17 @@ pub unsafe fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
-    transmute(a)
+pub unsafe fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
+    let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: int32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31006,14 +45301,15 @@ pub unsafe fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
+pub unsafe fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31029,14 +45325,16 @@ pub unsafe fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
+pub unsafe fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
+    let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
     transmute(a)
 }
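+// NOTE (editorial sketch, not generator output): single-lane results such
+// as `int64x1_t` above skip the result shuffle -- a one-lane vector has
+// only one possible lane order -- so only the multi-lane input is
+// reversed before the `transmute`:
+//
+//     let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]); // to spec order
+//     transmute(a) // int64x1_t result: nothing left to reorder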
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31052,14 +45350,15 @@ pub unsafe fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
+pub unsafe fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31075,14 +45374,17 @@ pub unsafe fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
-    transmute(a)
+pub unsafe fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
+    let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31098,14 +45400,15 @@ pub unsafe fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
+pub unsafe fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31121,14 +45424,17 @@ pub unsafe fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
-    transmute(a)
+pub unsafe fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
+    let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: uint16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31144,14 +45450,15 @@ pub unsafe fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
+pub unsafe fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31167,14 +45474,17 @@ pub unsafe fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
-    transmute(a)
+pub unsafe fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
+    let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: uint32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31190,14 +45500,15 @@ pub unsafe fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
+pub unsafe fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31213,14 +45524,16 @@ pub unsafe fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
+pub unsafe fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
+    let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31236,14 +45549,15 @@ pub unsafe fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
+pub unsafe fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31259,14 +45573,17 @@ pub unsafe fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
-    transmute(a)
+pub unsafe fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
+    let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31282,14 +45599,15 @@ pub unsafe fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
+pub unsafe fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31305,14 +45623,17 @@ pub unsafe fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
+    let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: float32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31328,14 +45649,15 @@ pub unsafe fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
+pub unsafe fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31351,14 +45673,21 @@ pub unsafe fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
+    let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31374,14 +45703,15 @@ pub unsafe fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
+pub unsafe fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31397,14 +45727,17 @@ pub unsafe fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
+    let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31420,14 +45753,15 @@ pub unsafe fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
+pub unsafe fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31443,14 +45777,17 @@ pub unsafe fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
+    let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31466,14 +45803,15 @@ pub unsafe fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
+pub unsafe fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31489,14 +45827,17 @@ pub unsafe fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
+    let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31512,14 +45853,15 @@ pub unsafe fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
+pub unsafe fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31535,14 +45877,21 @@ pub unsafe fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
+    let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31558,14 +45907,15 @@ pub unsafe fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
+pub unsafe fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31581,14 +45931,17 @@ pub unsafe fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
+    let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31604,14 +45957,15 @@ pub unsafe fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
+pub unsafe fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31627,14 +45981,17 @@ pub unsafe fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
+    let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31650,14 +46007,15 @@ pub unsafe fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
+pub unsafe fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31673,14 +46031,17 @@ pub unsafe fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
+    let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31696,14 +46057,15 @@ pub unsafe fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
+pub unsafe fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -31719,16 +46081,23 @@ pub unsafe fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
+    let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -31742,16 +46111,17 @@ pub unsafe fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
+pub unsafe fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -31765,16 +46135,22 @@ pub unsafe fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
+    let ret_val: int8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
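+// NOTE (editorial sketch, not generator output): `p128` is a scalar
+// `u128` rather than a vector, so the `p128` reinterprets above and below
+// have no input lanes to reverse; only the vector result is shuffled back
+// into big-endian order. They also require `neon,aes` (and ARMv8 on
+// 32-bit Arm) because `poly128` belongs to the AES/PMULL extension. A
+// hedged usage sketch, assuming a `p128` produced by `vmull_p64`:
+//
+//     let wide: p128 = vmull_p64(a, b);                   // carryless multiply
+//     let bytes: int8x16_t = vreinterpretq_s8_p128(wide); // reinterpret to lanes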
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -31788,16 +46164,17 @@ pub unsafe fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
+pub unsafe fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -31811,16 +46188,18 @@ pub unsafe fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
+    let ret_val: int16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -31834,16 +46213,17 @@ pub unsafe fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
+pub unsafe fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -31857,16 +46237,18 @@ pub unsafe fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
+    let ret_val: int32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -31880,16 +46262,17 @@ pub unsafe fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
+pub unsafe fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -31903,16 +46286,18 @@ pub unsafe fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
+    let ret_val: int64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -31926,16 +46311,17 @@ pub unsafe fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
+pub unsafe fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -31949,16 +46335,22 @@ pub unsafe fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
+    let ret_val: uint8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -31972,16 +46364,17 @@ pub unsafe fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
+pub unsafe fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -31995,16 +46388,18 @@ pub unsafe fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
+    let ret_val: uint16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32018,16 +46413,17 @@ pub unsafe fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
+pub unsafe fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32041,16 +46437,18 @@ pub unsafe fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
+    let ret_val: uint32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32064,16 +46462,17 @@ pub unsafe fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
+pub unsafe fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32087,16 +46486,18 @@ pub unsafe fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
+    let ret_val: uint64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32110,16 +46511,17 @@ pub unsafe fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
+pub unsafe fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32133,16 +46535,22 @@ pub unsafe fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
+    let ret_val: poly8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32156,16 +46564,17 @@ pub unsafe fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
+pub unsafe fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32179,16 +46588,18 @@ pub unsafe fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
+    let ret_val: poly16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32202,16 +46613,17 @@ pub unsafe fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
+pub unsafe fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32225,16 +46637,18 @@ pub unsafe fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
+    let ret_val: poly64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32248,16 +46662,17 @@ pub unsafe fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
+pub unsafe fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32271,16 +46686,18 @@ pub unsafe fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
+pub unsafe fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
+    let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32294,16 +46711,17 @@ pub unsafe fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
+pub unsafe fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32317,16 +46735,18 @@ pub unsafe fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
+pub unsafe fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
+    let a: int8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32340,16 +46760,17 @@ pub unsafe fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
+pub unsafe fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32363,16 +46784,19 @@ pub unsafe fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
+    let a: int8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32386,16 +46810,17 @@ pub unsafe fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
+pub unsafe fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32409,16 +46834,18 @@ pub unsafe fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
+pub unsafe fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
+    let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32432,16 +46859,17 @@ pub unsafe fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
+pub unsafe fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32455,16 +46883,18 @@ pub unsafe fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
+pub unsafe fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
+    let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32478,16 +46908,17 @@ pub unsafe fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
+pub unsafe fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32501,16 +46932,19 @@ pub unsafe fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
+    let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32524,16 +46958,17 @@ pub unsafe fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
+pub unsafe fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32547,16 +46982,18 @@ pub unsafe fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
+pub unsafe fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
+    let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32570,16 +47007,17 @@ pub unsafe fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
+pub unsafe fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32593,16 +47031,18 @@ pub unsafe fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
+pub unsafe fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
+    let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32616,16 +47056,17 @@ pub unsafe fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
+pub unsafe fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32639,16 +47080,19 @@ pub unsafe fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
+    let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: poly64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32662,16 +47106,17 @@ pub unsafe fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
+pub unsafe fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32685,16 +47130,18 @@ pub unsafe fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
+pub unsafe fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
+    let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32708,16 +47155,17 @@ pub unsafe fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
+pub unsafe fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32731,16 +47179,18 @@ pub unsafe fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
+pub unsafe fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -32754,14 +47204,15 @@ pub unsafe fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
+pub unsafe fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -32777,14 +47228,16 @@ pub unsafe fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
+pub unsafe fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
+    let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -32800,14 +47253,15 @@ pub unsafe fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
+pub unsafe fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -32823,14 +47277,17 @@ pub unsafe fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
+    let a: uint8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -32846,14 +47303,15 @@ pub unsafe fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
+pub unsafe fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -32869,14 +47327,16 @@ pub unsafe fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
+pub unsafe fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
+    let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -32892,14 +47352,15 @@ pub unsafe fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
+pub unsafe fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -32915,14 +47376,16 @@ pub unsafe fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
+pub unsafe fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
+    let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -32938,14 +47401,15 @@ pub unsafe fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
+pub unsafe fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -32961,14 +47425,17 @@ pub unsafe fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
+    let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -32984,14 +47451,15 @@ pub unsafe fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
+pub unsafe fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33007,14 +47475,16 @@ pub unsafe fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
+pub unsafe fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
+    let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33030,14 +47500,15 @@ pub unsafe fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
+pub unsafe fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33053,14 +47524,16 @@ pub unsafe fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
+pub unsafe fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
+    let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33076,14 +47549,15 @@ pub unsafe fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
+pub unsafe fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33099,14 +47573,17 @@ pub unsafe fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
-    transmute(a)
+pub unsafe fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
+    let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let ret_val: poly64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33122,14 +47599,15 @@ pub unsafe fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
+pub unsafe fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33145,14 +47623,16 @@ pub unsafe fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
+pub unsafe fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
+    let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33168,14 +47648,15 @@ pub unsafe fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
+pub unsafe fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33191,14 +47672,16 @@ pub unsafe fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
+pub unsafe fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33214,14 +47697,15 @@ pub unsafe fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
+pub unsafe fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33237,14 +47721,16 @@ pub unsafe fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
+pub unsafe fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
+    let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33260,14 +47746,15 @@ pub unsafe fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
+pub unsafe fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33283,14 +47770,17 @@ pub unsafe fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
-    transmute(a)
+pub unsafe fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
+    let a: poly8x16_t = simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33306,14 +47796,15 @@ pub unsafe fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
+pub unsafe fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33329,14 +47820,16 @@ pub unsafe fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
+pub unsafe fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
+    let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33352,14 +47845,15 @@ pub unsafe fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
+pub unsafe fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33375,14 +47869,16 @@ pub unsafe fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
+pub unsafe fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
+    let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33398,14 +47894,15 @@ pub unsafe fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
+pub unsafe fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33421,14 +47918,17 @@ pub unsafe fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
-    transmute(a)
+pub unsafe fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
+    let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly64x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33444,14 +47944,15 @@ pub unsafe fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
+pub unsafe fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33467,14 +47968,16 @@ pub unsafe fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
-    transmute(a)
+pub unsafe fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
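+    // A one-lane poly64x1_t needs no input shuffle; only the widened result's
+    // lane order has to be restored on big-endian targets.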
+    let ret_val: int8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33490,14 +47993,15 @@ pub unsafe fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
+pub unsafe fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33513,14 +48017,16 @@ pub unsafe fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
-    transmute(a)
+pub unsafe fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
+    let ret_val: int16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33536,14 +48042,15 @@ pub unsafe fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
+pub unsafe fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33559,14 +48066,16 @@ pub unsafe fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
-    transmute(a)
+pub unsafe fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
+    let ret_val: int32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33582,14 +48091,15 @@ pub unsafe fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
+pub unsafe fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33605,14 +48115,16 @@ pub unsafe fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
-    transmute(a)
+pub unsafe fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
+    let ret_val: uint8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33628,14 +48140,15 @@ pub unsafe fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
+pub unsafe fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33651,14 +48164,16 @@ pub unsafe fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
-    transmute(a)
+pub unsafe fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
+    let ret_val: uint16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33674,14 +48189,15 @@ pub unsafe fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
+pub unsafe fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33697,14 +48213,16 @@ pub unsafe fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
-    transmute(a)
+pub unsafe fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
+    let ret_val: uint32x2_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33720,14 +48238,15 @@ pub unsafe fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
+pub unsafe fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33743,14 +48262,16 @@ pub unsafe fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
-    transmute(a)
+pub unsafe fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
+    let ret_val: poly8x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33766,7 +48287,7 @@ pub unsafe fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
+pub unsafe fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
@@ -33774,6 +48295,7 @@ pub unsafe fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33790,6 +48312,31 @@ pub unsafe fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
+    let ret_val: poly16x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 {
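+    // Little-endian: lane order already matches memory order, so the
+    // reinterpret is a plain bit cast.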
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
@@ -33797,6 +48344,7 @@ pub unsafe fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33813,6 +48361,7 @@ pub unsafe fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 {
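+    // The p128 result is a scalar (128-bit integer), so only the input
+    // vector's lane order needs fixing on big-endian targets.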
+    let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
@@ -33820,6 +48369,7 @@ pub unsafe fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 {
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33839,10 +48389,41 @@ pub unsafe fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
+    let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
+}
+#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33862,10 +48443,37 @@ pub unsafe fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
+    let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33885,10 +48493,37 @@ pub unsafe fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
+    let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: int32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33908,10 +48543,41 @@ pub unsafe fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
+    let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
+}
+#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33931,10 +48597,37 @@ pub unsafe fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
+    let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33954,10 +48647,37 @@ pub unsafe fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
+    let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: uint32x4_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33977,10 +48697,41 @@ pub unsafe fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
     transmute(a)
 }
 #[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
+    let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: poly8x16_t = transmute(a);
+    simd_shuffle!(
+        ret_val,
+        ret_val,
+        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    )
+}
+#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -33999,6 +48750,32 @@ pub unsafe fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
 pub unsafe fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
     transmute(a)
 }
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
+    let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let ret_val: poly16x8_t = transmute(a);
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
 #[doc = "Rounding halving add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrhadd_s8)"]
 #[doc = "## Safety"]
@@ -36216,6 +50993,7 @@ pub unsafe fn vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
@@ -36235,10 +51013,38 @@ pub unsafe fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
     transmute(vrsubhn_s16(transmute(a), transmute(b)))
 }
 #[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(rsubhn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
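+    // The unsigned form delegates to the signed intrinsic: rounding
+    // subtract-high-narrow is bit-identical for signed and unsigned inputs
+    // under two's complement, so the transmutes are sound.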
+    let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let b: uint16x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vrsubhn_s16(transmute(a), transmute(b)));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u32)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
@@ -36258,10 +51064,38 @@ pub unsafe fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
     transmute(vrsubhn_s32(transmute(a), transmute(b)))
 }
 #[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(rsubhn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
+    let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+    let b: uint32x4_t = simd_shuffle!(b, b, [3, 2, 1, 0]);
+    let ret_val: uint16x4_t = transmute(vrsubhn_s32(transmute(a), transmute(b)));
+    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+}
+#[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u64)"]
 #[doc = "## Safety"]
 #[doc = "  * Neon instrinsic unsafe"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
@@ -36280,6 +51114,33 @@ pub unsafe fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
 pub unsafe fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
     transmute(vrsubhn_s64(transmute(a), transmute(b)))
 }
+#[doc = "Rounding subtract returning high narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(rsubhn)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
+    let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+    let b: uint64x2_t = simd_shuffle!(b, b, [1, 0]);
+    let ret_val: uint32x2_t = transmute(vrsubhn_s64(transmute(a), transmute(b)));
+    simd_shuffle!(ret_val, ret_val, [1, 0])
+}
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f32)"]
 #[doc = "## Safety"]
@@ -36880,6 +51741,400 @@ pub unsafe fn vsetq_lane_p64<const LANE: i32>(a: p64, b: poly64x2_t) -> poly64x2
     static_assert_uimm_bits!(LANE, 1);
     simd_insert!(b, LANE as u32, a)
 }
+#[doc = "SHA1 hash update accelerator, choose."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1cq_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha1c))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+)]
+pub unsafe fn vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t {
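+    // The LLVM crypto intrinsics are declared over signed vectors, so the
+    // unsigned arguments are reinterpreted on the way in and the result on
+    // the way out.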
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha1c"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1c")]
+        fn _vsha1cq_u32(hash_abcd: int32x4_t, hash_e: i32, wk: int32x4_t) -> int32x4_t;
+    }
+    _vsha1cq_u32(hash_abcd.as_signed(), hash_e.as_signed(), wk.as_signed()).as_unsigned()
+}
+#[doc = "SHA1 fixed rotate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1h_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha1h))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+)]
+pub unsafe fn vsha1h_u32(hash_e: u32) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha1h"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1h")]
+        fn _vsha1h_u32(hash_e: i32) -> i32;
+    }
+    _vsha1h_u32(hash_e.as_signed()).as_unsigned()
+}
+#[doc = "SHA1 hash update accelerator, majority"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1mq_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha1m))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+)]
+pub unsafe fn vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha1m"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1m")]
+        fn _vsha1mq_u32(hash_abcd: int32x4_t, hash_e: i32, wk: int32x4_t) -> int32x4_t;
+    }
+    _vsha1mq_u32(hash_abcd.as_signed(), hash_e.as_signed(), wk.as_signed()).as_unsigned()
+}
+#[doc = "SHA1 hash update accelerator, parity"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1pq_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha1p))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+)]
+pub unsafe fn vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha1p"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1p")]
+        fn _vsha1pq_u32(hash_abcd: int32x4_t, hash_e: i32, wk: int32x4_t) -> int32x4_t;
+    }
+    _vsha1pq_u32(hash_abcd.as_signed(), hash_e.as_signed(), wk.as_signed()).as_unsigned()
+}
+#[doc = "SHA1 schedule update accelerator, first part."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su0q_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha1su0))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+)]
+pub unsafe fn vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha1su0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su0")]
+        fn _vsha1su0q_u32(w0_3: int32x4_t, w4_7: int32x4_t, w8_11: int32x4_t) -> int32x4_t;
+    }
+    _vsha1su0q_u32(w0_3.as_signed(), w4_7.as_signed(), w8_11.as_signed()).as_unsigned()
+}
+#[doc = "SHA1 schedule update accelerator, second part."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha1su1q_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha1su1))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+)]
+pub unsafe fn vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha1su1"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha1su1")]
+        fn _vsha1su1q_u32(tw0_3: int32x4_t, w12_15: int32x4_t) -> int32x4_t;
+    }
+    _vsha1su1q_u32(tw0_3.as_signed(), w12_15.as_signed()).as_unsigned()
+}
+#[doc = "SHA1 schedule update accelerator, upper part."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256h2q_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha256h2))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+)]
+pub unsafe fn vsha256h2q_u32(
+    hash_abcd: uint32x4_t,
+    hash_efgh: uint32x4_t,
+    wk: uint32x4_t,
+) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha256h2"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h2")]
+        fn _vsha256h2q_u32(hash_abcd: int32x4_t, hash_efgh: int32x4_t, wk: int32x4_t) -> int32x4_t;
+    }
+    _vsha256h2q_u32(hash_abcd.as_signed(), hash_efgh.as_signed(), wk.as_signed()).as_unsigned()
+}
+#[doc = "SHA1 schedule update accelerator, first part."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256hq_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha256h))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+)]
+pub unsafe fn vsha256hq_u32(
+    hash_abcd: uint32x4_t,
+    hash_efgh: uint32x4_t,
+    wk: uint32x4_t,
+) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha256h"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256h")]
+        fn _vsha256hq_u32(hash_abcd: int32x4_t, hash_efgh: int32x4_t, wk: int32x4_t) -> int32x4_t;
+    }
+    _vsha256hq_u32(hash_abcd.as_signed(), hash_efgh.as_signed(), wk.as_signed()).as_unsigned()
+}
+#[doc = "SHA256 schedule update accelerator, first part."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su0q_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha256su0))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+)]
+pub unsafe fn vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha256su0"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su0")]
+        fn _vsha256su0q_u32(w0_3: int32x4_t, w4_7: int32x4_t) -> int32x4_t;
+    }
+    _vsha256su0q_u32(w0_3.as_signed(), w4_7.as_signed()).as_unsigned()
+}
+#[doc = "SHA256 schedule update accelerator, second part."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsha256su1q_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "sha2")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(test, assert_instr(sha256su1))]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "aarch64_neon_crypto_intrinsics", since = "1.72.0")
+)]
+pub unsafe fn vsha256su1q_u32(
+    tw0_3: uint32x4_t,
+    w8_11: uint32x4_t,
+    w12_15: uint32x4_t,
+) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.crypto.sha256su1"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.sha256su1")]
+        fn _vsha256su1q_u32(tw0_3: int32x4_t, w8_11: int32x4_t, w12_15: int32x4_t) -> int32x4_t;
+    }
+    _vsha256su1q_u32(tw0_3.as_signed(), w8_11.as_signed(), w12_15.as_signed()).as_unsigned()
+}
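
The four SHA-256 intrinsics above compose into four rounds of the compression function. A minimal sketch of that composition (a hypothetical helper, not part of this patch; assumes a little-endian aarch64 target and that `k` already holds the four round constants):

#[cfg(target_arch = "aarch64")]
mod sha256_sketch {
    use core::arch::aarch64::*;

    /// Four rounds of SHA-256: update the working state and extend the
    /// message schedule by four words. Illustration only.
    #[target_feature(enable = "sha2")]
    pub unsafe fn sha256_quad_round(
        abcd: uint32x4_t,
        efgh: uint32x4_t,
        w0_3: uint32x4_t,
        w4_7: uint32x4_t,
        w8_11: uint32x4_t,
        w12_15: uint32x4_t,
        k: uint32x4_t, // round constants K[t..t+4]
    ) -> (uint32x4_t, uint32x4_t, uint32x4_t) {
        let wk = vaddq_u32(w0_3, k); // W[t] + K[t]
        // Both halves of the hash update consume the *old* abcd state.
        let abcd_new = vsha256hq_u32(abcd, efgh, wk);
        let efgh_new = vsha256h2q_u32(efgh, abcd, wk);
        // Schedule update: derive W[t+16..t+20] from the previous 16 words.
        let w16_19 = vsha256su1q_u32(vsha256su0q_u32(w0_3, w4_7), w8_11, w12_15);
        (abcd_new, efgh_new, w16_19)
    }
}
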
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v16i8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vshiftins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v16i8")]
+        fn _vshiftins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
+    }
+    _vshiftins_v16i8(a, b, c)
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v1i64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vshiftins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v1i64")]
+        fn _vshiftins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t;
+    }
+    _vshiftins_v1i64(a, b, c)
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v2i32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vshiftins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i32")]
+        fn _vshiftins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t;
+    }
+    _vshiftins_v2i32(a, b, c)
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v2i64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vshiftins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i64")]
+        fn _vshiftins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+    }
+    _vshiftins_v2i64(a, b, c)
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v4i16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vshiftins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i16")]
+        fn _vshiftins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t;
+    }
+    _vshiftins_v4i16(a, b, c)
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v4i32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vshiftins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i32")]
+        fn _vshiftins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+    }
+    _vshiftins_v4i32(a, b, c)
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v8i16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vshiftins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i16")]
+        fn _vshiftins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
+    }
+    _vshiftins_v8i16(a, b, c)
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v8i8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vshiftins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i8")]
+        fn _vshiftins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
+    }
+    _vshiftins_v8i8(a, b, c)
+}
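
All eight `vshiftins_*` shims lower to the same LLVM shift-insert operation; the sign of the per-lane shift vector picks the direction. The `vsli_*` wrappers below pass `splat(N)` (shift left, preserving the low N bits of `a`), while the `vsri_*` wrappers further down pass `splat(-N)` (shift right, preserving the high N bits of `a`). A scalar model of one 8-bit lane, for illustration only:

// Per-lane model of the shift-insert behind vsli_* (n in 0..=7) and
// vsri_* (n in -8..=-1); not part of this patch.
fn shift_insert_u8(a: u8, b: u8, n: i32) -> u8 {
    if n >= 0 {
        // SLI #n: insert b << n, keeping the low n bits of a.
        let keep = ((1u16 << n) - 1) as u8;
        ((b << n) & !keep) | (a & keep)
    } else {
        // SRI #-n: insert b >> -n, keeping the high -n bits of a.
        let n = (-n) as u32;
        let keep = (0xFF00u16 >> n) as u8;
        (a & keep) | ((((b as u16) >> n) as u8) & !keep)
    }
}
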
 #[doc = "Shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s8)"]
 #[doc = "## Safety"]
@@ -38524,6 +53779,334 @@ pub unsafe fn vshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
     static_assert!(N >= 1 && N <= 32);
     simd_cast(simd_shr(a, vdupq_n_u64(N as _)))
 }
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsli_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    vshiftins_v8i8(a, b, int8x8_t::splat(N as i8))
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsliq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    vshiftins_v16i8(a, b, int8x16_t::splat(N as i8))
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsli_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    vshiftins_v4i16(a, b, int16x4_t::splat(N as i16))
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsliq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    vshiftins_v8i16(a, b, int16x8_t::splat(N as i16))
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsli_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert!(N >= 0 && N <= 31);
+    vshiftins_v2i32(a, b, int32x2_t::splat(N))
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsliq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert!(N >= 0 && N <= 31);
+    vshiftins_v4i32(a, b, int32x4_t::splat(N))
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsli_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    static_assert!(N >= 0 && N <= 63);
+    vshiftins_v1i64(a, b, int64x1_t::splat(N as i64))
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsliq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    static_assert!(N >= 0 && N <= 63);
+    vshiftins_v2i64(a, b, int64x2_t::splat(N as i64))
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsli_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    transmute(vshiftins_v8i8(
+        transmute(a),
+        transmute(b),
+        int8x8_t::splat(N as i8),
+    ))
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsliq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    transmute(vshiftins_v16i8(
+        transmute(a),
+        transmute(b),
+        int8x16_t::splat(N as i8),
+    ))
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsli_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    transmute(vshiftins_v4i16(
+        transmute(a),
+        transmute(b),
+        int16x4_t::splat(N as i16),
+    ))
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsliq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    transmute(vshiftins_v8i16(
+        transmute(a),
+        transmute(b),
+        int16x8_t::splat(N as i16),
+    ))
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsli_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    static_assert!(N >= 0 && N <= 31);
+    transmute(vshiftins_v2i32(
+        transmute(a),
+        transmute(b),
+        int32x2_t::splat(N as i32),
+    ))
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.32", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsliq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    static_assert!(N >= 0 && N <= 31);
+    transmute(vshiftins_v4i32(
+        transmute(a),
+        transmute(b),
+        int32x4_t::splat(N as i32),
+    ))
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsli_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    static_assert!(N >= 0 && N <= 63);
+    transmute(vshiftins_v1i64(
+        transmute(a),
+        transmute(b),
+        int64x1_t::splat(N as i64),
+    ))
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsliq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    static_assert!(N >= 0 && N <= 63);
+    transmute(vshiftins_v2i64(
+        transmute(a),
+        transmute(b),
+        int64x2_t::splat(N as i64),
+    ))
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsli_n_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    transmute(vshiftins_v8i8(
+        transmute(a),
+        transmute(b),
+        int8x8_t::splat(N as i8),
+    ))
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.8", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsliq_n_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    transmute(vshiftins_v16i8(
+        transmute(a),
+        transmute(b),
+        int8x16_t::splat(N as i8),
+    ))
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsli_n_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    transmute(vshiftins_v4i16(
+        transmute(a),
+        transmute(b),
+        int16x4_t::splat(N as i16),
+    ))
+}
+#[doc = "Shift Left and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsli.16", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsliq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    transmute(vshiftins_v8i16(
+        transmute(a),
+        transmute(b),
+        int16x8_t::splat(N as i16),
+    ))
+}
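
A usage sketch for the left-insert family (hypothetical helper, not part of this patch; `core::arch::arm` requires nightly with `feature(stdarch_arm_neon_intrinsics)`): since `vsli_n_u8::<N>` computes `(b << N) | (a & (2^N - 1))` per lane, two nibble vectors pack into one byte vector with a single instruction.

#[cfg(target_arch = "arm")]
mod vsli_sketch {
    use core::arch::arm::*;

    // result lane = (hi << 4) | (lo & 0x0F); hi's top nibble is shifted out.
    #[target_feature(enable = "neon,v7")]
    pub unsafe fn pack_nibbles(lo: uint8x8_t, hi: uint8x8_t) -> uint8x8_t {
        vsli_n_u8::<4>(lo, hi)
    }
}
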
 #[doc = "Signed shift right and accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s8)"]
 #[doc = "## Safety"]
@@ -38924,6 +54507,710 @@ pub unsafe fn vsraq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x
     static_assert!(N >= 1 && N <= 64);
     simd_add(a, vshrq_n_u64::<N>(b))
 }
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsri_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    static_assert!(1 <= N && N <= 8);
+    vshiftins_v8i8(a, b, int8x8_t::splat(-N as i8))
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsriq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    static_assert!(1 <= N && N <= 8);
+    vshiftins_v16i8(a, b, int8x16_t::splat(-N as i8))
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsri_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert!(1 <= N && N <= 16);
+    vshiftins_v4i16(a, b, int16x4_t::splat(-N as i16))
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsriq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert!(1 <= N && N <= 16);
+    vshiftins_v8i16(a, b, int16x8_t::splat(-N as i16))
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsri_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert!(1 <= N && N <= 32);
+    vshiftins_v2i32(a, b, int32x2_t::splat(-N as i32))
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsriq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert!(1 <= N && N <= 32);
+    vshiftins_v4i32(a, b, int32x4_t::splat(-N as i32))
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsri_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    static_assert!(1 <= N && N <= 64);
+    vshiftins_v1i64(a, b, int64x1_t::splat(-N as i64))
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsriq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    static_assert!(1 <= N && N <= 64);
+    vshiftins_v2i64(a, b, int64x2_t::splat(-N as i64))
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsri_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    static_assert!(1 <= N && N <= 8);
+    transmute(vshiftins_v8i8(
+        transmute(a),
+        transmute(b),
+        int8x8_t::splat(-N as i8),
+    ))
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsriq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    static_assert!(1 <= N && N <= 8);
+    transmute(vshiftins_v16i8(
+        transmute(a),
+        transmute(b),
+        int8x16_t::splat(-N as i8),
+    ))
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsri_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    static_assert!(1 <= N && N <= 16);
+    transmute(vshiftins_v4i16(
+        transmute(a),
+        transmute(b),
+        int16x4_t::splat(-N as i16),
+    ))
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsriq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    static_assert!(1 <= N && N <= 16);
+    transmute(vshiftins_v8i16(
+        transmute(a),
+        transmute(b),
+        int16x8_t::splat(-N as i16),
+    ))
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsri_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    static_assert!(1 <= N && N <= 32);
+    transmute(vshiftins_v2i32(
+        transmute(a),
+        transmute(b),
+        int32x2_t::splat(-N),
+    ))
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.32", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsriq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    static_assert!(1 <= N && N <= 32);
+    transmute(vshiftins_v4i32(
+        transmute(a),
+        transmute(b),
+        int32x4_t::splat(-N),
+    ))
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsri_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    static_assert!(1 <= N && N <= 64);
+    transmute(vshiftins_v1i64(
+        transmute(a),
+        transmute(b),
+        int64x1_t::splat(-N as i64),
+    ))
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.64", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsriq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    static_assert!(1 <= N && N <= 64);
+    transmute(vshiftins_v2i64(
+        transmute(a),
+        transmute(b),
+        int64x2_t::splat(-N as i64),
+    ))
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsri_n_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    static_assert!(1 <= N && N <= 8);
+    transmute(vshiftins_v8i8(
+        transmute(a),
+        transmute(b),
+        int8x8_t::splat(-N as i8),
+    ))
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.8", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsriq_n_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    static_assert!(1 <= N && N <= 8);
+    transmute(vshiftins_v16i8(
+        transmute(a),
+        transmute(b),
+        int8x16_t::splat(-N as i8),
+    ))
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsri_n_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    static_assert!(1 <= N && N <= 16);
+    transmute(vshiftins_v4i16(
+        transmute(a),
+        transmute(b),
+        int16x4_t::splat(-N as i16),
+    ))
+}
+#[doc = "Shift Right and Insert (immediate)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsri.16", N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsriq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    static_assert!(1 <= N && N <= 16);
+    transmute(vshiftins_v8i16(
+        transmute(a),
+        transmute(b),
+        int16x8_t::splat(-N as i16),
+    ))
+}
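
One classic use of the right-insert family (illustration only, same nightly caveat as above): Armv7 NEON has no per-lane rotate instruction, but a left shift followed by a shift-right-insert yields one.

#[cfg(target_arch = "arm")]
mod vsri_sketch {
    use core::arch::arm::*;

    // Rotate each byte lane right by 3: (x >> 3) | (x << 5).
    #[target_feature(enable = "neon,v7")]
    pub unsafe fn ror3_u8(x: uint8x8_t) -> uint8x8_t {
        vsri_n_u8::<3>(vshl_n_u8::<5>(x), x)
    }
}
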
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
+pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) {
+    vst1_v2f32(
+        ptr as *const i8,
+        transmute(a),
+        crate::mem::align_of::<f32>() as i32,
+    )
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
+pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) {
+    vst1q_v4f32(
+        ptr as *const i8,
+        transmute(a),
+        crate::mem::align_of::<f32>() as i32,
+    )
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
+pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) {
+    vst1_v8i8(ptr as *const i8, a, crate::mem::align_of::<i8>() as i32)
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
+pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) {
+    vst1q_v16i8(ptr as *const i8, a, crate::mem::align_of::<i8>() as i32)
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
+pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) {
+    vst1_v4i16(ptr as *const i8, a, crate::mem::align_of::<i16>() as i32)
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
+pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) {
+    vst1q_v8i16(ptr as *const i8, a, crate::mem::align_of::<i16>() as i32)
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
+pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) {
+    vst1_v2i32(ptr as *const i8, a, crate::mem::align_of::<i32>() as i32)
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
+pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) {
+    vst1q_v4i32(ptr as *const i8, a, crate::mem::align_of::<i32>() as i32)
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
+pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) {
+    vst1_v1i64(ptr as *const i8, a, crate::mem::align_of::<i64>() as i32)
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
+pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) {
+    vst1q_v2i64(ptr as *const i8, a, crate::mem::align_of::<i64>() as i32)
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
+pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) {
+    vst1_v8i8(
+        ptr as *const i8,
+        transmute(a),
+        crate::mem::align_of::<u8>() as i32,
+    )
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
+pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) {
+    vst1q_v16i8(
+        ptr as *const i8,
+        transmute(a),
+        crate::mem::align_of::<u8>() as i32,
+    )
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
+pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) {
+    vst1_v4i16(
+        ptr as *const i8,
+        transmute(a),
+        crate::mem::align_of::<u16>() as i32,
+    )
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
+pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) {
+    vst1q_v8i16(
+        ptr as *const i8,
+        transmute(a),
+        crate::mem::align_of::<u16>() as i32,
+    )
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
+pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) {
+    vst1_v2i32(
+        ptr as *const i8,
+        transmute(a),
+        crate::mem::align_of::<u32>() as i32,
+    )
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
+pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) {
+    vst1q_v4i32(
+        ptr as *const i8,
+        transmute(a),
+        crate::mem::align_of::<u32>() as i32,
+    )
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
+pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) {
+    vst1_v1i64(
+        ptr as *const i8,
+        transmute(a),
+        crate::mem::align_of::<u64>() as i32,
+    )
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
+pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) {
+    vst1q_v2i64(
+        ptr as *const i8,
+        transmute(a),
+        crate::mem::align_of::<u64>() as i32,
+    )
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
+pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) {
+    vst1_v8i8(
+        ptr as *const i8,
+        transmute(a),
+        crate::mem::align_of::<p8>() as i32,
+    )
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
+pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) {
+    vst1q_v16i8(
+        ptr as *const i8,
+        transmute(a),
+        crate::mem::align_of::<p8>() as i32,
+    )
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
+pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) {
+    vst1_v4i16(
+        ptr as *const i8,
+        transmute(a),
+        crate::mem::align_of::<p16>() as i32,
+    )
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
+pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
+    vst1q_v8i16(
+        ptr as *const i8,
+        transmute(a),
+        crate::mem::align_of::<p16>() as i32,
+    )
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
+pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
+    vst1_v1i64(
+        ptr as *const i8,
+        transmute(a),
+        crate::mem::align_of::<p64>() as i32,
+    )
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
+pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) {
+    vst1q_v2i64(
+        ptr as *const i8,
+        transmute(a),
+        crate::mem::align_of::<p64>() as i32,
+    )
+}
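
A round-trip sketch of the store intrinsics above (hypothetical helper, not part of this patch; assumes an Armv7 NEON target on nightly with `feature(stdarch_arm_neon_intrinsics)`):

#[cfg(target_arch = "arm")]
mod vst1_sketch {
    use core::arch::arm::*;

    // Load eight bytes, double each lane, and store the result.
    #[target_feature(enable = "neon,v7")]
    pub unsafe fn double_bytes(src: &[u8; 8], dst: &mut [u8; 8]) {
        let v = vld1_u8(src.as_ptr());
        vst1_u8(dst.as_mut_ptr(), vadd_u8(v, v));
    }
}
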
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2)"]
 #[doc = "## Safety"]
@@ -38935,7 +55222,7 @@ pub unsafe fn vsraq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0f32.v2f32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2f32.p0")]
         fn _vst1_f32_x2(ptr: *mut f32, a: float32x2_t, b: float32x2_t);
     }
     _vst1_f32_x2(a, b.0, b.1)
@@ -38951,7 +55238,7 @@ pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0f32.v4f32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4f32.p0")]
         fn _vst1q_f32_x2(ptr: *mut f32, a: float32x4_t, b: float32x4_t);
     }
     _vst1q_f32_x2(a, b.0, b.1)
@@ -39005,7 +55292,7 @@ pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 pub unsafe fn vst1_f32_x3(a: *mut f32, b: float32x2x3_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0f32.v2f32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.v2f32.p0")]
         fn _vst1_f32_x3(ptr: *mut f32, a: float32x2_t, b: float32x2_t, c: float32x2_t);
     }
     _vst1_f32_x3(a, b.0, b.1, b.2)
@@ -39021,7 +55308,7 @@ pub unsafe fn vst1_f32_x3(a: *mut f32, b: float32x2x3_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 pub unsafe fn vst1q_f32_x3(a: *mut f32, b: float32x4x3_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0f32.v4f32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.v4f32.p0")]
         fn _vst1q_f32_x3(ptr: *mut f32, a: float32x4_t, b: float32x4_t, c: float32x4_t);
     }
     _vst1q_f32_x3(a, b.0, b.1, b.2)
@@ -39075,7 +55362,7 @@ pub unsafe fn vst1q_f32_x3(a: *mut f32, b: float32x4x3_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0f32.v2f32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0f32.v2f32.p0")]
         fn _vst1_f32_x4(
             ptr: *mut f32,
             a: float32x2_t,
@@ -39097,7 +55384,7 @@ pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0f32.v4f32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0f32.v4f32.p0")]
         fn _vst1q_f32_x4(
             ptr: *mut f32,
             a: float32x4_t,
@@ -40034,7 +56321,7 @@ pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i8.v8i8")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v8i8.p0")]
         fn _vst1_s8_x2(ptr: *mut i8, a: int8x8_t, b: int8x8_t);
     }
     _vst1_s8_x2(a, b.0, b.1)
@@ -40050,7 +56337,7 @@ pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i8.v16i8")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v16i8.p0")]
         fn _vst1q_s8_x2(ptr: *mut i8, a: int8x16_t, b: int8x16_t);
     }
     _vst1q_s8_x2(a, b.0, b.1)
@@ -40066,7 +56353,7 @@ pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i16.v4i16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4i16.p0")]
         fn _vst1_s16_x2(ptr: *mut i16, a: int16x4_t, b: int16x4_t);
     }
     _vst1_s16_x2(a, b.0, b.1)
@@ -40082,7 +56369,7 @@ pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i16.v8i16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v8i16.p0")]
         fn _vst1q_s16_x2(ptr: *mut i16, a: int16x8_t, b: int16x8_t);
     }
     _vst1q_s16_x2(a, b.0, b.1)
@@ -40098,7 +56385,7 @@ pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i32.v2i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2i32.p0")]
         fn _vst1_s32_x2(ptr: *mut i32, a: int32x2_t, b: int32x2_t);
     }
     _vst1_s32_x2(a, b.0, b.1)
@@ -40114,7 +56401,7 @@ pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i32.v4i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v4i32.p0")]
         fn _vst1q_s32_x2(ptr: *mut i32, a: int32x4_t, b: int32x4_t);
     }
     _vst1q_s32_x2(a, b.0, b.1)
@@ -40130,7 +56417,7 @@ pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i64.v1i64")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v1i64.p0")]
         fn _vst1_s64_x2(ptr: *mut i64, a: int64x1_t, b: int64x1_t);
     }
     _vst1_s64_x2(a, b.0, b.1)
@@ -40146,7 +56433,7 @@ pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i64.v2i64")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.v2i64.p0")]
         fn _vst1q_s64_x2(ptr: *mut i64, a: int64x2_t, b: int64x2_t);
     }
     _vst1q_s64_x2(a, b.0, b.1)
@@ -40314,7 +56601,7 @@ pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i8.v8i8")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i8.v8i8.p0")]
         fn _vst1_s8_x3(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t);
     }
     _vst1_s8_x3(a, b.0, b.1, b.2)
@@ -40330,7 +56617,7 @@ pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i8.v16i8")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i8.v16i8.p0")]
         fn _vst1q_s8_x3(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t);
     }
     _vst1q_s8_x3(a, b.0, b.1, b.2)
@@ -40346,7 +56633,7 @@ pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i16.v4i16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i16.v4i16.p0")]
         fn _vst1_s16_x3(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t);
     }
     _vst1_s16_x3(a, b.0, b.1, b.2)
@@ -40362,7 +56649,7 @@ pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i16.v8i16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i16.v8i16.p0")]
         fn _vst1q_s16_x3(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t);
     }
     _vst1q_s16_x3(a, b.0, b.1, b.2)
@@ -40378,7 +56665,7 @@ pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i32.v2i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i32.v2i32.p0")]
         fn _vst1_s32_x3(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t);
     }
     _vst1_s32_x3(a, b.0, b.1, b.2)
@@ -40394,7 +56681,7 @@ pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i32.v4i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i32.v4i32.p0")]
         fn _vst1q_s32_x3(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t);
     }
     _vst1q_s32_x3(a, b.0, b.1, b.2)
@@ -40410,7 +56697,7 @@ pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i64.v1i64")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i64.v1i64.p0")]
         fn _vst1_s64_x3(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t);
     }
     _vst1_s64_x3(a, b.0, b.1, b.2)
@@ -40426,7 +56713,7 @@ pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i64.v2i64")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i64.v2i64.p0")]
         fn _vst1q_s64_x3(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t);
     }
     _vst1q_s64_x3(a, b.0, b.1, b.2)
@@ -40594,7 +56881,7 @@ pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i8.v8i8")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i8.v8i8.p0")]
         fn _vst1_s8_x4(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t);
     }
     _vst1_s8_x4(a, b.0, b.1, b.2, b.3)
@@ -40610,7 +56897,7 @@ pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i8.v16i8")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i8.v16i8.p0")]
         fn _vst1q_s8_x4(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t);
     }
     _vst1q_s8_x4(a, b.0, b.1, b.2, b.3)
@@ -40626,7 +56913,7 @@ pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i16.v4i16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i16.v4i16.p0")]
         fn _vst1_s16_x4(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t);
     }
     _vst1_s16_x4(a, b.0, b.1, b.2, b.3)
@@ -40642,7 +56929,7 @@ pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i16.v8i16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i16.v8i16.p0")]
         fn _vst1q_s16_x4(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t);
     }
     _vst1q_s16_x4(a, b.0, b.1, b.2, b.3)
@@ -40658,7 +56945,7 @@ pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i32.v2i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i32.v2i32.p0")]
         fn _vst1_s32_x4(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t);
     }
     _vst1_s32_x4(a, b.0, b.1, b.2, b.3)
@@ -40674,7 +56961,7 @@ pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i32.v4i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i32.v4i32.p0")]
         fn _vst1q_s32_x4(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t);
     }
     _vst1q_s32_x4(a, b.0, b.1, b.2, b.3)
@@ -40690,7 +56977,7 @@ pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i64.v1i64")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i64.v1i64.p0")]
         fn _vst1_s64_x4(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t);
     }
     _vst1_s64_x4(a, b.0, b.1, b.2, b.3)
@@ -40706,7 +56993,7 @@ pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) {
 #[cfg_attr(test, assert_instr(vst1))]
 pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i64.v2i64")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i64.v2i64.p0")]
         fn _vst1q_s64_x4(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t);
     }
     _vst1q_s64_x4(a, b.0, b.1, b.2, b.3)
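Reviewer note: a quick usage sketch of the `x2`/`x3`/`x4` stores touched above (nightly-only intrinsics; `store_pair` is a hypothetical helper, not part of the PR):

```rust
#[cfg(target_arch = "arm")]
use core::arch::arm::*;

// Stores two d-registers to 16 contiguous bytes: the tuple's .0 lands
// at dst[0..8] and .1 at dst[8..16].
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon")]
unsafe fn store_pair(dst: *mut i8, lo: int8x8_t, hi: int8x8_t) {
    vst1_s8_x2(dst, int8x8x2_t(lo, hi));
}
```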
@@ -41539,6 +57826,176 @@ pub unsafe fn vst1q_p16_x3(a: *mut p16, b: poly16x8x3_t) {
 pub unsafe fn vst1q_p16_x4(a: *mut p16, b: poly16x8x4_t) {
     vst1q_s16_x4(transmute(a), transmute(b))
 }
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v1i64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
+unsafe fn vst1_v1i64(addr: *const i8, val: int64x1_t, align: i32) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v1i64.p0")]
+        fn _vst1_v1i64(addr: *const i8, val: int64x1_t, align: i32);
+    }
+    _vst1_v1i64(addr, val, align)
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v2f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
+unsafe fn vst1_v2f32(addr: *const i8, val: float32x2_t, align: i32) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2f32.p0")]
+        fn _vst1_v2f32(addr: *const i8, val: float32x2_t, align: i32);
+    }
+    _vst1_v2f32(addr, val, align)
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v2i32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
+unsafe fn vst1_v2i32(addr: *const i8, val: int32x2_t, align: i32) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i32.p0")]
+        fn _vst1_v2i32(addr: *const i8, val: int32x2_t, align: i32);
+    }
+    _vst1_v2i32(addr, val, align)
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v4i16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
+unsafe fn vst1_v4i16(addr: *const i8, val: int16x4_t, align: i32) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i16.p0")]
+        fn _vst1_v4i16(addr: *const i8, val: int16x4_t, align: i32);
+    }
+    _vst1_v4i16(addr, val, align)
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v8i8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
+unsafe fn vst1_v8i8(addr: *const i8, val: int8x8_t, align: i32) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i8.p0")]
+        fn _vst1_v8i8(addr: *const i8, val: int8x8_t, align: i32);
+    }
+    _vst1_v8i8(addr, val, align)
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v16i8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
+unsafe fn vst1q_v16i8(addr: *const i8, val: int8x16_t, align: i32) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v16i8.p0")]
+        fn _vst1q_v16i8(addr: *const i8, val: int8x16_t, align: i32);
+    }
+    _vst1q_v16i8(addr, val, align)
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v2i64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
+unsafe fn vst1q_v2i64(addr: *const i8, val: int64x2_t, align: i32) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i64.p0")]
+        fn _vst1q_v2i64(addr: *const i8, val: int64x2_t, align: i32);
+    }
+    _vst1q_v2i64(addr, val, align)
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v4f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
+unsafe fn vst1q_v4f32(addr: *const i8, val: float32x4_t, align: i32) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4f32.p0")]
+        fn _vst1q_v4f32(addr: *const i8, val: float32x4_t, align: i32);
+    }
+    _vst1q_v4f32(addr, val, align)
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v4i32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
+unsafe fn vst1q_v4i32(addr: *const i8, val: int32x4_t, align: i32) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i32.p0")]
+        fn _vst1q_v4i32(addr: *const i8, val: int32x4_t, align: i32);
+    }
+    _vst1q_v4i32(addr, val, align)
+}
+#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v8i16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
+unsafe fn vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i16.p0")]
+        fn _vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32);
+    }
+    _vst1q_v8i16(addr, val, align)
+}
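Reviewer note: these new private `vst1*_v*` helpers mirror the LLVM signature directly (an untyped `*const i8` address plus an `align: i32` operand). A hypothetical wrapper showing how a public `vst1_*` function would presumably call one; `my_vst1_s32` is illustrative, not part of the PR:

```rust
// Illustrative only: cast to a byte address and pass the element
// alignment in bytes (4 for 32-bit lanes).
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon")]
unsafe fn my_vst1_s32(addr: *mut i32, val: int32x2_t) {
    vst1_v2i32(addr as *const i8, val, 4);
}
```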
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p64)"]
 #[doc = "## Safety"]
@@ -41727,7 +58184,7 @@ pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) {
 #[cfg_attr(test, assert_instr(vst2))]
 pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v2f32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v2f32.p0")]
         fn _vst2_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, size: i32);
     }
     _vst2_f32(a as _, b.0, b.1, 4)
@@ -41743,7 +58200,7 @@ pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) {
 #[cfg_attr(test, assert_instr(vst2))]
 pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v4f32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4f32.p0")]
         fn _vst2q_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, size: i32);
     }
     _vst2q_f32(a as _, b.0, b.1, 4)
@@ -41759,7 +58216,7 @@ pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) {
 #[cfg_attr(test, assert_instr(vst2))]
 pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v8i8")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v8i8.p0")]
         fn _vst2_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, size: i32);
     }
     _vst2_s8(a as _, b.0, b.1, 1)
@@ -41775,7 +58232,7 @@ pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) {
 #[cfg_attr(test, assert_instr(vst2))]
 pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v16i8")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v16i8.p0")]
         fn _vst2q_s8(ptr: *mut i8, a: int8x16_t, b: int8x16_t, size: i32);
     }
     _vst2q_s8(a as _, b.0, b.1, 1)
@@ -41791,7 +58248,7 @@ pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) {
 #[cfg_attr(test, assert_instr(vst2))]
 pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v4i16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4i16.p0")]
         fn _vst2_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, size: i32);
     }
     _vst2_s16(a as _, b.0, b.1, 2)
@@ -41807,7 +58264,7 @@ pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) {
 #[cfg_attr(test, assert_instr(vst2))]
 pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v8i16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v8i16.p0")]
         fn _vst2q_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, size: i32);
     }
     _vst2q_s16(a as _, b.0, b.1, 2)
@@ -41823,7 +58280,7 @@ pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) {
 #[cfg_attr(test, assert_instr(vst2))]
 pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v2i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v2i32.p0")]
         fn _vst2_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, size: i32);
     }
     _vst2_s32(a as _, b.0, b.1, 4)
@@ -41839,7 +58296,7 @@ pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) {
 #[cfg_attr(test, assert_instr(vst2))]
 pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v4i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v4i32.p0")]
         fn _vst2q_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, size: i32);
     }
     _vst2q_s32(a as _, b.0, b.1, 4)
@@ -42004,7 +58461,7 @@ pub unsafe fn vst2q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x2_t) {
 pub unsafe fn vst2_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x2_t) {
     static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v2f32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v2f32.p0")]
         fn _vst2_lane_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, n: i32, size: i32);
     }
     _vst2_lane_f32(a as _, b.0, b.1, LANE, 4)
@@ -42022,7 +58479,7 @@ pub unsafe fn vst2_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x2_t) {
 pub unsafe fn vst2q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x2_t) {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v4f32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4f32.p0")]
         fn _vst2q_lane_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, n: i32, size: i32);
     }
     _vst2q_lane_f32(a as _, b.0, b.1, LANE, 4)
@@ -42040,7 +58497,7 @@ pub unsafe fn vst2q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x2_t) {
 pub unsafe fn vst2_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x2_t) {
     static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v8i8")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v8i8.p0")]
         fn _vst2_lane_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32);
     }
     _vst2_lane_s8(a as _, b.0, b.1, LANE, 1)
@@ -42058,7 +58515,7 @@ pub unsafe fn vst2_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x2_t) {
 pub unsafe fn vst2_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x2_t) {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v4i16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4i16.p0")]
         fn _vst2_lane_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, n: i32, size: i32);
     }
     _vst2_lane_s16(a as _, b.0, b.1, LANE, 2)
@@ -42076,7 +58533,7 @@ pub unsafe fn vst2_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x2_t) {
 pub unsafe fn vst2q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x2_t) {
     static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v8i16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v8i16.p0")]
         fn _vst2q_lane_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, n: i32, size: i32);
     }
     _vst2q_lane_s16(a as _, b.0, b.1, LANE, 2)
@@ -42094,7 +58551,7 @@ pub unsafe fn vst2q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x2_t) {
 pub unsafe fn vst2_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x2_t) {
     static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v2i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v2i32.p0")]
         fn _vst2_lane_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, n: i32, size: i32);
     }
     _vst2_lane_s32(a as _, b.0, b.1, LANE, 4)
@@ -42112,7 +58569,7 @@ pub unsafe fn vst2_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x2_t) {
 pub unsafe fn vst2q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x2_t) {
     static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v4i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.v4i32.p0")]
         fn _vst2q_lane_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, n: i32, size: i32);
     }
     _vst2q_lane_s32(a as _, b.0, b.1, LANE, 4)
@@ -42351,7 +58808,7 @@ pub unsafe fn vst2_p64(a: *mut p64, b: poly64x1x2_t) {
 #[cfg_attr(test, assert_instr(nop))]
 pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v1i64")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v1i64.p0")]
         fn _vst2_s64(ptr: *mut i8, a: int64x1_t, b: int64x1_t, size: i32);
     }
     _vst2_s64(a as _, b.0, b.1, 8)
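Reviewer note: the trailing integer threaded through every `_vst2*` call above (1 for i8, 2 for i16, 4 for i32/f32, 8 here for the single-lane i64 case) is the `size: i32` parameter of the declarations, i.e. the element size in bytes, which appears to serve as the alignment operand of the LLVM `vst2` intrinsics. For example:

```rust
// Illustrative only: how the element size reaches the LLVM call.
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon")]
unsafe fn demo(a: *mut i64, b: int64x1x2_t) {
    vst2_s64(a, b) // forwards as _vst2_s64(a as _, b.0, b.1, /* size = */ 8)
}
```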
@@ -46004,6 +62461,928 @@ pub unsafe fn vsudotq_lane_s32<const LANE: i32>(
     let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
     vusdotq_s32(a, transmute(c), b)
 }
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+unsafe fn vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl1")]
+        fn _vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    _vtbl1(a, b)
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub unsafe fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    vtbl1(a, b)
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub unsafe fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    transmute(vtbl1(transmute(a), transmute(b)))
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub unsafe fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vtbl1(transmute(a), transmute(b)));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub unsafe fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t {
+    transmute(vtbl1(transmute(a), transmute(b)))
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub unsafe fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t {
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(vtbl1(transmute(a), transmute(b)));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
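Reviewer note: a usage sketch for the `vtbl1` family added above. Each lane of the index vector selects a byte of the 8-byte table, and indices >= 8 produce 0 in that lane (`reverse_lanes` is hypothetical, not part of the PR):

```rust
#[cfg(target_arch = "arm")]
use core::arch::arm::*;

// Reverses the table's lanes by looking up indices [7, 6, ..., 0].
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon")]
unsafe fn reverse_lanes(table: uint8x8_t) -> uint8x8_t {
    let idx: [u8; 8] = [7, 6, 5, 4, 3, 2, 1, 0];
    vtbl1_u8(table, vld1_u8(idx.as_ptr()))
}
```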
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+unsafe fn vtbl2(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl2")]
+        fn _vtbl2(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
+    }
+    _vtbl2(a, b, c)
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub unsafe fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t {
+    vtbl2(a.0, a.1, b)
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub unsafe fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t {
+    transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b)))
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub unsafe fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t {
+    let mut a: uint8x8x2_t = a;
+    a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b)));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub unsafe fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t {
+    transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b)))
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub unsafe fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t {
+    let mut a: poly8x8x2_t = a;
+    a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b)));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
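Reviewer note: `vtbl2_u8` treats `a.0` and `a.1` as one concatenated 16-byte table, so index lanes 0..=15 are in range and anything larger yields 0. A hypothetical sketch:

```rust
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon")]
unsafe fn lookup16(table: uint8x8x2_t, idx: uint8x8_t) -> uint8x8_t {
    // idx lanes 0..=7 select from table.0, 8..=15 from table.1.
    vtbl2_u8(table, idx)
}
```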
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+unsafe fn vtbl3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl3")]
+        fn _vtbl3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t;
+    }
+    _vtbl3(a, b, c, d)
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub unsafe fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t {
+    vtbl3(a.0, a.1, a.2, b)
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub unsafe fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t {
+    transmute(vtbl3(
+        transmute(a.0),
+        transmute(a.1),
+        transmute(a.2),
+        transmute(b),
+    ))
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub unsafe fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t {
+    let mut a: uint8x8x3_t = a;
+    a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.2 = simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vtbl3(
+        transmute(a.0),
+        transmute(a.1),
+        transmute(a.2),
+        transmute(b),
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub unsafe fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
+    transmute(vtbl3(
+        transmute(a.0),
+        transmute(a.1),
+        transmute(a.2),
+        transmute(b),
+    ))
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub unsafe fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
+    let mut a: poly8x8x3_t = a;
+    a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.2 = simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(vtbl3(
+        transmute(a.0),
+        transmute(a.1),
+        transmute(a.2),
+        transmute(b),
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+unsafe fn vtbl4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbl4")]
+        fn _vtbl4(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t;
+    }
+    _vtbl4(a, b, c, d, e)
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub unsafe fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t {
+    vtbl4(a.0, a.1, a.2, a.3, b)
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub unsafe fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t {
+    transmute(vtbl4(
+        transmute(a.0),
+        transmute(a.1),
+        transmute(a.2),
+        transmute(a.3),
+        transmute(b),
+    ))
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub unsafe fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t {
+    let mut a: uint8x8x4_t = a;
+    a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.2 = simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.3 = simd_shuffle!(a.3, a.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vtbl4(
+        transmute(a.0),
+        transmute(a.1),
+        transmute(a.2),
+        transmute(a.3),
+        transmute(b),
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub unsafe fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
+    transmute(vtbl4(
+        transmute(a.0),
+        transmute(a.1),
+        transmute(a.2),
+        transmute(a.3),
+        transmute(b),
+    ))
+}
+#[doc = "Table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbl))]
+pub unsafe fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
+    let mut a: poly8x8x4_t = a;
+    a.0 = simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.1 = simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.2 = simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    a.3 = simd_shuffle!(a.3, a.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(vtbl4(
+        transmute(a.0),
+        transmute(a.1),
+        transmute(a.2),
+        transmute(a.3),
+        transmute(b),
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
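Reviewer note: the `#[cfg(target_endian = "big")]` twins repeated above exist because the `transmute`-based forwarding assumes little-endian lane numbering; on big-endian targets each vector argument and the result are lane-reversed around the call. A schematic of the pattern (it reuses the same private items as the generated code, so it is illustrative rather than standalone):

```rust
// 1. reverse the lanes of every vector argument,
// 2. call the little-endian-lane primitive,
// 3. reverse the lanes of the result back.
unsafe fn be_wrap(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
    let r: uint8x8_t = transmute(vtbl1(transmute(a), transmute(b)));
    simd_shuffle!(r, r, [7, 6, 5, 4, 3, 2, 1, 0])
}
```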
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+unsafe fn vtbx1(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx1")]
+        fn _vtbx1(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
+    }
+    _vtbx1(a, b, c)
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub unsafe fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
+    vtbx1(a, b, c)
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub unsafe fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
+    transmute(vtbx1(transmute(a), transmute(b), transmute(c)))
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub unsafe fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let b: uint8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vtbx1(transmute(a), transmute(b), transmute(c)));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub unsafe fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t {
+    transmute(vtbx1(transmute(a), transmute(b), transmute(c)))
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub unsafe fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t {
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let b: poly8x8_t = simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(vtbx1(transmute(a), transmute(b), transmute(c)));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
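Reviewer note: the contrast with `vtbl`: where `vtbl1_u8` zeroes lanes whose index is out of range, `vtbx1_u8` keeps the corresponding lane of its first argument instead. A hypothetical sketch (`lookup_or_keep` is not part of the PR):

```rust
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon")]
unsafe fn lookup_or_keep(fallback: uint8x8_t, table: uint8x8_t, idx: uint8x8_t) -> uint8x8_t {
    // idx lanes in 0..8 select from `table`; lanes >= 8 pass the
    // matching lane of `fallback` through unchanged.
    vtbx1_u8(fallback, table, idx)
}
```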
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+unsafe fn vtbx2(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx2")]
+        fn _vtbx2(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t;
+    }
+    _vtbx2(a, b, c, d)
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub unsafe fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t {
+    vtbx2(a, b.0, b.1, c)
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub unsafe fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t {
+    transmute(vtbx2(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(c),
+    ))
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub unsafe fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t {
+    let mut b: uint8x8x2_t = b;
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vtbx2(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(c),
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub unsafe fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t {
+    transmute(vtbx2(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(c),
+    ))
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub unsafe fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t {
+    let mut b: poly8x8x2_t = b;
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(vtbx2(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(c),
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+unsafe fn vtbx3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx3")]
+        fn _vtbx3(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t) -> int8x8_t;
+    }
+    _vtbx3(a, b, c, d, e)
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub unsafe fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t {
+    vtbx3(a, b.0, b.1, b.2, c)
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub unsafe fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t {
+    transmute(vtbx3(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        transmute(c),
+    ))
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub unsafe fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t {
+    let mut b: uint8x8x3_t = b;
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vtbx3(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        transmute(c),
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub unsafe fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
+    transmute(vtbx3(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        transmute(c),
+    ))
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub unsafe fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
+    let mut b: poly8x8x3_t = b;
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(vtbx3(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        transmute(c),
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+unsafe fn vtbx4(
+    a: int8x8_t,
+    b: int8x8_t,
+    c: int8x8_t,
+    d: int8x8_t,
+    e: int8x8_t,
+    f: int8x8_t,
+) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vtbx4")]
+        fn _vtbx4(
+            a: int8x8_t,
+            b: int8x8_t,
+            c: int8x8_t,
+            d: int8x8_t,
+            e: int8x8_t,
+            f: int8x8_t,
+        ) -> int8x8_t;
+    }
+    _vtbx4(a, b, c, d, e, f)
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub unsafe fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
+    vtbx4(
+        a,
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        transmute(b.3),
+        c,
+    )
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub unsafe fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
+    let mut b: int8x8x4_t = b;
+    let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.3 = simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let c: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: int8x8_t = vtbx4(
+        a,
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        transmute(b.3),
+        c,
+    );
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub unsafe fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t {
+    transmute(vtbx4(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        transmute(b.3),
+        transmute(c),
+    ))
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub unsafe fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t {
+    let mut b: uint8x8x4_t = b;
+    let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.3 = simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: uint8x8_t = transmute(vtbx4(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        transmute(b.3),
+        transmute(c),
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
+    transmute(vtbx4(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        transmute(b.3),
+        transmute(c),
+    ))
+}
+#[doc = "Extended table look-up"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vtbx))]
+pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
+    let mut b: poly8x8x4_t = b;
+    let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+    b.3 = simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let c: uint8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
+    let ret_val: poly8x8_t = transmute(vtbx4(
+        transmute(a),
+        transmute(b.0),
+        transmute(b.1),
+        transmute(b.2),
+        transmute(b.3),
+        transmute(c),
+    ));
+    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+}
 #[doc = "Transpose elements"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f32)"]
 #[doc = "## Safety"]
@@ -47002,6 +64381,37 @@ pub unsafe fn vusdotq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_
     }
     _vusdotq_s32(a, b.as_signed(), c)
 }
+#[doc = "Unsigned and signed 8-bit integer matrix multiply-accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusmmlaq_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon instrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usmmla)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.usmmla.v4i32.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usmmla.v4i32.v16i8")]
+        fn _vusmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t;
+    }
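+    // The LLVM intrinsic takes both 8-bit operands as signed vectors, so the
+    // unsigned input is reinterpreted bit-for-bit via `as_signed()`.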
+    _vusmmlaq_s32(a, b.as_signed(), c)
+}
 #[doc = "Unzip vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp_f32)"]
 #[doc = "## Safety"]
diff --git a/crates/core_arch/src/arm_shared/neon/mod.rs b/crates/core_arch/src/arm_shared/neon/mod.rs
index b7854051ae..623d39c398 100644
--- a/crates/core_arch/src/arm_shared/neon/mod.rs
+++ b/crates/core_arch/src/arm_shared/neon/mod.rs
@@ -1144,296 +1144,6 @@ impl_sign_conversions_neon! {
     (uint8x8x4_t, int8x8x4_t)
 }
 
-#[allow(improper_ctypes)]
-unsafe extern "unadjusted" {
-    // absolute value (64-bit)
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v8i8")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.abs.v8i8"
-    )]
-    fn vabs_s8_(a: int8x8_t) -> int8x8_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v4i16")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.abs.v4i16"
-    )]
-    fn vabs_s16_(a: int16x4_t) -> int16x4_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v2i32")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.abs.v2i32"
-    )]
-    fn vabs_s32_(a: int32x2_t) -> int32x2_t;
-    // absolute value (128-bit)
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v16i8")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.abs.v16i8"
-    )]
-    fn vabsq_s8_(a: int8x16_t) -> int8x16_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v8i16")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.abs.v8i16"
-    )]
-    fn vabsq_s16_(a: int16x8_t) -> int16x8_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v4i32")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.abs.v4i32"
-    )]
-    fn vabsq_s32_(a: int32x4_t) -> int32x4_t;
-
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v8i8")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.sminp.v8i8"
-    )]
-    fn vpmins_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v4i16")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.sminp.v4i16"
-    )]
-    fn vpmins_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2i32")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.sminp.v2i32"
-    )]
-    fn vpmins_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v8i8")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.uminp.v8i8"
-    )]
-    fn vpminu_v8i8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v4i16")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.uminp.v4i16"
-    )]
-    fn vpminu_v4i16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v2i32")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.uminp.v2i32"
-    )]
-    fn vpminu_v2i32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2f32")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.fminp.v2f32"
-    )]
-    fn vpminf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v8i8")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.smaxp.v8i8"
-    )]
-    fn vpmaxs_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v4i16")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.smaxp.v4i16"
-    )]
-    fn vpmaxs_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2i32")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.smaxp.v2i32"
-    )]
-    fn vpmaxs_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v8i8")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.umaxp.v8i8"
-    )]
-    fn vpmaxu_v8i8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v4i16")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.umaxp.v4i16"
-    )]
-    fn vpmaxu_v4i16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v2i32")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.umaxp.v2i32"
-    )]
-    fn vpmaxu_v2i32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.fmaxp.v2f32"
-    )]
-    fn vpmaxf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v8i8")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.raddhn.v8i8"
-    )]
-    fn vraddhn_s16_(a: int16x8_t, b: int16x8_t) -> int8x8_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v4i16")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.raddhn.v4i16"
-    )]
-    fn vraddhn_s32_(a: int32x4_t, b: int32x4_t) -> int16x4_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v2i32")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.raddhn.v2i32"
-    )]
-    fn vraddhn_s64_(a: int64x2_t, b: int64x2_t) -> int32x2_t;
-
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.addp.v4i16"
-    )]
-    fn vpadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2i32")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.addp.v2i32"
-    )]
-    fn vpadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v8i8")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.addp.v8i8"
-    )]
-    fn vpadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i16.v8i8")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.saddlp.v4i16.v8i8"
-    )]
-    pub(crate) fn vpaddl_s8_(a: int8x8_t) -> int16x4_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i32.v4i16")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.saddlp.v2i32.v4i16"
-    )]
-    pub(crate) fn vpaddl_s16_(a: int16x4_t) -> int32x2_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v1i64.v2i32")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.saddlp.v1i64.v2i32"
-    )]
-    pub(crate) fn vpaddl_s32_(a: int32x2_t) -> int64x1_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v8i16.v16i8")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.saddlp.v8i16.v16i8"
-    )]
-    pub(crate) fn vpaddlq_s8_(a: int8x16_t) -> int16x8_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i32.v8i16")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.saddlp.v4i32.v8i16"
-    )]
-    pub(crate) fn vpaddlq_s16_(a: int16x8_t) -> int32x4_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i64.v4i32")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.saddlp.v2i64.v4i32"
-    )]
-    pub(crate) fn vpaddlq_s32_(a: int32x4_t) -> int64x2_t;
-
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i16.v8i8")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.uaddlp.v4i16.v8i8"
-    )]
-    pub(crate) fn vpaddl_u8_(a: uint8x8_t) -> uint16x4_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i32.v4i16")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.uaddlp.v2i32.v4i16"
-    )]
-    pub(crate) fn vpaddl_u16_(a: uint16x4_t) -> uint32x2_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v1i64.v2i32")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.uaddlp.v1i64.v2i32"
-    )]
-    pub(crate) fn vpaddl_u32_(a: uint32x2_t) -> uint64x1_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v8i16.v16i8")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.uaddlp.v8i16.v16i8"
-    )]
-    pub(crate) fn vpaddlq_u8_(a: uint8x16_t) -> uint16x8_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i32.v8i16")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.uaddlp.v4i32.v8i16"
-    )]
-    pub(crate) fn vpaddlq_u16_(a: uint16x8_t) -> uint32x4_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i64.v4i32")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.neon.uaddlp.v2i64.v4i32"
-    )]
-    pub(crate) fn vpaddlq_u32_(a: uint32x4_t) -> uint64x2_t;
-
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.ctpop.v8i8")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.ctpop.v8i8"
-    )]
-    fn vcnt_s8_(a: int8x8_t) -> int8x8_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.ctpop.v16i8")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.ctpop.v16i8"
-    )]
-    fn vcntq_s8_(a: int8x16_t) -> int8x16_t;
-
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v8i8")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.ctlz.v8i8"
-    )]
-    fn vclz_s8_(a: int8x8_t) -> int8x8_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v16i8")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.ctlz.v16i8"
-    )]
-    fn vclzq_s8_(a: int8x16_t) -> int8x16_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v4i16")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.ctlz.v4i16"
-    )]
-    fn vclz_s16_(a: int16x4_t) -> int16x4_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v8i16")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.ctlz.v8i16"
-    )]
-    fn vclzq_s16_(a: int16x8_t) -> int16x8_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v2i32")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.ctlz.v2i32"
-    )]
-    fn vclz_s32_(a: int32x2_t) -> int32x2_t;
-    #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v4i32")]
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.ctlz.v4i32"
-    )]
-    fn vclzq_s32_(a: int32x4_t) -> int32x4_t;
-}
-
 /// Load one single-element structure to one lane of one register.
 #[inline]
 #[target_feature(enable = "neon")]
@@ -2771,248 +2481,6 @@ pub unsafe fn vabaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4
     simd_add(a, vabdq_u32(b, c))
 }
 
-/// Absolute value (wrapping).
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(abs)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vabs_s8(a: int8x8_t) -> int8x8_t {
-    vabs_s8_(a)
-}
-/// Absolute value (wrapping).
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(abs)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vabs_s16(a: int16x4_t) -> int16x4_t {
-    vabs_s16_(a)
-}
-/// Absolute value (wrapping).
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(abs)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vabs_s32(a: int32x2_t) -> int32x2_t {
-    vabs_s32_(a)
-}
-/// Absolute value (wrapping).
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(abs)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vabsq_s8(a: int8x16_t) -> int8x16_t {
-    vabsq_s8_(a)
-}
-/// Absolute value (wrapping).
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(abs)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vabsq_s16(a: int16x8_t) -> int16x8_t {
-    vabsq_s16_(a)
-}
-/// Absolute value (wrapping).
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(abs)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vabsq_s32(a: int32x4_t) -> int32x4_t {
-    vabsq_s32_(a)
-}
-
-/// Add pairwise.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(addp)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    vpadd_s16_(a, b)
-}
-/// Add pairwise.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(addp)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    vpadd_s32_(a, b)
-}
-/// Add pairwise.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(addp)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    vpadd_s8_(a, b)
-}
-/// Add pairwise.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(addp)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    transmute(vpadd_s16_(transmute(a), transmute(b)))
-}
-/// Add pairwise.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(addp)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    transmute(vpadd_s32_(transmute(a), transmute(b)))
-}
-/// Add pairwise.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(addp)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    transmute(vpadd_s8_(transmute(a), transmute(b)))
-}
-
 /// Vector add.
 #[inline]
 #[target_feature(enable = "neon")]
@@ -4165,14 +3633,14 @@ pub unsafe fn vaddhn_high_u64(r: uint32x2_t, a: uint64x2_t, b: uint64x2_t) -> ui
     simd_shuffle!(r, x, [0, 1, 2, 3])
 }
 
-/// Rounding Add returning High Narrow.
+/// Vector narrow integer.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i16))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
+    assert_instr(xtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4182,18 +3650,18 @@ pub unsafe fn vaddhn_high_u64(r: uint32x2_t, a: uint64x2_t, b: uint64x2_t) -> ui
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
-    vraddhn_s16_(a, b)
+pub unsafe fn vmovn_s16(a: int16x8_t) -> int8x8_t {
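+    // Narrowing cast: each 16-bit lane is truncated to its low 8 bits (XTN/VMOVN).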
+    simd_cast(a)
 }
 
-/// Rounding Add returning High Narrow.
+/// Vector narrow integer.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i32))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
+    assert_instr(xtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4203,18 +3671,18 @@ pub unsafe fn vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
-    vraddhn_s32_(a, b)
+pub unsafe fn vmovn_s32(a: int32x4_t) -> int16x4_t {
+    simd_cast(a)
 }
 
-/// Rounding Add returning High Narrow.
+/// Vector narrow integer.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i64))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
+    assert_instr(xtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4224,18 +3692,18 @@ pub unsafe fn vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
-    vraddhn_s64_(a, b)
+pub unsafe fn vmovn_s64(a: int64x2_t) -> int32x2_t {
+    simd_cast(a)
 }
 
-/// Rounding Add returning High Narrow.
+/// Vector narrow integer.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i16))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
+    assert_instr(xtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4245,18 +3713,18 @@ pub unsafe fn vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
-    transmute(vraddhn_s16_(transmute(a), transmute(b)))
+pub unsafe fn vmovn_u16(a: uint16x8_t) -> uint8x8_t {
+    simd_cast(a)
 }
 
-/// Rounding Add returning High Narrow.
+/// Vector narrow integer.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i32))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
+    assert_instr(xtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4266,18 +3734,18 @@ pub unsafe fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
-    transmute(vraddhn_s32_(transmute(a), transmute(b)))
+pub unsafe fn vmovn_u32(a: uint32x4_t) -> uint16x4_t {
+    simd_cast(a)
 }
 
-/// Rounding Add returning High Narrow.
+/// Vector narrow integer.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i64))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
+    assert_instr(xtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4287,18 +3755,18 @@ pub unsafe fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
-    transmute(vraddhn_s64_(transmute(a), transmute(b)))
+pub unsafe fn vmovn_u64(a: uint64x2_t) -> uint32x2_t {
+    simd_cast(a)
 }
 
-/// Rounding Add returning High Narrow (high half).
+/// Vector long move.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i16))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn2)
+    assert_instr(sxtl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4308,19 +3776,18 @@ pub unsafe fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vraddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x16_t {
-    let x = vraddhn_s16_(a, b);
-    simd_shuffle!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+pub unsafe fn vmovl_s8(a: int8x8_t) -> int16x8_t {
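+    // Widening cast: each 8-bit lane is sign-extended to 16 bits (SXTL/VMOVL).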
+    simd_cast(a)
 }
 
-/// Rounding Add returning High Narrow (high half).
+/// Vector long move.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i32))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn2)
+    assert_instr(sxtl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4330,19 +3797,18 @@ pub unsafe fn vraddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vraddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int16x8_t {
-    let x = vraddhn_s32_(a, b);
-    simd_shuffle!(r, x, [0, 1, 2, 3, 4, 5, 6, 7])
+pub unsafe fn vmovl_s16(a: int16x4_t) -> int32x4_t {
+    simd_cast(a)
 }
 
-/// Rounding Add returning High Narrow (high half).
+/// Vector long move.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i64))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn2)
+    assert_instr(sxtl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4352,19 +3818,18 @@ pub unsafe fn vraddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int1
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vraddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int32x4_t {
-    let x = vraddhn_s64_(a, b);
-    simd_shuffle!(r, x, [0, 1, 2, 3])
+pub unsafe fn vmovl_s32(a: int32x2_t) -> int64x2_t {
+    simd_cast(a)
 }
 
-/// Rounding Add returning High Narrow (high half).
+/// Vector long move.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i16))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn2)
+    assert_instr(uxtl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4374,19 +3839,18 @@ pub unsafe fn vraddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int3
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vraddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> uint8x16_t {
-    let x: uint8x8_t = transmute(vraddhn_s16_(transmute(a), transmute(b)));
-    simd_shuffle!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+pub unsafe fn vmovl_u8(a: uint8x8_t) -> uint16x8_t {
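+    // Widening cast: each 8-bit lane is zero-extended to 16 bits (UXTL/VMOVL).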
+    simd_cast(a)
 }
 
-/// Rounding Add returning High Narrow (high half).
+/// Vector long move.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i32))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn2)
+    assert_instr(uxtl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4396,19 +3860,18 @@ pub unsafe fn vraddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> ui
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vraddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> uint16x8_t {
-    let x: uint16x4_t = transmute(vraddhn_s32_(transmute(a), transmute(b)));
-    simd_shuffle!(r, x, [0, 1, 2, 3, 4, 5, 6, 7])
+pub unsafe fn vmovl_u16(a: uint16x4_t) -> uint32x4_t {
+    simd_cast(a)
 }
 
-/// Rounding Add returning High Narrow (high half).
+/// Vector long move.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i64))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn2)
+    assert_instr(uxtl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4418,19 +3881,18 @@ pub unsafe fn vraddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> u
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vraddhn_high_u64(r: uint32x2_t, a: uint64x2_t, b: uint64x2_t) -> uint32x4_t {
-    let x: uint32x2_t = transmute(vraddhn_s64_(transmute(a), transmute(b)));
-    simd_shuffle!(r, x, [0, 1, 2, 3])
+pub unsafe fn vmovl_u32(a: uint32x2_t) -> uint64x2_t {
+    simd_cast(a)
 }
 
-/// Signed Add Long Pairwise.
+/// Vector bitwise not.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.s8))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(saddlp)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4440,18 +3902,19 @@ pub unsafe fn vraddhn_high_u64(r: uint32x2_t, a: uint64x2_t, b: uint64x2_t) -> u
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpaddl_s8(a: int8x8_t) -> int16x4_t {
-    vpaddl_s8_(a)
+pub unsafe fn vmvn_s8(a: int8x8_t) -> int8x8_t {
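+    // Bitwise NOT implemented as XOR with an all-ones vector.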
+    let b = int8x8_t::splat(-1);
+    simd_xor(a, b)
 }
 
-/// Signed Add Long Pairwise.
+/// Vector bitwise not.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.s16))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(saddlp)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4461,18 +3924,19 @@ pub unsafe fn vpaddl_s8(a: int8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpaddl_s16(a: int16x4_t) -> int32x2_t {
-    vpaddl_s16_(a)
+pub unsafe fn vmvnq_s8(a: int8x16_t) -> int8x16_t {
+    let b = int8x16_t::splat(-1);
+    simd_xor(a, b)
 }
 
-/// Signed Add Long Pairwise.
+/// Vector bitwise not.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.s32))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(saddlp)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4482,18 +3946,19 @@ pub unsafe fn vpaddl_s16(a: int16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpaddl_s32(a: int32x2_t) -> int64x1_t {
-    vpaddl_s32_(a)
+pub unsafe fn vmvn_s16(a: int16x4_t) -> int16x4_t {
+    let b = int16x4_t::splat(-1);
+    simd_xor(a, b)
 }
 
-/// Signed Add Long Pairwise.
+/// Vector bitwise not.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.s8))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(saddlp)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4503,18 +3968,19 @@ pub unsafe fn vpaddl_s32(a: int32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpaddlq_s8(a: int8x16_t) -> int16x8_t {
-    vpaddlq_s8_(a)
+pub unsafe fn vmvnq_s16(a: int16x8_t) -> int16x8_t {
+    let b = int16x8_t::splat(-1);
+    simd_xor(a, b)
 }
 
-/// Signed Add Long Pairwise.
+/// Vector bitwise not.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.s16))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(saddlp)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4524,18 +3990,19 @@ pub unsafe fn vpaddlq_s8(a: int8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpaddlq_s16(a: int16x8_t) -> int32x4_t {
-    vpaddlq_s16_(a)
+pub unsafe fn vmvn_s32(a: int32x2_t) -> int32x2_t {
+    let b = int32x2_t::splat(-1);
+    simd_xor(a, b)
 }
 
-/// Signed Add Long Pairwise.
+/// Vector bitwise not.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.s32))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(saddlp)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4545,18 +4012,19 @@ pub unsafe fn vpaddlq_s16(a: int16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpaddlq_s32(a: int32x4_t) -> int64x2_t {
-    vpaddlq_s32_(a)
+pub unsafe fn vmvnq_s32(a: int32x4_t) -> int32x4_t {
+    let b = int32x4_t::splat(-1);
+    simd_xor(a, b)
 }
 
-/// Unsigned Add Long Pairwise.
+/// Vector bitwise not.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.u8))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uaddlp)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4566,18 +4034,19 @@ pub unsafe fn vpaddlq_s32(a: int32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpaddl_u8(a: uint8x8_t) -> uint16x4_t {
-    vpaddl_u8_(a)
+pub unsafe fn vmvn_u8(a: uint8x8_t) -> uint8x8_t {
+    let b = uint8x8_t::splat(255);
+    simd_xor(a, b)
 }
 
-/// Unsigned Add Long Pairwise.
+/// Vector bitwise not.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.u16))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uaddlp)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4587,18 +4056,19 @@ pub unsafe fn vpaddl_u8(a: uint8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpaddl_u16(a: uint16x4_t) -> uint32x2_t {
-    vpaddl_u16_(a)
+pub unsafe fn vmvnq_u8(a: uint8x16_t) -> uint8x16_t {
+    let b = uint8x16_t::splat(255);
+    simd_xor(a, b)
 }
 
-/// Unsigned Add Long Pairwise.
+/// Vector bitwise not.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.u32))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uaddlp)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4608,18 +4078,19 @@ pub unsafe fn vpaddl_u16(a: uint16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpaddl_u32(a: uint32x2_t) -> uint64x1_t {
-    vpaddl_u32_(a)
+pub unsafe fn vmvn_u16(a: uint16x4_t) -> uint16x4_t {
+    let b = uint16x4_t::splat(65_535);
+    simd_xor(a, b)
 }
 
-/// Unsigned Add Long Pairwise.
+/// Vector bitwise not.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.u8))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uaddlp)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4629,18 +4100,19 @@ pub unsafe fn vpaddl_u32(a: uint32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpaddlq_u8(a: uint8x16_t) -> uint16x8_t {
-    vpaddlq_u8_(a)
+pub unsafe fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t {
+    let b = uint16x8_t::splat(65_535);
+    simd_xor(a, b)
 }
 
-/// Unsigned Add Long Pairwise.
+/// Vector bitwise not.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.u16))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uaddlp)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4650,18 +4122,19 @@ pub unsafe fn vpaddlq_u8(a: uint8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpaddlq_u16(a: uint16x8_t) -> uint32x4_t {
-    vpaddlq_u16_(a)
+pub unsafe fn vmvn_u32(a: uint32x2_t) -> uint32x2_t {
+    let b = uint32x2_t::splat(4_294_967_295);
+    simd_xor(a, b)
 }
 
-/// Unsigned Add Long Pairwise.
+/// Vector bitwise not.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.u32))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uaddlp)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4671,18 +4144,19 @@ pub unsafe fn vpaddlq_u16(a: uint16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpaddlq_u32(a: uint32x4_t) -> uint64x2_t {
-    vpaddlq_u32_(a)
+pub unsafe fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t {
+    let b = uint32x4_t::splat(4_294_967_295);
+    simd_xor(a, b)
 }
 
-/// Vector narrow integer.
+/// Vector bitwise not.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(xtn)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4692,18 +4166,19 @@ pub unsafe fn vpaddlq_u32(a: uint32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovn_s16(a: int16x8_t) -> int8x8_t {
-    simd_cast(a)
+pub unsafe fn vmvn_p8(a: poly8x8_t) -> poly8x8_t {
+    let b = poly8x8_t::splat(255);
+    simd_xor(a, b)
 }
 
-/// Vector narrow integer.
+/// Vector bitwise not.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(xtn)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4713,18 +4188,19 @@ pub unsafe fn vmovn_s16(a: int16x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovn_s32(a: int32x4_t) -> int16x4_t {
-    simd_cast(a)
+pub unsafe fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t {
+    let b = poly8x16_t::splat(255);
+    simd_xor(a, b)
 }
 
-/// Vector narrow integer.
+/// Vector bitwise bit clear.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(xtn)
+    assert_instr(bic)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4734,18 +4210,19 @@ pub unsafe fn vmovn_s32(a: int32x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovn_s64(a: int64x2_t) -> int32x2_t {
-    simd_cast(a)
+pub unsafe fn vbic_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    let c = int8x8_t::splat(-1);
+    simd_and(simd_xor(b, c), a)
 }
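+// Illustrative usage sketch (not part of this patch): `vbic(a, b)` computes
+// `a & !b` per lane, clearing in `a` every bit that is set in `b`.
+//
+//     unsafe {
+//         let a = vdup_n_s8(0b0110);
+//         let b = vdup_n_s8(0b0010);
+//         let r = vbic_s8(a, b); // every lane is 0b0100
+//     }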
 
-/// Vector narrow integer.
+/// Vector bitwise bit clear
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(xtn)
+    assert_instr(bic)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4755,18 +4232,19 @@ pub unsafe fn vmovn_s64(a: int64x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovn_u16(a: uint16x8_t) -> uint8x8_t {
-    simd_cast(a)
+pub unsafe fn vbicq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    let c = int8x16_t::splat(-1);
+    simd_and(simd_xor(b, c), a)
 }
 
-/// Vector narrow integer.
+/// Vector bitwise bit clear
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(xtn)
+    assert_instr(bic)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4776,18 +4254,19 @@ pub unsafe fn vmovn_u16(a: uint16x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovn_u32(a: uint32x4_t) -> uint16x4_t {
-    simd_cast(a)
+pub unsafe fn vbic_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    let c = int16x4_t::splat(-1);
+    simd_and(simd_xor(b, c), a)
 }
 
-/// Vector narrow integer.
+/// Vector bitwise bit clear
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(xtn)
+    assert_instr(bic)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4797,18 +4276,19 @@ pub unsafe fn vmovn_u32(a: uint32x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovn_u64(a: uint64x2_t) -> uint32x2_t {
-    simd_cast(a)
+pub unsafe fn vbicq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    let c = int16x8_t::splat(-1);
+    simd_and(simd_xor(b, c), a)
 }
 
-/// Vector long move.
+/// Vector bitwise bit clear
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sxtl)
+    assert_instr(bic)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4818,18 +4298,19 @@ pub unsafe fn vmovn_u64(a: uint64x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovl_s8(a: int8x8_t) -> int16x8_t {
-    simd_cast(a)
+pub unsafe fn vbic_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    let c = int32x2_t::splat(-1);
+    simd_and(simd_xor(b, c), a)
 }
 
-/// Vector long move.
+/// Vector bitwise bit clear
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sxtl)
+    assert_instr(bic)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4839,18 +4320,19 @@ pub unsafe fn vmovl_s8(a: int8x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovl_s16(a: int16x4_t) -> int32x4_t {
-    simd_cast(a)
+pub unsafe fn vbicq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    let c = int32x4_t::splat(-1);
+    simd_and(simd_xor(b, c), a)
 }
 
-/// Vector long move.
+/// Vector bitwise bit clear
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sxtl)
+    assert_instr(bic)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4860,18 +4342,19 @@ pub unsafe fn vmovl_s16(a: int16x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovl_s32(a: int32x2_t) -> int64x2_t {
-    simd_cast(a)
+pub unsafe fn vbic_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    let c = int64x1_t::splat(-1);
+    simd_and(simd_xor(b, c), a)
 }
 
-/// Vector long move.
+/// Vector bitwise bit clear
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uxtl)
+    assert_instr(bic)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4881,18 +4364,19 @@ pub unsafe fn vmovl_s32(a: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovl_u8(a: uint8x8_t) -> uint16x8_t {
-    simd_cast(a)
+pub unsafe fn vbicq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    let c = int64x2_t::splat(-1);
+    simd_and(simd_xor(b, c), a)
 }
 
-/// Vector long move.
+/// Vector bitwise bit clear
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uxtl)
+    assert_instr(bic)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4902,18 +4386,19 @@ pub unsafe fn vmovl_u8(a: uint8x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovl_u16(a: uint16x4_t) -> uint32x4_t {
-    simd_cast(a)
+pub unsafe fn vbic_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    let c = int8x8_t::splat(-1);
+    simd_and(simd_xor(b, transmute(c)), a)
 }
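+// Note: the unsigned variants reuse a signed all-ones splat and `transmute`
+// it; `-1i8` and `0xFFu8` share the same bit pattern, so the XOR still yields
+// the lanewise complement of `b`.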
 
-/// Vector long move.
+/// Vector bitwise bit clear
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uxtl)
+    assert_instr(bic)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4923,18 +4408,19 @@ pub unsafe fn vmovl_u16(a: uint16x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovl_u32(a: uint32x2_t) -> uint64x2_t {
-    simd_cast(a)
+pub unsafe fn vbicq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    let c = int8x16_t::splat(-1);
+    simd_and(simd_xor(b, transmute(c)), a)
 }
 
-/// Vector bitwise not.
+/// Vector bitwise bit clear
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(bic)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4944,19 +4430,19 @@ pub unsafe fn vmovl_u32(a: uint32x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmvn_s8(a: int8x8_t) -> int8x8_t {
-    let b = int8x8_t::splat(-1);
-    simd_xor(a, b)
+pub unsafe fn vbic_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    let c = int16x4_t::splat(-1);
+    simd_and(simd_xor(b, transmute(c)), a)
 }
 
-/// Vector bitwise not.
+/// Vector bitwise bit clear
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(bic)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4966,19 +4452,19 @@ pub unsafe fn vmvn_s8(a: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmvnq_s8(a: int8x16_t) -> int8x16_t {
-    let b = int8x16_t::splat(-1);
-    simd_xor(a, b)
+pub unsafe fn vbicq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    let c = int16x8_t::splat(-1);
+    simd_and(simd_xor(b, transmute(c)), a)
 }
 
-/// Vector bitwise not.
+/// Vector bitwise bit clear
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(bic)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -4988,19 +4474,19 @@ pub unsafe fn vmvnq_s8(a: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmvn_s16(a: int16x4_t) -> int16x4_t {
-    let b = int16x4_t::splat(-1);
-    simd_xor(a, b)
+pub unsafe fn vbic_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    let c = int32x2_t::splat(-1);
+    simd_and(simd_xor(b, transmute(c)), a)
 }
 
-/// Vector bitwise not.
+/// Vector bitwise bit clear
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(bic)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5010,19 +4496,19 @@ pub unsafe fn vmvn_s16(a: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmvnq_s16(a: int16x8_t) -> int16x8_t {
-    let b = int16x8_t::splat(-1);
-    simd_xor(a, b)
+pub unsafe fn vbicq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    let c = int32x4_t::splat(-1);
+    simd_and(simd_xor(b, transmute(c)), a)
 }
 
-/// Vector bitwise not.
+/// Vector bitwise bit clear
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(bic)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5032,19 +4518,19 @@ pub unsafe fn vmvnq_s16(a: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmvn_s32(a: int32x2_t) -> int32x2_t {
-    let b = int32x2_t::splat(-1);
-    simd_xor(a, b)
-}
+pub unsafe fn vbic_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    let c = int64x1_t::splat(-1);
+    simd_and(simd_xor(b, transmute(c)), a)
+}
 
-/// Vector bitwise not.
+/// Vector bitwise bit clear
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(bic)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5054,19 +4540,23 @@ pub unsafe fn vmvn_s32(a: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmvnq_s32(a: int32x4_t) -> int32x4_t {
-    let b = int32x4_t::splat(-1);
-    simd_xor(a, b)
+pub unsafe fn vbicq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    let c = int64x2_t::splat(-1);
+    simd_and(simd_xor(b, transmute(c)), a)
 }
 
-/// Vector bitwise not.
+/// Bitwise Select. This instruction sets each bit in the destination SIMD&FP
+/// register to the corresponding bit from the first source SIMD&FP register
+/// when the original destination bit was 1, otherwise from the second source
+/// SIMD&FP register.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5076,19 +4566,22 @@ pub unsafe fn vmvnq_s32(a: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmvn_u8(a: uint8x8_t) -> uint8x8_t {
-    let b = uint8x8_t::splat(255);
-    simd_xor(a, b)
+pub unsafe fn vbsl_s8(a: uint8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
+    let not = int8x8_t::splat(-1);
+    transmute(simd_or(
+        simd_and(a, transmute(b)),
+        simd_and(simd_xor(a, transmute(not)), transmute(c)),
+    ))
 }
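+// Illustrative usage sketch (not part of this patch): per bit position,
+// `vbsl(mask, b, c)` selects from `b` where `mask` is 1 and from `c` where it
+// is 0, i.e. `(mask & b) | (!mask & c)`.
+//
+//     unsafe {
+//         let mask = vdup_n_u8(0xF0);
+//         let b = vdup_n_s8(0x55);
+//         let c = vdup_n_s8(0x0A);
+//         let r = vbsl_s8(mask, b, c); // every lane is 0x5A
+//     }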
 
-/// Vector bitwise not.
+/// Bitwise Select.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5098,19 +4591,22 @@ pub unsafe fn vmvn_u8(a: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmvnq_u8(a: uint8x16_t) -> uint8x16_t {
-    let b = uint8x16_t::splat(255);
-    simd_xor(a, b)
+pub unsafe fn vbsl_s16(a: uint16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+    let not = int16x4_t::splat(-1);
+    transmute(simd_or(
+        simd_and(a, transmute(b)),
+        simd_and(simd_xor(a, transmute(not)), transmute(c)),
+    ))
 }
 
-/// Vector bitwise not.
+/// Bitwise Select.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5120,19 +4616,22 @@ pub unsafe fn vmvnq_u8(a: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmvn_u16(a: uint16x4_t) -> uint16x4_t {
-    let b = uint16x4_t::splat(65_535);
-    simd_xor(a, b)
+pub unsafe fn vbsl_s32(a: uint32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+    let not = int32x2_t::splat(-1);
+    transmute(simd_or(
+        simd_and(a, transmute(b)),
+        simd_and(simd_xor(a, transmute(not)), transmute(c)),
+    ))
 }
 
-/// Vector bitwise not.
+/// Bitwise Select.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5142,19 +4641,22 @@ pub unsafe fn vmvn_u16(a: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t {
-    let b = uint16x8_t::splat(65_535);
-    simd_xor(a, b)
+pub unsafe fn vbsl_s64(a: uint64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t {
+    let not = int64x1_t::splat(-1);
+    transmute(simd_or(
+        simd_and(a, transmute(b)),
+        simd_and(simd_xor(a, transmute(not)), transmute(c)),
+    ))
 }
 
-/// Vector bitwise not.
+/// Bitwise Select.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5164,19 +4666,19 @@ pub unsafe fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmvn_u32(a: uint32x2_t) -> uint32x2_t {
-    let b = uint32x2_t::splat(4_294_967_295);
-    simd_xor(a, b)
+pub unsafe fn vbsl_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
+    let not = int8x8_t::splat(-1);
+    simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
 }
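+// Note: for the unsigned variants all three operands already share one type,
+// so the select needs no outer `transmute`; only the all-ones splat is
+// reinterpreted.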
 
-/// Vector bitwise not.
+/// Bitwise Select.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5186,19 +4688,19 @@ pub unsafe fn vmvn_u32(a: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t {
-    let b = uint32x4_t::splat(4_294_967_295);
-    simd_xor(a, b)
+pub unsafe fn vbsl_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
+    let not = int16x4_t::splat(-1);
+    simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
 }
 
-/// Vector bitwise not.
+/// Bitwise Select.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5208,19 +4710,19 @@ pub unsafe fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmvn_p8(a: poly8x8_t) -> poly8x8_t {
-    let b = poly8x8_t::splat(255);
-    simd_xor(a, b)
+pub unsafe fn vbsl_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
+    let not = int32x2_t::splat(-1);
+    simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
 }
 
-/// Vector bitwise not.
+/// Bitwise Select.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5230,19 +4732,19 @@ pub unsafe fn vmvn_p8(a: poly8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t {
-    let b = poly8x16_t::splat(255);
-    simd_xor(a, b)
+pub unsafe fn vbsl_u64(a: uint64x1_t, b: uint64x1_t, c: uint64x1_t) -> uint64x1_t {
+    let not = int64x1_t::splat(-1);
+    simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
 }
 
-/// Vector bitwise bit clear
+/// Bitwise Select.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bic)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5252,19 +4754,22 @@ pub unsafe fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbic_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    let c = int8x8_t::splat(-1);
-    simd_and(simd_xor(b, c), a)
+pub unsafe fn vbsl_f32(a: uint32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    let not = int32x2_t::splat(-1);
+    transmute(simd_or(
+        simd_and(a, transmute(b)),
+        simd_and(simd_xor(a, transmute(not)), transmute(c)),
+    ))
 }
 
-/// Vector bitwise bit clear
+/// Bitwise Select.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bic)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5274,19 +4779,22 @@ pub unsafe fn vbic_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbicq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    let c = int8x16_t::splat(-1);
-    simd_and(simd_xor(b, c), a)
+pub unsafe fn vbsl_p8(a: uint8x8_t, b: poly8x8_t, c: poly8x8_t) -> poly8x8_t {
+    let not = int8x8_t::splat(-1);
+    transmute(simd_or(
+        simd_and(a, transmute(b)),
+        simd_and(simd_xor(a, transmute(not)), transmute(c)),
+    ))
 }
 
-/// Vector bitwise bit clear
+/// Bitwise Select.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bic)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5296,19 +4804,22 @@ pub unsafe fn vbicq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbic_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    let c = int16x4_t::splat(-1);
-    simd_and(simd_xor(b, c), a)
+pub unsafe fn vbsl_p16(a: uint16x4_t, b: poly16x4_t, c: poly16x4_t) -> poly16x4_t {
+    let not = int16x4_t::splat(-1);
+    transmute(simd_or(
+        simd_and(a, transmute(b)),
+        simd_and(simd_xor(a, transmute(not)), transmute(c)),
+    ))
 }
 
-/// Vector bitwise bit clear
+/// Bitwise Select. (128-bit)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bic)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5318,19 +4829,22 @@ pub unsafe fn vbic_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbicq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    let c = int16x8_t::splat(-1);
-    simd_and(simd_xor(b, c), a)
+pub unsafe fn vbslq_s8(a: uint8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
+    let not = int8x16_t::splat(-1);
+    transmute(simd_or(
+        simd_and(a, transmute(b)),
+        simd_and(simd_xor(a, transmute(not)), transmute(c)),
+    ))
 }
 
-/// Vector bitwise bit clear
+/// Bitwise Select. (128-bit)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bic)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5340,19 +4854,22 @@ pub unsafe fn vbicq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbic_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    let c = int32x2_t::splat(-1);
-    simd_and(simd_xor(b, c), a)
+pub unsafe fn vbslq_s16(a: uint16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+    let not = int16x8_t::splat(-1);
+    transmute(simd_or(
+        simd_and(a, transmute(b)),
+        simd_and(simd_xor(a, transmute(not)), transmute(c)),
+    ))
 }
 
-/// Vector bitwise bit clear
+/// Bitwise Select. (128-bit)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bic)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5362,19 +4879,22 @@ pub unsafe fn vbic_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbicq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    let c = int32x4_t::splat(-1);
-    simd_and(simd_xor(b, c), a)
+pub unsafe fn vbslq_s32(a: uint32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    let not = int32x4_t::splat(-1);
+    transmute(simd_or(
+        simd_and(a, transmute(b)),
+        simd_and(simd_xor(a, transmute(not)), transmute(c)),
+    ))
 }
 
-/// Vector bitwise bit clear
+/// Bitwise Select. (128-bit)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bic)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5384,19 +4904,22 @@ pub unsafe fn vbicq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbic_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    let c = int64x1_t::splat(-1);
-    simd_and(simd_xor(b, c), a)
+pub unsafe fn vbslq_s64(a: uint64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t {
+    let not = int64x2_t::splat(-1);
+    transmute(simd_or(
+        simd_and(a, transmute(b)),
+        simd_and(simd_xor(a, transmute(not)), transmute(c)),
+    ))
 }
 
-/// Vector bitwise bit clear
+/// Bitwise Select. (128-bit)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bic)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5406,19 +4929,19 @@ pub unsafe fn vbic_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbicq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    let c = int64x2_t::splat(-1);
-    simd_and(simd_xor(b, c), a)
+pub unsafe fn vbslq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
+    let not = int8x16_t::splat(-1);
+    simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
 }
 
-/// Vector bitwise bit clear
+/// Bitwise Select. (128-bit)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bic)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5428,19 +4951,19 @@ pub unsafe fn vbicq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbic_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    let c = int8x8_t::splat(-1);
-    simd_and(simd_xor(b, transmute(c)), a)
-}
-
-/// Vector bitwise bit clear
+pub unsafe fn vbslq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+    let not = int16x8_t::splat(-1);
+    simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
+}
+
+/// Bitwise Select. (128-bit)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bic)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5450,19 +4973,19 @@ pub unsafe fn vbic_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbicq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    let c = int8x16_t::splat(-1);
-    simd_and(simd_xor(b, transmute(c)), a)
+pub unsafe fn vbslq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    let not = int32x4_t::splat(-1);
+    simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
 }
 
-/// Vector bitwise bit clear
+/// Bitwise Select. (128-bit)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bic)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5472,19 +4995,19 @@ pub unsafe fn vbicq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbic_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    let c = int16x4_t::splat(-1);
-    simd_and(simd_xor(b, transmute(c)), a)
+pub unsafe fn vbslq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
+    let not = int64x2_t::splat(-1);
+    simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
 }
 
-/// Vector bitwise bit clear
+/// Bitwise Select. (128-bit)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bic)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5494,19 +5017,22 @@ pub unsafe fn vbic_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbicq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    let c = int16x8_t::splat(-1);
-    simd_and(simd_xor(b, transmute(c)), a)
+pub unsafe fn vbslq_p8(a: uint8x16_t, b: poly8x16_t, c: poly8x16_t) -> poly8x16_t {
+    let not = int8x16_t::splat(-1);
+    transmute(simd_or(
+        simd_and(a, transmute(b)),
+        simd_and(simd_xor(a, transmute(not)), transmute(c)),
+    ))
 }
 
-/// Vector bitwise bit clear
+/// Bitwise Select. (128-bit)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bic)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5516,19 +5042,22 @@ pub unsafe fn vbicq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbic_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    let c = int32x2_t::splat(-1);
-    simd_and(simd_xor(b, transmute(c)), a)
+pub unsafe fn vbslq_p16(a: uint16x8_t, b: poly16x8_t, c: poly16x8_t) -> poly16x8_t {
+    let not = int16x8_t::splat(-1);
+    transmute(simd_or(
+        simd_and(a, transmute(b)),
+        simd_and(simd_xor(a, transmute(not)), transmute(c)),
+    ))
 }
 
-/// Vector bitwise bit clear
+/// Bitwise Select. (128-bit)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bic)
+    assert_instr(bsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5538,19 +5067,22 @@ pub unsafe fn vbic_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbicq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    let c = int32x4_t::splat(-1);
-    simd_and(simd_xor(b, transmute(c)), a)
+pub unsafe fn vbslq_f32(a: uint32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    let not = int32x4_t::splat(-1);
+    transmute(simd_or(
+        simd_and(a, transmute(b)),
+        simd_and(simd_xor(a, transmute(not)), transmute(c)),
+    ))
 }
 
-/// Vector bitwise bit clear
+/// Vector bitwise inclusive OR NOT
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bic)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5560,19 +5092,19 @@ pub unsafe fn vbicq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbic_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    let c = int64x1_t::splat(-1);
-    simd_and(simd_xor(b, transmute(c)), a)
+pub unsafe fn vorn_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    let c = int8x8_t::splat(-1);
+    simd_or(simd_xor(b, c), a)
 }
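+// Illustrative usage sketch (not part of this patch): `vorn(a, b)` computes
+// `a | !b` per lane.
+//
+//     unsafe {
+//         let a = vdup_n_s8(0b0000_0001);
+//         let b = vdup_n_s8(0b0000_0011);
+//         let r = vorn_s8(a, b); // every lane is -3, i.e. 0b1111_1101
+//     }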
 
-/// Vector bitwise bit clear
+/// Vector bitwise inclusive OR NOT
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bic)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5582,23 +5114,19 @@ pub unsafe fn vbic_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbicq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    let c = int64x2_t::splat(-1);
-    simd_and(simd_xor(b, transmute(c)), a)
+pub unsafe fn vornq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    let c = int8x16_t::splat(-1);
+    simd_or(simd_xor(b, c), a)
 }
 
-/// Bitwise Select instructions. This instruction sets each bit in the destination SIMD&FP register
-/// to the corresponding bit from the first source SIMD&FP register when the original
-/// destination bit was 1, otherwise from the second source SIMD&FP register.
-
-/// Bitwise Select.
+/// Vector bitwise inclusive OR NOT
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5608,22 +5136,19 @@ pub unsafe fn vbicq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbsl_s8(a: uint8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
-    let not = int8x8_t::splat(-1);
-    transmute(simd_or(
-        simd_and(a, transmute(b)),
-        simd_and(simd_xor(a, transmute(not)), transmute(c)),
-    ))
+pub unsafe fn vorn_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    let c = int16x4_t::splat(-1);
+    simd_or(simd_xor(b, c), a)
 }
 
-/// Bitwise Select.
+/// Vector bitwise inclusive OR NOT
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5633,22 +5158,19 @@ pub unsafe fn vbsl_s8(a: uint8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbsl_s16(a: uint16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
-    let not = int16x4_t::splat(-1);
-    transmute(simd_or(
-        simd_and(a, transmute(b)),
-        simd_and(simd_xor(a, transmute(not)), transmute(c)),
-    ))
+pub unsafe fn vornq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    let c = int16x8_t::splat(-1);
+    simd_or(simd_xor(b, c), a)
 }
 
-/// Bitwise Select.
+/// Vector bitwise inclusive OR NOT
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5658,22 +5180,19 @@ pub unsafe fn vbsl_s16(a: uint16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbsl_s32(a: uint32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
-    let not = int32x2_t::splat(-1);
-    transmute(simd_or(
-        simd_and(a, transmute(b)),
-        simd_and(simd_xor(a, transmute(not)), transmute(c)),
-    ))
+pub unsafe fn vorn_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    let c = int32x2_t::splat(-1);
+    simd_or(simd_xor(b, c), a)
 }
 
-/// Bitwise Select.
+/// Vector bitwise inclusive OR NOT
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5683,22 +5202,19 @@ pub unsafe fn vbsl_s32(a: uint32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbsl_s64(a: uint64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t {
-    let not = int64x1_t::splat(-1);
-    transmute(simd_or(
-        simd_and(a, transmute(b)),
-        simd_and(simd_xor(a, transmute(not)), transmute(c)),
-    ))
+pub unsafe fn vornq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    let c = int32x4_t::splat(-1);
+    simd_or(simd_xor(b, c), a)
 }
 
-/// Bitwise Select.
+/// Vector bitwise inclusive OR NOT
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5708,19 +5224,19 @@ pub unsafe fn vbsl_s64(a: uint64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbsl_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
-    let not = int8x8_t::splat(-1);
-    simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
+pub unsafe fn vorn_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    let c = int64x1_t::splat(-1);
+    simd_or(simd_xor(b, c), a)
 }
 
-/// Bitwise Select.
+/// Vector bitwise inclusive OR NOT
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5730,19 +5246,19 @@ pub unsafe fn vbsl_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbsl_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
-    let not = int16x4_t::splat(-1);
-    simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
+pub unsafe fn vornq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    let c = int64x2_t::splat(-1);
+    simd_or(simd_xor(b, c), a)
 }
 
-/// Bitwise Select.
+/// Vector bitwise inclusive OR NOT
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5752,19 +5268,19 @@ pub unsafe fn vbsl_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbsl_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
-    let not = int32x2_t::splat(-1);
-    simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
+pub unsafe fn vorn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    let c = int8x8_t::splat(-1);
+    simd_or(simd_xor(b, transmute(c)), a)
 }
 
-/// Bitwise Select.
+/// Vector bitwise inclusive OR NOT
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5774,19 +5290,19 @@ pub unsafe fn vbsl_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbsl_u64(a: uint64x1_t, b: uint64x1_t, c: uint64x1_t) -> uint64x1_t {
-    let not = int64x1_t::splat(-1);
-    simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
+pub unsafe fn vornq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    let c = int8x16_t::splat(-1);
+    simd_or(simd_xor(b, transmute(c)), a)
 }
 
-/// Bitwise Select.
+/// Vector bitwise inclusive OR NOT
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5796,22 +5312,19 @@ pub unsafe fn vbsl_u64(a: uint64x1_t, b: uint64x1_t, c: uint64x1_t) -> uint64x1_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbsl_f32(a: uint32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
-    let not = int32x2_t::splat(-1);
-    transmute(simd_or(
-        simd_and(a, transmute(b)),
-        simd_and(simd_xor(a, transmute(not)), transmute(c)),
-    ))
+pub unsafe fn vorn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    let c = int16x4_t::splat(-1);
+    simd_or(simd_xor(b, transmute(c)), a)
 }
 
-/// Bitwise Select.
+/// Vector bitwise inclusive OR NOT
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5821,22 +5334,19 @@ pub unsafe fn vbsl_f32(a: uint32x2_t, b: float32x2_t, c: float32x2_t) -> float32
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbsl_p8(a: uint8x8_t, b: poly8x8_t, c: poly8x8_t) -> poly8x8_t {
-    let not = int8x8_t::splat(-1);
-    transmute(simd_or(
-        simd_and(a, transmute(b)),
-        simd_and(simd_xor(a, transmute(not)), transmute(c)),
-    ))
+pub unsafe fn vornq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    let c = int16x8_t::splat(-1);
+    simd_or(simd_xor(b, transmute(c)), a)
 }
 
-/// Bitwise Select.
+/// Vector bitwise inclusive OR NOT
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5846,22 +5356,19 @@ pub unsafe fn vbsl_p8(a: uint8x8_t, b: poly8x8_t, c: poly8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbsl_p16(a: uint16x4_t, b: poly16x4_t, c: poly16x4_t) -> poly16x4_t {
-    let not = int16x4_t::splat(-1);
-    transmute(simd_or(
-        simd_and(a, transmute(b)),
-        simd_and(simd_xor(a, transmute(not)), transmute(c)),
-    ))
+pub unsafe fn vorn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    let c = int32x2_t::splat(-1);
+    simd_or(simd_xor(b, transmute(c)), a)
 }
 
-/// Bitwise Select. (128-bit)
+/// Vector bitwise inclusive OR NOT
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5871,22 +5378,19 @@ pub unsafe fn vbsl_p16(a: uint16x4_t, b: poly16x4_t, c: poly16x4_t) -> poly16x4_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbslq_s8(a: uint8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
-    let not = int8x16_t::splat(-1);
-    transmute(simd_or(
-        simd_and(a, transmute(b)),
-        simd_and(simd_xor(a, transmute(not)), transmute(c)),
-    ))
+pub unsafe fn vornq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    let c = int32x4_t::splat(-1);
+    simd_or(simd_xor(b, transmute(c)), a)
 }
 
-/// Bitwise Select. (128-bit)
+/// Vector bitwise inclusive OR NOT
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5896,22 +5400,19 @@ pub unsafe fn vbslq_s8(a: uint8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbslq_s16(a: uint16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
-    let not = int16x8_t::splat(-1);
-    transmute(simd_or(
-        simd_and(a, transmute(b)),
-        simd_and(simd_xor(a, transmute(not)), transmute(c)),
-    ))
+pub unsafe fn vorn_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    let c = int64x1_t::splat(-1);
+    simd_or(simd_xor(b, transmute(c)), a)
 }
 
-/// Bitwise Select. (128-bit)
+/// Vector bitwise inclusive OR NOT
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -5921,23 +5422,17 @@ pub unsafe fn vbslq_s16(a: uint16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbslq_s32(a: uint32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
-    let not = int32x4_t::splat(-1);
-    transmute(simd_or(
-        simd_and(a, transmute(b)),
-        simd_and(simd_xor(a, transmute(not)), transmute(c)),
-    ))
+pub unsafe fn vornq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    let c = int64x2_t::splat(-1);
+    simd_or(simd_xor(b, transmute(c)), a)
 }
 
-/// Bitwise Select. (128-bit)
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -5946,23 +5441,17 @@ pub unsafe fn vbslq_s32(a: uint32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbslq_s64(a: uint64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t {
-    let not = int64x2_t::splat(-1);
-    transmute(simd_or(
-        simd_and(a, transmute(b)),
-        simd_and(simd_xor(a, transmute(not)), transmute(c)),
-    ))
+pub unsafe fn vgetq_lane_u64<const IMM5: i32>(v: uint64x2_t) -> u64 {
+    static_assert_uimm_bits!(IMM5, 1);
+    simd_extract!(v, IMM5 as u32)
 }
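+// Illustrative usage sketch (not part of this patch): the lane index is a
+// const generic, so `static_assert_uimm_bits!` rejects an out-of-range index
+// at compile time rather than at run time.
+//
+//     unsafe {
+//         let v = vcombine_u64(vdup_n_u64(1), vdup_n_u64(2));
+//         assert_eq!(vgetq_lane_u64::<1>(v), 2);
+//     }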
 
-/// Bitwise Select. (128-bit)
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -5971,20 +5460,17 @@ pub unsafe fn vbslq_s64(a: uint64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbslq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
-    let not = int8x16_t::splat(-1);
-    simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
+pub unsafe fn vget_lane_u64<const IMM5: i32>(v: uint64x1_t) -> u64 {
+    static_assert!(IMM5 == 0);
+    simd_extract!(v, 0)
 }
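+// Note: a one-lane vector only admits index 0, hence the exact
+// `static_assert!(IMM5 == 0)` and the hardcoded extraction index.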
 
-/// Bitwise Select. (128-bit)
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -5993,20 +5479,17 @@ pub unsafe fn vbslq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbslq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
-    let not = int16x8_t::splat(-1);
-    simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
+pub unsafe fn vget_lane_u16<const IMM5: i32>(v: uint16x4_t) -> u16 {
+    static_assert_uimm_bits!(IMM5, 2);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Bitwise Select. (128-bit)
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6015,20 +5498,17 @@ pub unsafe fn vbslq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbslq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
-    let not = int32x4_t::splat(-1);
-    simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
+pub unsafe fn vget_lane_s16<const IMM5: i32>(v: int16x4_t) -> i16 {
+    static_assert_uimm_bits!(IMM5, 2);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Bitwise Select. (128-bit)
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6037,20 +5517,17 @@ pub unsafe fn vbslq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbslq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
-    let not = int64x2_t::splat(-1);
-    simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
+pub unsafe fn vget_lane_p16<const IMM5: i32>(v: poly16x4_t) -> p16 {
+    static_assert_uimm_bits!(IMM5, 2);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Bitwise Select. (128-bit)
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6059,23 +5536,17 @@ pub unsafe fn vbslq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbslq_p8(a: uint8x16_t, b: poly8x16_t, c: poly8x16_t) -> poly8x16_t {
-    let not = int8x16_t::splat(-1);
-    transmute(simd_or(
-        simd_and(a, transmute(b)),
-        simd_and(simd_xor(a, transmute(not)), transmute(c)),
-    ))
+pub unsafe fn vget_lane_u32<const IMM5: i32>(v: uint32x2_t) -> u32 {
+    static_assert_uimm_bits!(IMM5, 1);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Bitwise Select. (128-bit)
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6084,23 +5555,17 @@ pub unsafe fn vbslq_p8(a: uint8x16_t, b: poly8x16_t, c: poly8x16_t) -> poly8x16_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbslq_p16(a: uint16x8_t, b: poly16x8_t, c: poly16x8_t) -> poly16x8_t {
-    let not = int16x8_t::splat(-1);
-    transmute(simd_or(
-        simd_and(a, transmute(b)),
-        simd_and(simd_xor(a, transmute(not)), transmute(c)),
-    ))
+pub unsafe fn vget_lane_s32<const IMM5: i32>(v: int32x2_t) -> i32 {
+    static_assert_uimm_bits!(IMM5, 1);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Bitwise Select. (128-bit)
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(bsl)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6109,23 +5574,17 @@ pub unsafe fn vbslq_p16(a: uint16x8_t, b: poly16x8_t, c: poly16x8_t) -> poly16x8
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vbslq_f32(a: uint32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
-    let not = int32x4_t::splat(-1);
-    transmute(simd_or(
-        simd_and(a, transmute(b)),
-        simd_and(simd_xor(a, transmute(not)), transmute(c)),
-    ))
+pub unsafe fn vget_lane_f32<const IMM5: i32>(v: float32x2_t) -> f32 {
+    static_assert_uimm_bits!(IMM5, 1);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Vector bitwise inclusive OR NOT
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6134,20 +5593,17 @@ pub unsafe fn vbslq_f32(a: uint32x4_t, b: float32x4_t, c: float32x4_t) -> float3
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorn_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    let c = int8x8_t::splat(-1);
-    simd_or(simd_xor(b, c), a)
+pub unsafe fn vgetq_lane_f32<const IMM5: i32>(v: float32x4_t) -> f32 {
+    static_assert_uimm_bits!(IMM5, 2);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Vector bitwise inclusive OR NOT
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6156,20 +5612,17 @@ pub unsafe fn vorn_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vornq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    let c = int8x16_t::splat(-1);
-    simd_or(simd_xor(b, c), a)
+pub unsafe fn vget_lane_p64<const IMM5: i32>(v: poly64x1_t) -> p64 {
+    static_assert!(IMM5 == 0);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Vector bitwise inclusive OR NOT
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6178,20 +5631,17 @@ pub unsafe fn vornq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorn_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    let c = int16x4_t::splat(-1);
-    simd_or(simd_xor(b, c), a)
+pub unsafe fn vgetq_lane_p64<const IMM5: i32>(v: poly64x2_t) -> p64 {
+    static_assert_uimm_bits!(IMM5, 1);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Vector bitwise inclusive OR NOT
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6200,20 +5650,17 @@ pub unsafe fn vorn_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vornq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    let c = int16x8_t::splat(-1);
-    simd_or(simd_xor(b, c), a)
+pub unsafe fn vget_lane_s64<const IMM5: i32>(v: int64x1_t) -> i64 {
+    static_assert!(IMM5 == 0);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Vector bitwise inclusive OR NOT
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6222,20 +5669,17 @@ pub unsafe fn vornq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorn_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    let c = int32x2_t::splat(-1);
-    simd_or(simd_xor(b, c), a)
+pub unsafe fn vgetq_lane_s64<const IMM5: i32>(v: int64x2_t) -> i64 {
+    static_assert_uimm_bits!(IMM5, 1);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Vector bitwise inclusive OR NOT
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6244,20 +5688,17 @@ pub unsafe fn vorn_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vornq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    let c = int32x4_t::splat(-1);
-    simd_or(simd_xor(b, c), a)
+pub unsafe fn vgetq_lane_u16<const IMM5: i32>(v: uint16x8_t) -> u16 {
+    static_assert_uimm_bits!(IMM5, 3);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Vector bitwise inclusive OR NOT
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6266,20 +5707,17 @@ pub unsafe fn vornq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorn_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    let c = int64x1_t::splat(-1);
-    simd_or(simd_xor(b, c), a)
+pub unsafe fn vgetq_lane_u32<const IMM5: i32>(v: uint32x4_t) -> u32 {
+    static_assert_uimm_bits!(IMM5, 2);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Vector bitwise inclusive OR NOT
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6288,20 +5726,17 @@ pub unsafe fn vorn_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vornq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    let c = int64x2_t::splat(-1);
-    simd_or(simd_xor(b, c), a)
+pub unsafe fn vgetq_lane_s16<const IMM5: i32>(v: int16x8_t) -> i16 {
+    static_assert_uimm_bits!(IMM5, 3);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Vector bitwise inclusive OR NOT
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6310,20 +5745,17 @@ pub unsafe fn vornq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    let c = int8x8_t::splat(-1);
-    simd_or(simd_xor(b, transmute(c)), a)
+pub unsafe fn vgetq_lane_p16<const IMM5: i32>(v: poly16x8_t) -> p16 {
+    static_assert_uimm_bits!(IMM5, 3);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Vector bitwise inclusive OR NOT
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6332,20 +5764,17 @@ pub unsafe fn vorn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vornq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    let c = int8x16_t::splat(-1);
-    simd_or(simd_xor(b, transmute(c)), a)
+pub unsafe fn vgetq_lane_s32<const IMM5: i32>(v: int32x4_t) -> i32 {
+    static_assert_uimm_bits!(IMM5, 2);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Vector bitwise inclusive OR NOT
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6354,20 +5783,17 @@ pub unsafe fn vornq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    let c = int16x4_t::splat(-1);
-    simd_or(simd_xor(b, transmute(c)), a)
+pub unsafe fn vget_lane_u8<const IMM5: i32>(v: uint8x8_t) -> u8 {
+    static_assert_uimm_bits!(IMM5, 3);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Vector bitwise inclusive OR NOT
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6376,20 +5802,17 @@ pub unsafe fn vorn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vornq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    let c = int16x8_t::splat(-1);
-    simd_or(simd_xor(b, transmute(c)), a)
+pub unsafe fn vget_lane_s8<const IMM5: i32>(v: int8x8_t) -> i8 {
+    static_assert_uimm_bits!(IMM5, 3);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Vector bitwise inclusive OR NOT
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6398,20 +5821,17 @@ pub unsafe fn vornq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    let c = int32x2_t::splat(-1);
-    simd_or(simd_xor(b, transmute(c)), a)
+pub unsafe fn vget_lane_p8<const IMM5: i32>(v: poly8x8_t) -> p8 {
+    static_assert_uimm_bits!(IMM5, 3);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Vector bitwise inclusive OR NOT
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6420,20 +5840,17 @@ pub unsafe fn vorn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vornq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    let c = int32x4_t::splat(-1);
-    simd_or(simd_xor(b, transmute(c)), a)
+pub unsafe fn vgetq_lane_u8<const IMM5: i32>(v: uint8x16_t) -> u8 {
+    static_assert_uimm_bits!(IMM5, 4);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Vector bitwise inclusive OR NOT
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6442,20 +5859,17 @@ pub unsafe fn vornq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vorn_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    let c = int64x1_t::splat(-1);
-    simd_or(simd_xor(b, transmute(c)), a)
+pub unsafe fn vgetq_lane_s8<const IMM5: i32>(v: int8x16_t) -> i8 {
+    static_assert_uimm_bits!(IMM5, 4);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Vector bitwise inclusive OR NOT
+/// Move vector element to general-purpose register
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
-)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6464,19 +5878,19 @@ pub unsafe fn vorn_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vornq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    let c = int64x2_t::splat(-1);
-    simd_or(simd_xor(b, transmute(c)), a)
+pub unsafe fn vgetq_lane_p8<const IMM5: i32>(v: poly8x16_t) -> p8 {
+    static_assert_uimm_bits!(IMM5, 4);
+    simd_extract!(v, IMM5 as u32)
 }
 
-/// Folding minimum of adjacent pairs
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sminp)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -6486,18 +5900,18 @@ pub unsafe fn vornq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    vpmins_v8i8(a, b)
+pub unsafe fn vget_high_s8(a: int8x16_t) -> int8x8_t {
+    simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15])
 }
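+// Usage sketch (illustrative): the high half is a shuffle keeping lanes
+// 8..=15 of the 16-lane input; per the assertions above this lowers to `ext`
+// on AArch64 and `vmov` on ARM:
+//     let q = vdupq_n_s8(1);
+//     let high: int8x8_t = vget_high_s8(q); // all eight lanes == 1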
 
-/// Folding minimum of adjacent pairs
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sminp)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -6507,18 +5921,18 @@ pub unsafe fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    vpmins_v4i16(a, b)
+pub unsafe fn vget_high_s16(a: int16x8_t) -> int16x4_t {
+    simd_shuffle!(a, a, [4, 5, 6, 7])
 }
 
-/// Folding minimum of adjacent pairs
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sminp)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -6528,18 +5942,18 @@ pub unsafe fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    vpmins_v2i32(a, b)
+pub unsafe fn vget_high_s32(a: int32x4_t) -> int32x2_t {
+    simd_shuffle!(a, a, [2, 3])
 }
 
-/// Folding minimum of adjacent pairs
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uminp)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -6549,18 +5963,18 @@ pub unsafe fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    vpminu_v8i8(a, b)
+pub unsafe fn vget_high_s64(a: int64x2_t) -> int64x1_t {
+    int64x1_t([simd_extract!(a, 1)])
 }
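+// Note (illustrative): for the two-lane 64-bit case the high half is a
+// one-lane vector, built by extracting lane 1 directly:
+//     let q = vdupq_n_s64(5);
+//     let hi: int64x1_t = vget_high_s64(q); // contains lane 1 of `q`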
 
-/// Folding minimum of adjacent pairs
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uminp)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -6570,18 +5984,18 @@ pub unsafe fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    vpminu_v4i16(a, b)
+pub unsafe fn vget_high_u8(a: uint8x16_t) -> uint8x8_t {
+    simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15])
 }
 
-/// Folding minimum of adjacent pairs
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uminp)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -6591,18 +6005,18 @@ pub unsafe fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    vpminu_v2i32(a, b)
+pub unsafe fn vget_high_u16(a: uint16x8_t) -> uint16x4_t {
+    simd_shuffle!(a, a, [4, 5, 6, 7])
 }
 
-/// Folding minimum of adjacent pairs
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fminp)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -6612,18 +6026,18 @@ pub unsafe fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    vpminf_v2f32(a, b)
+pub unsafe fn vget_high_u32(a: uint32x4_t) -> uint32x2_t {
+    simd_shuffle!(a, a, [2, 3])
 }
 
-/// Folding maximum of adjacent pairs
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smaxp)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -6633,18 +6047,18 @@ pub unsafe fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    vpmaxs_v8i8(a, b)
+pub unsafe fn vget_high_u64(a: uint64x2_t) -> uint64x1_t {
+    uint64x1_t([simd_extract!(a, 1)])
 }
 
-/// Folding maximum of adjacent pairs
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smaxp)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -6654,18 +6068,18 @@ pub unsafe fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    vpmaxs_v4i16(a, b)
+pub unsafe fn vget_high_p8(a: poly8x16_t) -> poly8x8_t {
+    simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15])
 }
 
-/// Folding maximum of adjacent pairs
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smaxp)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -6675,18 +6089,18 @@ pub unsafe fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    vpmaxs_v2i32(a, b)
+pub unsafe fn vget_high_p16(a: poly16x8_t) -> poly16x4_t {
+    simd_shuffle!(a, a, [4, 5, 6, 7])
 }
 
-/// Folding maximum of adjacent pairs
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umaxp)
+    assert_instr(ext)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -6696,40 +6110,32 @@ pub unsafe fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    vpmaxu_v8i8(a, b)
+pub unsafe fn vget_high_f32(a: float32x4_t) -> float32x2_t {
+    simd_shuffle!(a, a, [2, 3])
 }
 
-/// Folding maximum of adjacent pairs
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umaxp)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "vget_low_s8", since = "1.60.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    vpmaxu_v4i16(a, b)
+pub unsafe fn vget_low_s8(a: int8x16_t) -> int8x8_t {
+    simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
 }
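+// Usage sketch (illustrative): the low half keeps lanes 0..=7; the low
+// 64 bits of the Q register are already in place, so no instruction is
+// needed, which is why the test asserts `nop`:
+//     let q = vdupq_n_s8(3);
+//     let low: int8x8_t = vget_low_s8(q);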
 
-/// Folding maximum of adjacent pairs
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umaxp)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6738,19 +6144,15 @@ pub unsafe fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    vpmaxu_v2i32(a, b)
+pub unsafe fn vget_low_s16(a: int16x8_t) -> int16x4_t {
+    simd_shuffle!(a, a, [0, 1, 2, 3])
 }
 
-/// Folding maximum of adjacent pairs
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmaxp)
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6759,16 +6161,15 @@ pub unsafe fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    vpmaxf_v2f32(a, b)
+pub unsafe fn vget_low_s32(a: int32x4_t) -> int32x2_t {
+    simd_shuffle!(a, a, [0, 1])
 }
 
-/// Move vector element to general-purpose register
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6777,17 +6178,15 @@ pub unsafe fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vgetq_lane_u64<const IMM5: i32>(v: uint64x2_t) -> u64 {
-    static_assert_uimm_bits!(IMM5, 1);
-    simd_extract!(v, IMM5 as u32)
+pub unsafe fn vget_low_s64(a: int64x2_t) -> int64x1_t {
+    int64x1_t([simd_extract!(a, 0)])
 }
 
-/// Move vector element to general-purpose register
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6796,17 +6195,15 @@ pub unsafe fn vgetq_lane_u64<const IMM5: i32>(v: uint64x2_t) -> u64 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_lane_u64<const IMM5: i32>(v: uint64x1_t) -> u64 {
-    static_assert!(IMM5 == 0);
-    simd_extract!(v, 0)
+pub unsafe fn vget_low_u8(a: uint8x16_t) -> uint8x8_t {
+    simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
 }
 
-/// Move vector element to general-purpose register
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6815,17 +6212,15 @@ pub unsafe fn vget_lane_u64<const IMM5: i32>(v: uint64x1_t) -> u64 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_lane_u16<const IMM5: i32>(v: uint16x4_t) -> u16 {
-    static_assert_uimm_bits!(IMM5, 2);
-    simd_extract!(v, IMM5 as u32)
+pub unsafe fn vget_low_u16(a: uint16x8_t) -> uint16x4_t {
+    simd_shuffle!(a, a, [0, 1, 2, 3])
 }
 
-/// Move vector element to general-purpose register
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6834,17 +6229,15 @@ pub unsafe fn vget_lane_u16<const IMM5: i32>(v: uint16x4_t) -> u16 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_lane_s16<const IMM5: i32>(v: int16x4_t) -> i16 {
-    static_assert_uimm_bits!(IMM5, 2);
-    simd_extract!(v, IMM5 as u32)
+pub unsafe fn vget_low_u32(a: uint32x4_t) -> uint32x2_t {
+    simd_shuffle!(a, a, [0, 1])
 }
 
-/// Move vector element to general-purpose register
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6853,17 +6246,15 @@ pub unsafe fn vget_lane_s16<const IMM5: i32>(v: int16x4_t) -> i16 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_lane_p16<const IMM5: i32>(v: poly16x4_t) -> p16 {
-    static_assert_uimm_bits!(IMM5, 2);
-    simd_extract!(v, IMM5 as u32)
+pub unsafe fn vget_low_u64(a: uint64x2_t) -> uint64x1_t {
+    uint64x1_t([simd_extract!(a, 0)])
 }
 
-/// Move vector element to general-purpose register
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6872,17 +6263,15 @@ pub unsafe fn vget_lane_p16<const IMM5: i32>(v: poly16x4_t) -> p16 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_lane_u32<const IMM5: i32>(v: uint32x2_t) -> u32 {
-    static_assert_uimm_bits!(IMM5, 1);
-    simd_extract!(v, IMM5 as u32)
+pub unsafe fn vget_low_p8(a: poly8x16_t) -> poly8x8_t {
+    simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
 }
 
-/// Move vector element to general-purpose register
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6891,17 +6280,15 @@ pub unsafe fn vget_lane_u32<const IMM5: i32>(v: uint32x2_t) -> u32 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_lane_s32<const IMM5: i32>(v: int32x2_t) -> i32 {
-    static_assert_uimm_bits!(IMM5, 1);
-    simd_extract!(v, IMM5 as u32)
+pub unsafe fn vget_low_p16(a: poly16x8_t) -> poly16x4_t {
+    simd_shuffle!(a, a, [0, 1, 2, 3])
 }
 
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6910,17 +6297,19 @@ pub unsafe fn vget_lane_s32<const IMM5: i32>(v: int32x2_t) -> i32 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_lane_f32<const IMM5: i32>(v: float32x2_t) -> f32 {
-    static_assert_uimm_bits!(IMM5, 1);
-    simd_extract!(v, IMM5 as u32)
+pub unsafe fn vget_low_f32(a: float32x4_t) -> float32x2_t {
+    simd_shuffle!(a, a, [0, 1])
 }
 
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6929,17 +6318,19 @@ pub unsafe fn vget_lane_f32<const IMM5: i32>(v: float32x2_t) -> f32 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vgetq_lane_f32<const IMM5: i32>(v: float32x4_t) -> f32 {
-    static_assert_uimm_bits!(IMM5, 2);
-    simd_extract!(v, IMM5 as u32)
+pub unsafe fn vdupq_n_s8(value: i8) -> int8x16_t {
+    int8x16_t::splat(value)
 }
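+// Usage sketch (illustrative, inside an `unsafe` block): `vdupq_n_*`
+// broadcasts one scalar into every lane, here all sixteen `i8` lanes:
+//     let v = vdupq_n_s8(-1);
+//     assert_eq!(vgetq_lane_s8::<15>(v), -1);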
 
-/// Move vector element to general-purpose register
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6948,17 +6339,19 @@ pub unsafe fn vgetq_lane_f32<const IMM5: i32>(v: float32x4_t) -> f32 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_lane_p64<const IMM5: i32>(v: poly64x1_t) -> p64 {
-    static_assert!(IMM5 == 0);
-    simd_extract!(v, IMM5 as u32)
+pub unsafe fn vdupq_n_s16(value: i16) -> int16x8_t {
+    int16x8_t::splat(value)
 }
 
-/// Move vector element to general-purpose register
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6967,17 +6360,19 @@ pub unsafe fn vget_lane_p64<const IMM5: i32>(v: poly64x1_t) -> p64 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vgetq_lane_p64<const IMM5: i32>(v: poly64x2_t) -> p64 {
-    static_assert_uimm_bits!(IMM5, 1);
-    simd_extract!(v, IMM5 as u32)
+pub unsafe fn vdupq_n_s32(value: i32) -> int32x4_t {
+    int32x4_t::splat(value)
 }
 
-/// Move vector element to general-purpose register
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -6986,17 +6381,19 @@ pub unsafe fn vgetq_lane_p64<const IMM5: i32>(v: poly64x2_t) -> p64 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_lane_s64<const IMM5: i32>(v: int64x1_t) -> i64 {
-    static_assert!(IMM5 == 0);
-    simd_extract!(v, IMM5 as u32)
+pub unsafe fn vdupq_n_s64(value: i64) -> int64x2_t {
+    int64x2_t::splat(value)
 }
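+// Note (an inference, not stated in this patch): ARMv7 `vdup` only supports
+// 8-, 16- and 32-bit lanes, so the 64-bit splat is asserted as `vmov` on ARM
+// while AArch64 still uses `dup`.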
 
-/// Move vector element to general-purpose register
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7005,17 +6402,19 @@ pub unsafe fn vget_lane_s64<const IMM5: i32>(v: int64x1_t) -> i64 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vgetq_lane_s64<const IMM5: i32>(v: int64x2_t) -> i64 {
-    static_assert_uimm_bits!(IMM5, 1);
-    simd_extract!(v, IMM5 as u32)
+pub unsafe fn vdupq_n_u8(value: u8) -> uint8x16_t {
+    uint8x16_t::splat(value)
 }
 
-/// Move vector element to general-purpose register
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7024,17 +6423,19 @@ pub unsafe fn vgetq_lane_s64<const IMM5: i32>(v: int64x2_t) -> i64 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vgetq_lane_u16<const IMM5: i32>(v: uint16x8_t) -> u16 {
-    static_assert_uimm_bits!(IMM5, 3);
-    simd_extract!(v, IMM5 as u32)
+pub unsafe fn vdupq_n_u16(value: u16) -> uint16x8_t {
+    uint16x8_t::splat(value)
 }
 
-/// Move vector element to general-purpose register
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7043,17 +6444,19 @@ pub unsafe fn vgetq_lane_u16<const IMM5: i32>(v: uint16x8_t) -> u16 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vgetq_lane_u32<const IMM5: i32>(v: uint32x4_t) -> u32 {
-    static_assert_uimm_bits!(IMM5, 2);
-    simd_extract!(v, IMM5 as u32)
+pub unsafe fn vdupq_n_u32(value: u32) -> uint32x4_t {
+    uint32x4_t::splat(value)
 }
 
-/// Move vector element to general-purpose register
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7062,17 +6465,19 @@ pub unsafe fn vgetq_lane_u32<const IMM5: i32>(v: uint32x4_t) -> u32 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vgetq_lane_s16<const IMM5: i32>(v: int16x8_t) -> i16 {
-    static_assert_uimm_bits!(IMM5, 3);
-    simd_extract!(v, IMM5 as u32)
+pub unsafe fn vdupq_n_u64(value: u64) -> uint64x2_t {
+    uint64x2_t::splat(value)
 }
 
-/// Move vector element to general-purpose register
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7081,17 +6486,19 @@ pub unsafe fn vgetq_lane_s16<const IMM5: i32>(v: int16x8_t) -> i16 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vgetq_lane_p16<const IMM5: i32>(v: poly16x8_t) -> p16 {
-    static_assert_uimm_bits!(IMM5, 3);
-    simd_extract!(v, IMM5 as u32)
+pub unsafe fn vdupq_n_p8(value: p8) -> poly8x16_t {
+    poly8x16_t::splat(value)
 }
 
-/// Move vector element to general-purpose register
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7100,17 +6507,19 @@ pub unsafe fn vgetq_lane_p16<const IMM5: i32>(v: poly16x8_t) -> p16 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vgetq_lane_s32<const IMM5: i32>(v: int32x4_t) -> i32 {
-    static_assert_uimm_bits!(IMM5, 2);
-    simd_extract!(v, IMM5 as u32)
+pub unsafe fn vdupq_n_p16(value: p16) -> poly16x8_t {
+    poly16x8_t::splat(value)
 }
 
-/// Move vector element to general-purpose register
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7119,36 +6528,39 @@ pub unsafe fn vgetq_lane_s32<const IMM5: i32>(v: int32x4_t) -> i32 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_lane_u8<const IMM5: i32>(v: uint8x8_t) -> u8 {
-    static_assert_uimm_bits!(IMM5, 3);
-    simd_extract!(v, IMM5 as u32)
+pub unsafe fn vdupq_n_f32(value: f32) -> float32x4_t {
+    float32x4_t::splat(value)
 }
 
-/// Move vector element to general-purpose register
+/// Duplicate vector element to vector or scalar
+///
+/// Private vfp4 version used by FMA intrinsics because LLVM does
+/// not inline the non-vfp4 version into vfp4 functions.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
 )]
 #[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-pub unsafe fn vget_lane_s8<const IMM5: i32>(v: int8x8_t) -> i8 {
-    static_assert_uimm_bits!(IMM5, 3);
-    simd_extract!(v, IMM5 as u32)
+unsafe fn vdupq_n_f32_vfp4(value: f32) -> float32x4_t {
+    float32x4_t::splat(value)
 }
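+// Illustrative call site (a sketch; the function name here is hypothetical,
+// not the actual FMA code): with `vfp4` enabled the splat can inline into the
+// caller instead of crossing a target-feature boundary:
+//     #[target_feature(enable = "vfp4")]
+//     unsafe fn fma_by_scalar(a: float32x4_t, b: float32x4_t, n: f32) -> float32x4_t {
+//         vfmaq_f32(a, b, vdupq_n_f32_vfp4(n))
+//     }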
 
-/// Move vector element to general-purpose register
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7157,36 +6569,19 @@ pub unsafe fn vget_lane_s8<const IMM5: i32>(v: int8x8_t) -> i8 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_lane_p8<const IMM5: i32>(v: poly8x8_t) -> p8 {
-    static_assert_uimm_bits!(IMM5, 3);
-    simd_extract!(v, IMM5 as u32)
+pub unsafe fn vdup_n_s8(value: i8) -> int8x8_t {
+    int8x8_t::splat(value)
 }
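+// Usage sketch (illustrative): the non-`q` form splats into a 64-bit D
+// register, i.e. eight `i8` lanes instead of sixteen:
+//     let v = vdup_n_s8(4);
+//     assert_eq!(vget_lane_s8::<7>(v), 4);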
 
-/// Move vector element to general-purpose register
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
 )]
-pub unsafe fn vgetq_lane_u8<const IMM5: i32>(v: uint8x16_t) -> u8 {
-    static_assert_uimm_bits!(IMM5, 4);
-    simd_extract!(v, IMM5 as u32)
-}
-
-/// Move vector element to general-purpose register
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7195,17 +6590,19 @@ pub unsafe fn vgetq_lane_u8<const IMM5: i32>(v: uint8x16_t) -> u8 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vgetq_lane_s8<const IMM5: i32>(v: int8x16_t) -> i8 {
-    static_assert_uimm_bits!(IMM5, 4);
-    simd_extract!(v, IMM5 as u32)
+pub unsafe fn vdup_n_s16(value: i16) -> int16x4_t {
+    int16x4_t::splat(value)
 }
 
-/// Move vector element to general-purpose register
+/// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7214,9 +6611,8 @@ pub unsafe fn vgetq_lane_s8<const IMM5: i32>(v: int8x16_t) -> i8 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vgetq_lane_p8<const IMM5: i32>(v: poly8x16_t) -> p8 {
-    static_assert_uimm_bits!(IMM5, 4);
-    simd_extract!(v, IMM5 as u32)
+pub unsafe fn vdup_n_s32(value: i32) -> int32x2_t {
+    int32x2_t::splat(value)
 }
 
 /// Duplicate vector element to vector or scalar
@@ -7226,7 +6622,7 @@ pub unsafe fn vgetq_lane_p8<const IMM5: i32>(v: poly8x16_t) -> p8 {
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(fmov)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -7236,18 +6632,18 @@ pub unsafe fn vgetq_lane_p8<const IMM5: i32>(v: poly8x16_t) -> p8 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_high_s8(a: int8x16_t) -> int8x8_t {
-    simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15])
+pub unsafe fn vdup_n_s64(value: i64) -> int64x1_t {
+    int64x1_t::splat(value)
 }
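
One detail in this hunk deserves a note: `vdup_n_s64` returns a single-lane `int64x1_t`, so there is nothing to broadcast and the lowering is a plain register move, which is why it asserts `vmov`/`fmov` instead of `vdup`/`dup` (the same holds for `vdup_n_u64` below). The model degenerates accordingly:

```rust
// One-lane "splat" is just a wrap (illustrative): no broadcast
// occurs, so the compiler emits a register move rather than a dup.
fn dup_n_s64(value: i64) -> [i64; 1] {
    [value]
}

fn main() {
    assert_eq!(dup_n_s64(9), [9]);
}
```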
 
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -7257,18 +6653,18 @@ pub unsafe fn vget_high_s8(a: int8x16_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_high_s16(a: int16x8_t) -> int16x4_t {
-    simd_shuffle!(a, a, [4, 5, 6, 7])
+pub unsafe fn vdup_n_u8(value: u8) -> uint8x8_t {
+    uint8x8_t::splat(value)
 }
 
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -7278,18 +6674,18 @@ pub unsafe fn vget_high_s16(a: int16x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_high_s32(a: int32x4_t) -> int32x2_t {
-    simd_shuffle!(a, a, [2, 3])
+pub unsafe fn vdup_n_u16(value: u16) -> uint16x4_t {
+    uint16x4_t::splat(value)
 }
 
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -7299,8 +6695,8 @@ pub unsafe fn vget_high_s32(a: int32x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_high_s64(a: int64x2_t) -> int64x1_t {
-    int64x1_t([simd_extract!(a, 1)])
+pub unsafe fn vdup_n_u32(value: u32) -> uint32x2_t {
+    uint32x2_t::splat(value)
 }
 
 /// Duplicate vector element to vector or scalar
@@ -7310,7 +6706,7 @@ pub unsafe fn vget_high_s64(a: int64x2_t) -> int64x1_t {
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(fmov)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -7320,18 +6716,18 @@ pub unsafe fn vget_high_s64(a: int64x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_high_u8(a: uint8x16_t) -> uint8x8_t {
-    simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15])
+pub unsafe fn vdup_n_u64(value: u64) -> uint64x1_t {
+    uint64x1_t::splat(value)
 }
 
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -7341,18 +6737,18 @@ pub unsafe fn vget_high_u8(a: uint8x16_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_high_u16(a: uint16x8_t) -> uint16x4_t {
-    simd_shuffle!(a, a, [4, 5, 6, 7])
+pub unsafe fn vdup_n_p8(value: p8) -> poly8x8_t {
+    poly8x8_t::splat(value)
 }
 
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -7362,18 +6758,18 @@ pub unsafe fn vget_high_u16(a: uint16x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_high_u32(a: uint32x4_t) -> uint32x2_t {
-    simd_shuffle!(a, a, [2, 3])
+pub unsafe fn vdup_n_p16(value: p16) -> poly16x4_t {
+    poly16x4_t::splat(value)
 }
 
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -7383,39 +6779,38 @@ pub unsafe fn vget_high_u32(a: uint32x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_high_u64(a: uint64x2_t) -> uint64x1_t {
-    uint64x1_t([simd_extract!(a, 1)])
+pub unsafe fn vdup_n_f32(value: f32) -> float32x2_t {
+    float32x2_t::splat(value)
 }
 
 /// Duplicate vector element to vector or scalar
+///
+/// Private vfp4 version used by FMA intrinsics because LLVM does
+/// not inline the non-vfp4 version in vfp4 functions.
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vget_high_p8(a: poly8x16_t) -> poly8x8_t {
-    simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15])
+unsafe fn vdup_n_f32_vfp4(value: f32) -> float32x2_t {
+    float32x2_t::splat(value)
 }
 
-/// Duplicate vector element to vector or scalar
+/// Load SIMD&FP register (immediate offset)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -7425,18 +6820,18 @@ pub unsafe fn vget_high_p8(a: poly8x16_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_high_p16(a: poly16x8_t) -> poly16x4_t {
-    simd_shuffle!(a, a, [4, 5, 6, 7])
+pub unsafe fn vldrq_p128(a: *const p128) -> p128 {
+    *a
 }
 
-/// Duplicate vector element to vector or scalar
+/// Store SIMD&FP register (immediate offset)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ext)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -7446,32 +6841,40 @@ pub unsafe fn vget_high_p16(a: poly16x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_high_f32(a: float32x4_t) -> float32x2_t {
-    simd_shuffle!(a, a, [2, 3])
+pub unsafe fn vstrq_p128(a: *mut p128, b: p128) {
+    *a = b;
 }
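
`vldrq_p128` and `vstrq_p128` compile to an ordinary 128-bit load and store, and `p128` is carried in a `u128`, which is why both assert `nop`. A hedged usage sketch on aarch64, where the pair is stable:

```rust
// Round-trip a p128 value through memory (aarch64; p128 = u128, so
// plain u128 pointers work here).
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn roundtrip(slot: *mut u128, value: u128) -> u128 {
    use core::arch::aarch64::{vldrq_p128, vstrq_p128};
    vstrq_p128(slot, value);
    vldrq_p128(slot)
}
```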
 
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "vget_low_s8", since = "1.60.0")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_low_s8(a: int8x16_t) -> int8x8_t {
-    simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
+pub unsafe fn vmov_n_s8(value: i8) -> int8x8_t {
+    vdup_n_s8(value)
 }
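
The ACLE specifies `vmov_n_*` as a synonym of `vdup_n_*`, so each of the following intrinsics simply forwards to its `vdup` counterpart and asserts the same instruction. A quick equivalence check (aarch64, stable API; the transmutes just expose the lanes):

```rust
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn check_alias() {
    use core::arch::aarch64::{vdup_n_s8, vmov_n_s8};
    let a: [i8; 8] = core::mem::transmute(vmov_n_s8(7));
    let b: [i8; 8] = core::mem::transmute(vdup_n_s8(7));
    assert_eq!(a, b); // both are [7; 8]
}
```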
 
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7480,15 +6883,19 @@ pub unsafe fn vget_low_s8(a: int8x16_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_low_s16(a: int16x8_t) -> int16x4_t {
-    simd_shuffle!(a, a, [0, 1, 2, 3])
+pub unsafe fn vmov_n_s16(value: i16) -> int16x4_t {
+    vdup_n_s16(value)
 }
 
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7497,15 +6904,19 @@ pub unsafe fn vget_low_s16(a: int16x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_low_s32(a: int32x4_t) -> int32x2_t {
-    simd_shuffle!(a, a, [0, 1])
+pub unsafe fn vmov_n_s32(value: i32) -> int32x2_t {
+    vdup_n_s32(value)
 }
 
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fmov)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7514,15 +6925,19 @@ pub unsafe fn vget_low_s32(a: int32x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_low_s64(a: int64x2_t) -> int64x1_t {
-    int64x1_t([simd_extract!(a, 0)])
+pub unsafe fn vmov_n_s64(value: i64) -> int64x1_t {
+    vdup_n_s64(value)
 }
 
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7531,15 +6946,19 @@ pub unsafe fn vget_low_s64(a: int64x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_low_u8(a: uint8x16_t) -> uint8x8_t {
-    simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
+pub unsafe fn vmov_n_u8(value: u8) -> uint8x8_t {
+    vdup_n_u8(value)
 }
 
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7548,15 +6967,19 @@ pub unsafe fn vget_low_u8(a: uint8x16_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_low_u16(a: uint16x8_t) -> uint16x4_t {
-    simd_shuffle!(a, a, [0, 1, 2, 3])
+pub unsafe fn vmov_n_u16(value: u16) -> uint16x4_t {
+    vdup_n_u16(value)
 }
 
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7565,15 +6988,19 @@ pub unsafe fn vget_low_u16(a: uint16x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_low_u32(a: uint32x4_t) -> uint32x2_t {
-    simd_shuffle!(a, a, [0, 1])
+pub unsafe fn vmov_n_u32(value: u32) -> uint32x2_t {
+    vdup_n_u32(value)
 }
 
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fmov)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7582,15 +7009,19 @@ pub unsafe fn vget_low_u32(a: uint32x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_low_u64(a: uint64x2_t) -> uint64x1_t {
-    uint64x1_t([simd_extract!(a, 0)])
+pub unsafe fn vmov_n_u64(value: u64) -> uint64x1_t {
+    vdup_n_u64(value)
 }
 
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7599,15 +7030,19 @@ pub unsafe fn vget_low_u64(a: uint64x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_low_p8(a: poly8x16_t) -> poly8x8_t {
-    simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
+pub unsafe fn vmov_n_p8(value: p8) -> poly8x8_t {
+    vdup_n_p8(value)
 }
 
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7616,15 +7051,19 @@ pub unsafe fn vget_low_p8(a: poly8x16_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_low_p16(a: poly16x8_t) -> poly16x4_t {
-    simd_shuffle!(a, a, [0, 1, 2, 3])
+pub unsafe fn vmov_n_p16(value: p16) -> poly16x4_t {
+    vdup_n_p16(value)
 }
 
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
+)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7633,8 +7072,8 @@ pub unsafe fn vget_low_p16(a: poly16x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vget_low_f32(a: float32x4_t) -> float32x2_t {
-    simd_shuffle!(a, a, [0, 1])
+pub unsafe fn vmov_n_f32(value: f32) -> float32x2_t {
+    vdup_n_f32(value)
 }
 
 /// Duplicate vector element to vector or scalar
@@ -7654,8 +7093,8 @@ pub unsafe fn vget_low_f32(a: float32x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_n_s8(value: i8) -> int8x16_t {
-    int8x16_t::splat(value)
+pub unsafe fn vmovq_n_s8(value: i8) -> int8x16_t {
+    vdupq_n_s8(value)
 }
 
 /// Duplicate vector element to vector or scalar
@@ -7675,8 +7114,8 @@ pub unsafe fn vdupq_n_s8(value: i8) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_n_s16(value: i16) -> int16x8_t {
-    int16x8_t::splat(value)
+pub unsafe fn vmovq_n_s16(value: i16) -> int16x8_t {
+    vdupq_n_s16(value)
 }
 
 /// Duplicate vector element to vector or scalar
@@ -7696,9 +7135,9 @@ pub unsafe fn vdupq_n_s16(value: i16) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_n_s32(value: i32) -> int32x4_t {
-    int32x4_t::splat(value)
-}
+pub unsafe fn vmovq_n_s32(value: i32) -> int32x4_t {
+    vdupq_n_s32(value)
+}
 
 /// Duplicate vector element to vector or scalar
 #[inline]
@@ -7717,8 +7156,8 @@ pub unsafe fn vdupq_n_s32(value: i32) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_n_s64(value: i64) -> int64x2_t {
-    int64x2_t::splat(value)
+pub unsafe fn vmovq_n_s64(value: i64) -> int64x2_t {
+    vdupq_n_s64(value)
 }
 
 /// Duplicate vector element to vector or scalar
@@ -7738,8 +7177,8 @@ pub unsafe fn vdupq_n_s64(value: i64) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_n_u8(value: u8) -> uint8x16_t {
-    uint8x16_t::splat(value)
+pub unsafe fn vmovq_n_u8(value: u8) -> uint8x16_t {
+    vdupq_n_u8(value)
 }
 
 /// Duplicate vector element to vector or scalar
@@ -7759,8 +7198,8 @@ pub unsafe fn vdupq_n_u8(value: u8) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_n_u16(value: u16) -> uint16x8_t {
-    uint16x8_t::splat(value)
+pub unsafe fn vmovq_n_u16(value: u16) -> uint16x8_t {
+    vdupq_n_u16(value)
 }
 
 /// Duplicate vector element to vector or scalar
@@ -7780,8 +7219,8 @@ pub unsafe fn vdupq_n_u16(value: u16) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_n_u32(value: u32) -> uint32x4_t {
-    uint32x4_t::splat(value)
+pub unsafe fn vmovq_n_u32(value: u32) -> uint32x4_t {
+    vdupq_n_u32(value)
 }
 
 /// Duplicate vector element to vector or scalar
@@ -7801,8 +7240,8 @@ pub unsafe fn vdupq_n_u32(value: u32) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_n_u64(value: u64) -> uint64x2_t {
-    uint64x2_t::splat(value)
+pub unsafe fn vmovq_n_u64(value: u64) -> uint64x2_t {
+    vdupq_n_u64(value)
 }
 
 /// Duplicate vector element to vector or scalar
@@ -7822,8 +7261,8 @@ pub unsafe fn vdupq_n_u64(value: u64) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_n_p8(value: p8) -> poly8x16_t {
-    poly8x16_t::splat(value)
+pub unsafe fn vmovq_n_p8(value: p8) -> poly8x16_t {
+    vdupq_n_p8(value)
 }
 
 /// Duplicate vector element to vector or scalar
@@ -7843,8 +7282,8 @@ pub unsafe fn vdupq_n_p8(value: p8) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_n_p16(value: p16) -> poly16x8_t {
-    poly16x8_t::splat(value)
+pub unsafe fn vmovq_n_p16(value: p16) -> poly16x8_t {
+    vdupq_n_p16(value)
 }
 
 /// Duplicate vector element to vector or scalar
@@ -7864,39 +7303,43 @@ pub unsafe fn vdupq_n_p16(value: p16) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdupq_n_f32(value: f32) -> float32x4_t {
-    float32x4_t::splat(value)
+pub unsafe fn vmovq_n_f32(value: f32) -> float32x4_t {
+    vdupq_n_f32(value)
 }
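
The q-register `vmovq_n_*` variants above follow the same aliasing scheme, each forwarding to its `vdupq_n_*` counterpart so the splat logic lives in one place. Usage sketch:

```rust
// All four f32 lanes hold 1.0 after the alias forwards to
// vdupq_n_f32 (aarch64, stable API).
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn ones() -> core::arch::aarch64::float32x4_t {
    core::arch::aarch64::vmovq_n_f32(1.0)
}
```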
 
-/// Duplicate vector element to vector or scalar
-///
-/// Private vfp4 version used by FMA intriniscs because LLVM does
-/// not inline the non-vfp4 version in vfp4 functions.
+/// Extract vector from pair of vectors
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", N = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr("nop", N = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-unsafe fn vdupq_n_f32_vfp4(value: f32) -> float32x4_t {
-    float32x4_t::splat(value)
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vext_s64<const N: i32>(a: int64x1_t, _b: int64x1_t) -> int64x1_t {
+    static_assert!(N == 0);
+    a
 }
 
-/// Duplicate vector element to vector or scalar
+/// Extract vector from pair of vectors
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", N = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr("nop", N = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -7905,18 +7348,19 @@ unsafe fn vdupq_n_f32_vfp4(value: f32) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_n_s8(value: i8) -> int8x8_t {
-    int8x8_t::splat(value)
+pub unsafe fn vext_u64<const N: i32>(a: uint64x1_t, _b: uint64x1_t) -> uint64x1_t {
+    static_assert!(N == 0);
+    a
 }
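
`vext` selects a full-width window of lanes from the concatenation of its two operands, starting at lane `N`, with `N` limited to `0..=lanes-1`. For these single-lane 64-bit vectors that pins `N` to 0, so the window is exactly `a`, the second operand is ignored, and no instruction is emitted, matching the `static_assert!(N == 0)` and the `nop` assertions. A portable model:

```rust
// Model of vext on one-lane vectors (illustrative): with N pinned to
// zero, the extract is the identity on `a`.
fn ext_u64x1<const N: usize>(a: [u64; 1], _b: [u64; 1]) -> [u64; 1] {
    assert!(N == 0); // compile-time in the real intrinsic
    a
}

fn main() {
    assert_eq!(ext_u64x1::<0>([42], [7]), [42]);
}
```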
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev16)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -7926,18 +7370,18 @@ pub unsafe fn vdup_n_s8(value: i8) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_n_s16(value: i16) -> int16x4_t {
-    int16x4_t::splat(value)
+pub unsafe fn vrev16_s8(a: int8x8_t) -> int8x8_t {
+    simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
 }
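
`vrev16` reverses the bytes inside each 16-bit element, which is exactly the pairwise swap spelled out by the shuffle indices `[1, 0, 3, 2, 5, 4, 7, 6]`. A portable model:

```rust
// Portable model of vrev16_s8: swap the two bytes of every 16-bit
// group across the eight lanes.
fn rev16(a: [i8; 8]) -> [i8; 8] {
    let mut r = a;
    for pair in r.chunks_exact_mut(2) {
        pair.swap(0, 1);
    }
    r
}

fn main() {
    assert_eq!(rev16([0, 1, 2, 3, 4, 5, 6, 7]), [1, 0, 3, 2, 5, 4, 7, 6]);
}
```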
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev16)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -7947,18 +7391,18 @@ pub unsafe fn vdup_n_s16(value: i16) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_n_s32(value: i32) -> int32x2_t {
-    int32x2_t::splat(value)
+pub unsafe fn vrev16q_s8(a: int8x16_t) -> int8x16_t {
+    simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmov)
+    assert_instr(rev16)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -7968,18 +7412,18 @@ pub unsafe fn vdup_n_s32(value: i32) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_n_s64(value: i64) -> int64x1_t {
-    int64x1_t::splat(value)
+pub unsafe fn vrev16_u8(a: uint8x8_t) -> uint8x8_t {
+    simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev16)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -7989,18 +7433,18 @@ pub unsafe fn vdup_n_s64(value: i64) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_n_u8(value: u8) -> uint8x8_t {
-    uint8x8_t::splat(value)
+pub unsafe fn vrev16q_u8(a: uint8x16_t) -> uint8x16_t {
+    simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev16)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8010,18 +7454,18 @@ pub unsafe fn vdup_n_u8(value: u8) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_n_u16(value: u16) -> uint16x4_t {
-    uint16x4_t::splat(value)
+pub unsafe fn vrev16_p8(a: poly8x8_t) -> poly8x8_t {
+    simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev16)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8031,18 +7475,18 @@ pub unsafe fn vdup_n_u16(value: u16) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_n_u32(value: u32) -> uint32x2_t {
-    uint32x2_t::splat(value)
+pub unsafe fn vrev16q_p8(a: poly8x16_t) -> poly8x16_t {
+    simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmov)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8052,18 +7496,18 @@ pub unsafe fn vdup_n_u32(value: u32) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_n_u64(value: u64) -> uint64x1_t {
-    uint64x1_t::splat(value)
+pub unsafe fn vrev32_s8(a: int8x8_t) -> int8x8_t {
+    simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8073,18 +7517,18 @@ pub unsafe fn vdup_n_u64(value: u64) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_n_p8(value: p8) -> poly8x8_t {
-    poly8x8_t::splat(value)
+pub unsafe fn vrev32q_s8(a: int8x16_t) -> int8x16_t {
+    simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8094,18 +7538,18 @@ pub unsafe fn vdup_n_p8(value: p8) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_n_p16(value: p16) -> poly16x4_t {
-    poly16x4_t::splat(value)
+pub unsafe fn vrev32_u8(a: uint8x8_t) -> uint8x8_t {
+    simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8115,38 +7559,39 @@ pub unsafe fn vdup_n_p16(value: p16) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vdup_n_f32(value: f32) -> float32x2_t {
-    float32x2_t::splat(value)
+pub unsafe fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t {
+    simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12])
 }
 
-/// Duplicate vector element to vector or scalar
-///
-/// Private vfp4 version used by FMA intriniscs because LLVM does
-/// not inline the non-vfp4 version in vfp4 functions.
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-unsafe fn vdup_n_f32_vfp4(value: f32) -> float32x2_t {
-    float32x2_t::splat(value)
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vrev32_s16(a: int16x4_t) -> int16x4_t {
+    simd_shuffle!(a, a, [1, 0, 3, 2])
 }
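
With 16-bit lanes, `vrev32` swaps the two halfwords inside each 32-bit word, so the shuffle shrinks to `[1, 0, 3, 2]` here (and `[1, 0, 3, 2, 5, 4, 7, 6]` for the q form below). Model:

```rust
// Portable model of vrev32_s16: swap halfwords within each 32-bit
// word of the four-lane vector.
fn rev32_s16(a: [i16; 4]) -> [i16; 4] {
    [a[1], a[0], a[3], a[2]]
}

fn main() {
    assert_eq!(rev32_s16([10, 11, 12, 13]), [11, 10, 13, 12]);
}
```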
 
-/// Load SIMD&FP register (immediate offset)
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8156,18 +7601,18 @@ unsafe fn vdup_n_f32_vfp4(value: f32) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vldrq_p128(a: *const p128) -> p128 {
-    *a
+pub unsafe fn vrev32q_s16(a: int16x8_t) -> int16x8_t {
+    simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
 }
 
-/// Store SIMD&FP register (immediate offset)
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8177,18 +7622,18 @@ pub unsafe fn vldrq_p128(a: *const p128) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vstrq_p128(a: *mut p128, b: p128) {
-    *a = b;
+pub unsafe fn vrev32_p16(a: poly16x4_t) -> poly16x4_t {
+    simd_shuffle!(a, a, [1, 0, 3, 2])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8198,18 +7643,18 @@ pub unsafe fn vstrq_p128(a: *mut p128, b: p128) {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmov_n_s8(value: i8) -> int8x8_t {
-    vdup_n_s8(value)
+pub unsafe fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t {
+    simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8219,18 +7664,18 @@ pub unsafe fn vmov_n_s8(value: i8) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmov_n_s16(value: i16) -> int16x4_t {
-    vdup_n_s16(value)
+pub unsafe fn vrev32_u16(a: uint16x4_t) -> uint16x4_t {
+    simd_shuffle!(a, a, [1, 0, 3, 2])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8240,18 +7685,18 @@ pub unsafe fn vmov_n_s16(value: i16) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmov_n_s32(value: i32) -> int32x2_t {
-    vdup_n_s32(value)
+pub unsafe fn vrev32q_u16(a: uint16x8_t) -> uint16x8_t {
+    simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmov)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8261,18 +7706,18 @@ pub unsafe fn vmov_n_s32(value: i32) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmov_n_s64(value: i64) -> int64x1_t {
-    vdup_n_s64(value)
+pub unsafe fn vrev32_p8(a: poly8x8_t) -> poly8x8_t {
+    simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev32)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8282,39 +7727,18 @@ pub unsafe fn vmov_n_s64(value: i64) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmov_n_u8(value: u8) -> uint8x8_t {
-    vdup_n_u8(value)
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vmov_n_u16(value: u16) -> uint16x4_t {
-    vdup_n_u16(value)
+pub unsafe fn vrev32q_p8(a: poly8x16_t) -> poly8x16_t {
+    simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8324,18 +7748,18 @@ pub unsafe fn vmov_n_u16(value: u16) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmov_n_u32(value: u32) -> uint32x2_t {
-    vdup_n_u32(value)
+pub unsafe fn vrev64_s8(a: int8x8_t) -> int8x8_t {
+    simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmov)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8345,18 +7769,18 @@ pub unsafe fn vmov_n_u32(value: u32) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmov_n_u64(value: u64) -> uint64x1_t {
-    vdup_n_u64(value)
+pub unsafe fn vrev64q_s8(a: int8x16_t) -> int8x16_t {
+    simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8366,18 +7790,18 @@ pub unsafe fn vmov_n_u64(value: u64) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmov_n_p8(value: p8) -> poly8x8_t {
-    vdup_n_p8(value)
+pub unsafe fn vrev64_s16(a: int16x4_t) -> int16x4_t {
+    simd_shuffle!(a, a, [3, 2, 1, 0])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8387,18 +7811,18 @@ pub unsafe fn vmov_n_p8(value: p8) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmov_n_p16(value: p16) -> poly16x4_t {
-    vdup_n_p16(value)
+pub unsafe fn vrev64q_s16(a: int16x8_t) -> int16x8_t {
+    simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8408,18 +7832,18 @@ pub unsafe fn vmov_n_p16(value: p16) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmov_n_f32(value: f32) -> float32x2_t {
-    vdup_n_f32(value)
+pub unsafe fn vrev64_s32(a: int32x2_t) -> int32x2_t {
+    simd_shuffle!(a, a, [1, 0])
 }
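
Every member of the `vrev{16,32,64}` family reduces to one rule: reverse the lanes inside each group of `group_width / lane_width` lanes. A generic model covering all the shuffles in these hunks:

```rust
// Generic model of the vrev family (illustrative): reverse lanes
// within every group of G lanes, G = group width / lane width.
fn rev_groups<T, const G: usize>(a: &mut [T]) {
    for group in a.chunks_exact_mut(G) {
        group.reverse();
    }
}

fn main() {
    // vrev64_s32: two 32-bit lanes per 64-bit group -> [1, 0].
    let mut v = [10i32, 11];
    rev_groups::<_, 2>(&mut v);
    assert_eq!(v, [11, 10]);

    // vrev64q_u8: eight bytes per 64-bit group.
    let mut b: [u8; 16] = core::array::from_fn(|i| i as u8);
    rev_groups::<_, 8>(&mut b);
    assert_eq!(b, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]);
}
```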
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8429,18 +7853,18 @@ pub unsafe fn vmov_n_f32(value: f32) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovq_n_s8(value: i8) -> int8x16_t {
-    vdupq_n_s8(value)
+pub unsafe fn vrev64q_s32(a: int32x4_t) -> int32x4_t {
+    simd_shuffle!(a, a, [1, 0, 3, 2])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8450,18 +7874,18 @@ pub unsafe fn vmovq_n_s8(value: i8) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovq_n_s16(value: i16) -> int16x8_t {
-    vdupq_n_s16(value)
+pub unsafe fn vrev64_u8(a: uint8x8_t) -> uint8x8_t {
+    simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8471,18 +7895,18 @@ pub unsafe fn vmovq_n_s16(value: i16) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovq_n_s32(value: i32) -> int32x4_t {
-    vdupq_n_s32(value)
+pub unsafe fn vrev64q_u8(a: uint8x16_t) -> uint8x16_t {
+    simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8492,18 +7916,18 @@ pub unsafe fn vmovq_n_s32(value: i32) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovq_n_s64(value: i64) -> int64x2_t {
-    vdupq_n_s64(value)
+pub unsafe fn vrev64_u16(a: uint16x4_t) -> uint16x4_t {
+    simd_shuffle!(a, a, [3, 2, 1, 0])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8513,18 +7937,18 @@ pub unsafe fn vmovq_n_s64(value: i64) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovq_n_u8(value: u8) -> uint8x16_t {
-    vdupq_n_u8(value)
+pub unsafe fn vrev64q_u16(a: uint16x8_t) -> uint16x8_t {
+    simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8534,18 +7958,18 @@ pub unsafe fn vmovq_n_u8(value: u8) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovq_n_u16(value: u16) -> uint16x8_t {
-    vdupq_n_u16(value)
+pub unsafe fn vrev64_u32(a: uint32x2_t) -> uint32x2_t {
+    simd_shuffle!(a, a, [1, 0])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8555,18 +7979,18 @@ pub unsafe fn vmovq_n_u16(value: u16) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovq_n_u32(value: u32) -> uint32x4_t {
-    vdupq_n_u32(value)
+pub unsafe fn vrev64q_u32(a: uint32x4_t) -> uint32x4_t {
+    simd_shuffle!(a, a, [1, 0, 3, 2])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8576,18 +8000,18 @@ pub unsafe fn vmovq_n_u32(value: u32) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovq_n_u64(value: u64) -> uint64x2_t {
-    vdupq_n_u64(value)
+pub unsafe fn vrev64_f32(a: float32x2_t) -> float32x2_t {
+    simd_shuffle!(a, a, [1, 0])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8597,18 +8021,18 @@ pub unsafe fn vmovq_n_u64(value: u64) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovq_n_p8(value: p8) -> poly8x16_t {
-    vdupq_n_p8(value)
+pub unsafe fn vrev64q_f32(a: float32x4_t) -> float32x4_t {
+    simd_shuffle!(a, a, [1, 0, 3, 2])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8618,18 +8042,18 @@ pub unsafe fn vmovq_n_p8(value: p8) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovq_n_p16(value: p16) -> poly16x8_t {
-    vdupq_n_p16(value)
+pub unsafe fn vrev64_p8(a: poly8x8_t) -> poly8x8_t {
+    simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0])
 }
 
-/// Duplicate vector element to vector or scalar
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(rev64)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -8639,20 +8063,19 @@ pub unsafe fn vmovq_n_p16(value: p16) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vmovq_n_f32(value: f32) -> float32x4_t {
-    vdupq_n_f32(value)
+pub unsafe fn vrev64q_p8(a: poly8x16_t) -> poly8x16_t {
+    simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8])
 }
 
-/// Extract vector from pair of vectors
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", N = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr("nop", N = 0)
+    assert_instr(rev64)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -8661,21 +8084,19 @@ pub unsafe fn vmovq_n_f32(value: f32) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vext_s64<const N: i32>(a: int64x1_t, _b: int64x1_t) -> int64x1_t {
-    static_assert!(N == 0);
-    a
+pub unsafe fn vrev64_p16(a: poly16x4_t) -> poly16x4_t {
+    simd_shuffle!(a, a, [3, 2, 1, 0])
 }
 
-/// Extract vector from pair of vectors
+/// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", N = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr("nop", N = 0)
+    assert_instr(rev64)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -8684,3740 +8105,1656 @@ pub unsafe fn vext_s64<const N: i32>(a: int64x1_t, _b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vext_u64<const N: i32>(a: uint64x1_t, _b: uint64x1_t) -> uint64x1_t {
-    static_assert!(N == 0);
-    a
+pub unsafe fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t {
+    simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
 }
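+
+// A minimal sketch (ours, not from the intrinsic docs) of what the `vrev64*`
+// shuffle masks above encode: elements are reversed within each 64-bit
+// doubleword, not across the whole register. Assuming `core::mem::transmute`
+// is in scope:
+//
+//     let v: uint32x4_t = transmute([0u32, 1, 2, 3]);
+//     let r: [u32; 4] = transmute(vrev64q_u32(v));
+//     assert_eq!(r, [1, 0, 3, 2]); // per-doubleword reversal, not [3, 2, 1, 0]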
 
-/// Population count per byte.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cnt)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vcnt_s8(a: int8x8_t) -> int8x8_t {
-    vcnt_s8_(a)
-}
-/// Population count per byte.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cnt)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vcntq_s8(a: int8x16_t) -> int8x16_t {
-    vcntq_s8_(a)
-}
-/// Population count per byte.
+/* FIXME: 16-bit float (requires float16x4_t/float16x8_t support)
+/// Vector combine
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cnt)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
+#[cfg_attr(test, assert_instr(nop))]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vcnt_u8(a: uint8x8_t) -> uint8x8_t {
-    transmute(vcnt_s8_(transmute(a)))
+)]
+pub unsafe fn vcombine_f16(low: float16x4_t, high: float16x4_t) -> float16x8_t {
+    simd_shuffle!(low, high, [0, 1, 2, 3, 4, 5, 6, 7])
 }
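+
+// Hypothetical usage once `f16` support lands (illustrative only; this whole
+// block stays commented out until then): the result concatenates the two
+// four-lane halves, mirroring the other `vcombine_*` intrinsics:
+//
+//     let v: float16x8_t = vcombine_f16(low, high);
+//     // lanes 0..4 come from `low`, lanes 4..8 from `high`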
-/// Population count per byte.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cnt)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vcntq_u8(a: uint8x16_t) -> uint8x16_t {
-    transmute(vcntq_s8_(transmute(a)))
-}
-/// Population count per byte.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cnt)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vcnt_p8(a: poly8x8_t) -> poly8x8_t {
-    transmute(vcnt_s8_(transmute(a)))
-}
-/// Population count per byte.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cnt)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vcntq_p8(a: poly8x16_t) -> poly8x16_t {
-    transmute(vcntq_s8_(transmute(a)))
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev16)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev16_s8(a: int8x8_t) -> int8x8_t {
-    simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev16)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev16q_s8(a: int8x16_t) -> int8x16_t {
-    simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev16)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev16_u8(a: uint8x8_t) -> uint8x8_t {
-    simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev16)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev16q_u8(a: uint8x16_t) -> uint8x16_t {
-    simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev16)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev16_p8(a: poly8x8_t) -> poly8x8_t {
-    simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev16)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev16q_p8(a: poly8x16_t) -> poly8x16_t {
-    simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev32_s8(a: int8x8_t) -> int8x8_t {
-    simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev32q_s8(a: int8x16_t) -> int8x16_t {
-    simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev32_u8(a: uint8x8_t) -> uint8x8_t {
-    simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t {
-    simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev32_s16(a: int16x4_t) -> int16x4_t {
-    simd_shuffle!(a, a, [1, 0, 3, 2])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev32q_s16(a: int16x8_t) -> int16x8_t {
-    simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev32_p16(a: poly16x4_t) -> poly16x4_t {
-    simd_shuffle!(a, a, [1, 0, 3, 2])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t {
-    simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev32_u16(a: uint16x4_t) -> uint16x4_t {
-    simd_shuffle!(a, a, [1, 0, 3, 2])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev32q_u16(a: uint16x8_t) -> uint16x8_t {
-    simd_shuffle!(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev32_p8(a: poly8x8_t) -> poly8x8_t {
-    simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev32)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev32q_p8(a: poly8x16_t) -> poly8x16_t {
-    simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev64_s8(a: int8x8_t) -> int8x8_t {
-    simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev64q_s8(a: int8x16_t) -> int8x16_t {
-    simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev64_s16(a: int16x4_t) -> int16x4_t {
-    simd_shuffle!(a, a, [3, 2, 1, 0])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev64q_s16(a: int16x8_t) -> int16x8_t {
-    simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev64_s32(a: int32x2_t) -> int32x2_t {
-    simd_shuffle!(a, a, [1, 0])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev64q_s32(a: int32x4_t) -> int32x4_t {
-    simd_shuffle!(a, a, [1, 0, 3, 2])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev64_u8(a: uint8x8_t) -> uint8x8_t {
-    simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev64q_u8(a: uint8x16_t) -> uint8x16_t {
-    simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev64_u16(a: uint16x4_t) -> uint16x4_t {
-    simd_shuffle!(a, a, [3, 2, 1, 0])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev64q_u16(a: uint16x8_t) -> uint16x8_t {
-    simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev64_u32(a: uint32x2_t) -> uint32x2_t {
-    simd_shuffle!(a, a, [1, 0])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev64q_u32(a: uint32x4_t) -> uint32x4_t {
-    simd_shuffle!(a, a, [1, 0, 3, 2])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev64_f32(a: float32x2_t) -> float32x2_t {
-    simd_shuffle!(a, a, [1, 0])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev64q_f32(a: float32x4_t) -> float32x4_t {
-    simd_shuffle!(a, a, [1, 0, 3, 2])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev64_p8(a: poly8x8_t) -> poly8x8_t {
-    simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev64q_p8(a: poly8x16_t) -> poly8x16_t {
-    simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev64_p16(a: poly16x4_t) -> poly16x4_t {
-    simd_shuffle!(a, a, [3, 2, 1, 0])
-}
-
-/// Reversing vector elements (swap endianness)
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rev64)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t {
-    simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4])
-}
-
-/// Signed Add and Accumulate Long Pairwise.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s8))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sadalp)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t {
-    #[cfg(target_arch = "arm")]
-    {
-        crate::core_arch::arm::neon::vpadal_s8_(a, b)
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    {
-        simd_add(vpaddl_s8_(b), a)
-    }
-}
-
-/// Signed Add and Accumulate Long Pairwise.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s16))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sadalp)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t {
-    #[cfg(target_arch = "arm")]
-    {
-        crate::core_arch::arm::neon::vpadal_s16_(a, b)
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    {
-        simd_add(vpaddl_s16_(b), a)
-    }
-}
-
-/// Signed Add and Accumulate Long Pairwise.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s32))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sadalp)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t {
-    #[cfg(target_arch = "arm")]
-    {
-        crate::core_arch::arm::neon::vpadal_s32_(a, b)
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    {
-        simd_add(vpaddl_s32_(b), a)
-    }
-}
-
-/// Signed Add and Accumulate Long Pairwise.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s8))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sadalp)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
-    #[cfg(target_arch = "arm")]
-    {
-        crate::core_arch::arm::neon::vpadalq_s8_(a, b)
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    {
-        simd_add(vpaddlq_s8_(b), a)
-    }
-}
-
-/// Signed Add and Accumulate Long Pairwise.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s16))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sadalp)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
-    #[cfg(target_arch = "arm")]
-    {
-        crate::core_arch::arm::neon::vpadalq_s16_(a, b)
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    {
-        simd_add(vpaddlq_s16_(b), a)
-    }
-}
-
-/// Signed Add and Accumulate Long Pairwise.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s32))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sadalp)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
-    #[cfg(target_arch = "arm")]
-    {
-        crate::core_arch::arm::neon::vpadalq_s32_(a, b)
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    {
-        simd_add(vpaddlq_s32_(b), a)
-    }
-}
-
-/// Unsigned Add and Accumulate Long Pairwise.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u8))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uadalp)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t {
-    #[cfg(target_arch = "arm")]
-    {
-        crate::core_arch::arm::neon::vpadal_u8_(a, b)
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    {
-        simd_add(vpaddl_u8_(b), a)
-    }
-}
-
-/// Unsigned Add and Accumulate Long Pairwise.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u16))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uadalp)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t {
-    #[cfg(target_arch = "arm")]
-    {
-        crate::core_arch::arm::neon::vpadal_u16_(a, b)
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    {
-        simd_add(vpaddl_u16_(b), a)
-    }
-}
-
-/// Unsigned Add and Accumulate Long Pairwise.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u32))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uadalp)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t {
-    #[cfg(target_arch = "arm")]
-    {
-        crate::core_arch::arm::neon::vpadal_u32_(a, b)
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    {
-        simd_add(vpaddl_u32_(b), a)
-    }
-}
-
-/// Unsigned Add and Accumulate Long Pairwise.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u8))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uadalp)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
-    #[cfg(target_arch = "arm")]
-    {
-        crate::core_arch::arm::neon::vpadalq_u8_(a, b)
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    {
-        simd_add(vpaddlq_u8_(b), a)
-    }
-}
-
-/// Unsigned Add and Accumulate Long Pairwise.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u16))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uadalp)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
-    #[cfg(target_arch = "arm")]
-    {
-        crate::core_arch::arm::neon::vpadalq_u16_(a, b)
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    {
-        simd_add(vpaddlq_u16_(b), a)
-    }
-}
-
-/// Unsigned Add and Accumulate Long Pairwise.
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u32))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uadalp)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
-    #[cfg(target_arch = "arm")]
-    {
-        crate::core_arch::arm::neon::vpadalq_u32_(a, b)
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    {
-        simd_add(vpaddlq_u32_(b), a)
-    }
-}
-
-/// 8-bit integer matrix multiply-accumulate
-#[inline]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smmla)
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
-)]
-pub unsafe fn vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.smmla.v4i32.v16i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smmla.v4i32.v16i8"
-        )]
-        fn vmmlaq_s32_(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t;
-    }
-    vmmlaq_s32_(a, b, c)
-}
-
-/// 8-bit integer matrix multiply-accumulate
-#[inline]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ummla)
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
-)]
-pub unsafe fn vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
-    #[allow(improper_ctypes)]
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.ummla.v4i32.v16i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ummla.v4i32.v16i8"
-        )]
-        fn vmmlaq_u32_(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t;
-    }
-    vmmlaq_u32_(a, b, c)
-}
-
-/// Unsigned and signed 8-bit integer matrix multiply-accumulate
-#[inline]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(usmmla)
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
-)]
-pub unsafe fn vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usmmla.v4i32.v16i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.usmmla.v4i32.v16i8"
-        )]
-        fn vusmmlaq_s32_(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t;
-    }
-    vusmmlaq_s32_(a, b, c)
-}
-
-/* FIXME: 16-bit float
-/// Vector combine
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)] pub unsafe fn vcombine_f16 ( low: float16x4_t,  high: float16x4_t) -> float16x8_t {
-    simd_shuffle!(low, high, [0, 1, 2, 3, 4, 5, 6, 7])
-}
-*/
-
-/// Vector combine
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vcombine_f32(low: float32x2_t, high: float32x2_t) -> float32x4_t {
-    simd_shuffle!(low, high, [0, 1, 2, 3])
-}
-
-/// Vector combine
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vcombine_p8(low: poly8x8_t, high: poly8x8_t) -> poly8x16_t {
-    simd_shuffle!(
-        low,
-        high,
-        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
-    )
-}
-
-/// Vector combine
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vcombine_p16(low: poly16x4_t, high: poly16x4_t) -> poly16x8_t {
-    simd_shuffle!(low, high, [0, 1, 2, 3, 4, 5, 6, 7])
-}
-
-/// Vector combine
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vcombine_s8(low: int8x8_t, high: int8x8_t) -> int8x16_t {
-    simd_shuffle!(
-        low,
-        high,
-        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
-    )
-}
-
-/// Vector combine
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vcombine_s16(low: int16x4_t, high: int16x4_t) -> int16x8_t {
-    simd_shuffle!(low, high, [0, 1, 2, 3, 4, 5, 6, 7])
-}
-
-/// Vector combine
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vcombine_s32(low: int32x2_t, high: int32x2_t) -> int32x4_t {
-    simd_shuffle!(low, high, [0, 1, 2, 3])
-}
-
-/// Vector combine
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vcombine_s64(low: int64x1_t, high: int64x1_t) -> int64x2_t {
-    simd_shuffle!(low, high, [0, 1])
-}
-
-/// Vector combine
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vcombine_u8(low: uint8x8_t, high: uint8x8_t) -> uint8x16_t {
-    simd_shuffle!(
-        low,
-        high,
-        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
-    )
-}
-
-/// Vector combine
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vcombine_u16(low: uint16x4_t, high: uint16x4_t) -> uint16x8_t {
-    simd_shuffle!(low, high, [0, 1, 2, 3, 4, 5, 6, 7])
-}
-
-/// Vector combine
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mov)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vcombine_u32(low: uint32x2_t, high: uint32x2_t) -> uint32x4_t {
-    simd_shuffle!(low, high, [0, 1, 2, 3])
-}
-
-/// Vector combine
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vcombine_u64(low: uint64x1_t, high: uint64x1_t) -> uint64x2_t {
-    simd_shuffle!(low, high, [0, 1])
-}
-
-/// Vector combine
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(test, assert_instr(nop))]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vcombine_p64(low: poly64x1_t, high: poly64x1_t) -> poly64x2_t {
-    simd_shuffle!(low, high, [0, 1])
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    use crate::core_arch::aarch64::*;
-    #[cfg(target_arch = "arm")]
-    use crate::core_arch::arm::*;
-    use crate::core_arch::arm_shared::test_support::*;
-    use crate::core_arch::simd::*;
-    use std::{mem::transmute, vec::Vec};
-    use stdarch_test::simd_test;
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_s8() {
-        let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let elem: i8 = 42;
-        let e = i8x8::new(0, 1, 2, 3, 4, 5, 6, 42);
-        let r: i8x8 = transmute(vld1_lane_s8::<7>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_s8() {
-        let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let elem: i8 = 42;
-        let e = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 42);
-        let r: i8x16 = transmute(vld1q_lane_s8::<15>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_s16() {
-        let a = i16x4::new(0, 1, 2, 3);
-        let elem: i16 = 42;
-        let e = i16x4::new(0, 1, 2, 42);
-        let r: i16x4 = transmute(vld1_lane_s16::<3>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_s16() {
-        let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let elem: i16 = 42;
-        let e = i16x8::new(0, 1, 2, 3, 4, 5, 6, 42);
-        let r: i16x8 = transmute(vld1q_lane_s16::<7>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_s32() {
-        let a = i32x2::new(0, 1);
-        let elem: i32 = 42;
-        let e = i32x2::new(0, 42);
-        let r: i32x2 = transmute(vld1_lane_s32::<1>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_s32() {
-        let a = i32x4::new(0, 1, 2, 3);
-        let elem: i32 = 42;
-        let e = i32x4::new(0, 1, 2, 42);
-        let r: i32x4 = transmute(vld1q_lane_s32::<3>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_s64() {
-        let a = i64x1::new(0);
-        let elem: i64 = 42;
-        let e = i64x1::new(42);
-        let r: i64x1 = transmute(vld1_lane_s64::<0>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_s64() {
-        let a = i64x2::new(0, 1);
-        let elem: i64 = 42;
-        let e = i64x2::new(0, 42);
-        let r: i64x2 = transmute(vld1q_lane_s64::<1>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_u8() {
-        let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let elem: u8 = 42;
-        let e = u8x8::new(0, 1, 2, 3, 4, 5, 6, 42);
-        let r: u8x8 = transmute(vld1_lane_u8::<7>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_u8() {
-        let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let elem: u8 = 42;
-        let e = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 42);
-        let r: u8x16 = transmute(vld1q_lane_u8::<15>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_u16() {
-        let a = u16x4::new(0, 1, 2, 3);
-        let elem: u16 = 42;
-        let e = u16x4::new(0, 1, 2, 42);
-        let r: u16x4 = transmute(vld1_lane_u16::<3>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_u16() {
-        let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let elem: u16 = 42;
-        let e = u16x8::new(0, 1, 2, 3, 4, 5, 6, 42);
-        let r: u16x8 = transmute(vld1q_lane_u16::<7>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_u32() {
-        let a = u32x2::new(0, 1);
-        let elem: u32 = 42;
-        let e = u32x2::new(0, 42);
-        let r: u32x2 = transmute(vld1_lane_u32::<1>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_u32() {
-        let a = u32x4::new(0, 1, 2, 3);
-        let elem: u32 = 42;
-        let e = u32x4::new(0, 1, 2, 42);
-        let r: u32x4 = transmute(vld1q_lane_u32::<3>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_u64() {
-        let a = u64x1::new(0);
-        let elem: u64 = 42;
-        let e = u64x1::new(42);
-        let r: u64x1 = transmute(vld1_lane_u64::<0>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_u64() {
-        let a = u64x2::new(0, 1);
-        let elem: u64 = 42;
-        let e = u64x2::new(0, 42);
-        let r: u64x2 = transmute(vld1q_lane_u64::<1>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_p8() {
-        let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let elem: p8 = 42;
-        let e = u8x8::new(0, 1, 2, 3, 4, 5, 6, 42);
-        let r: u8x8 = transmute(vld1_lane_p8::<7>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_p8() {
-        let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let elem: p8 = 42;
-        let e = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 42);
-        let r: u8x16 = transmute(vld1q_lane_p8::<15>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_p16() {
-        let a = u16x4::new(0, 1, 2, 3);
-        let elem: p16 = 42;
-        let e = u16x4::new(0, 1, 2, 42);
-        let r: u16x4 = transmute(vld1_lane_p16::<3>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_p16() {
-        let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let elem: p16 = 42;
-        let e = u16x8::new(0, 1, 2, 3, 4, 5, 6, 42);
-        let r: u16x8 = transmute(vld1q_lane_p16::<7>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vld1_lane_p64() {
-        let a = u64x1::new(0);
-        let elem: u64 = 42;
-        let e = u64x1::new(42);
-        let r: u64x1 = transmute(vld1_lane_p64::<0>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vld1q_lane_p64() {
-        let a = u64x2::new(0, 1);
-        let elem: u64 = 42;
-        let e = u64x2::new(0, 42);
-        let r: u64x2 = transmute(vld1q_lane_p64::<1>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_f32() {
-        let a = f32x2::new(0., 1.);
-        let elem: f32 = 42.;
-        let e = f32x2::new(0., 42.);
-        let r: f32x2 = transmute(vld1_lane_f32::<1>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_f32() {
-        let a = f32x4::new(0., 1., 2., 3.);
-        let elem: f32 = 42.;
-        let e = f32x4::new(0., 1., 2., 42.);
-        let r: f32x4 = transmute(vld1q_lane_f32::<3>(&elem, transmute(a)));
-        assert_eq!(r, e)
-    }
+*/
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    use crate::core_arch::aarch64::*;
+    #[cfg(target_arch = "arm")]
+    use crate::core_arch::arm::*;
+    use crate::core_arch::arm_shared::test_support::*;
+    use crate::core_arch::simd::*;
+    use std::{mem::transmute, vec::Vec};
+    use stdarch_test::simd_test;
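+
+    // The vld1*_lane tests below load a single element through a pointer
+    // into lane N of an existing vector and check that only that lane
+    // changes.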
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_s8() {
+    unsafe fn test_vld1_lane_s8() {
+        let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let elem: i8 = 42;
-        let e = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
-        let r: i8x8 = transmute(vld1_dup_s8(&elem));
+        let e = i8x8::new(0, 1, 2, 3, 4, 5, 6, 42);
+        let r: i8x8 = transmute(vld1_lane_s8::<7>(&elem, transmute(a)));
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_s8() {
+    unsafe fn test_vld1q_lane_s8() {
+        let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let elem: i8 = 42;
-        let e = i8x16::new(
-            42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
-        );
-        let r: i8x16 = transmute(vld1q_dup_s8(&elem));
+        let e = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 42);
+        let r: i8x16 = transmute(vld1q_lane_s8::<15>(&elem, transmute(a)));
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_s16() {
+    unsafe fn test_vld1_lane_s16() {
+        let a = i16x4::new(0, 1, 2, 3);
         let elem: i16 = 42;
-        let e = i16x4::new(42, 42, 42, 42);
-        let r: i16x4 = transmute(vld1_dup_s16(&elem));
+        let e = i16x4::new(0, 1, 2, 42);
+        let r: i16x4 = transmute(vld1_lane_s16::<3>(&elem, transmute(a)));
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_s16() {
+    unsafe fn test_vld1q_lane_s16() {
+        let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let elem: i16 = 42;
-        let e = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
-        let r: i16x8 = transmute(vld1q_dup_s16(&elem));
+        let e = i16x8::new(0, 1, 2, 3, 4, 5, 6, 42);
+        let r: i16x8 = transmute(vld1q_lane_s16::<7>(&elem, transmute(a)));
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_s32() {
+    unsafe fn test_vld1_lane_s32() {
+        let a = i32x2::new(0, 1);
         let elem: i32 = 42;
-        let e = i32x2::new(42, 42);
-        let r: i32x2 = transmute(vld1_dup_s32(&elem));
+        let e = i32x2::new(0, 42);
+        let r: i32x2 = transmute(vld1_lane_s32::<1>(&elem, transmute(a)));
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_s32() {
+    unsafe fn test_vld1q_lane_s32() {
+        let a = i32x4::new(0, 1, 2, 3);
         let elem: i32 = 42;
-        let e = i32x4::new(42, 42, 42, 42);
-        let r: i32x4 = transmute(vld1q_dup_s32(&elem));
+        let e = i32x4::new(0, 1, 2, 42);
+        let r: i32x4 = transmute(vld1q_lane_s32::<3>(&elem, transmute(a)));
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_s64() {
+    unsafe fn test_vld1_lane_s64() {
+        let a = i64x1::new(0);
         let elem: i64 = 42;
         let e = i64x1::new(42);
-        let r: i64x1 = transmute(vld1_dup_s64(&elem));
+        let r: i64x1 = transmute(vld1_lane_s64::<0>(&elem, transmute(a)));
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_s64() {
+    unsafe fn test_vld1q_lane_s64() {
+        let a = i64x2::new(0, 1);
         let elem: i64 = 42;
-        let e = i64x2::new(42, 42);
-        let r: i64x2 = transmute(vld1q_dup_s64(&elem));
+        let e = i64x2::new(0, 42);
+        let r: i64x2 = transmute(vld1q_lane_s64::<1>(&elem, transmute(a)));
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_u8() {
+    unsafe fn test_vld1_lane_u8() {
+        let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let elem: u8 = 42;
-        let e = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
-        let r: u8x8 = transmute(vld1_dup_u8(&elem));
+        let e = u8x8::new(0, 1, 2, 3, 4, 5, 6, 42);
+        let r: u8x8 = transmute(vld1_lane_u8::<7>(&elem, transmute(a)));
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_u8() {
+    unsafe fn test_vld1q_lane_u8() {
+        let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let elem: u8 = 42;
-        let e = u8x16::new(
-            42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
-        );
-        let r: u8x16 = transmute(vld1q_dup_u8(&elem));
+        let e = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 42);
+        let r: u8x16 = transmute(vld1q_lane_u8::<15>(&elem, transmute(a)));
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_u16() {
+    unsafe fn test_vld1_lane_u16() {
+        let a = u16x4::new(0, 1, 2, 3);
         let elem: u16 = 42;
-        let e = u16x4::new(42, 42, 42, 42);
-        let r: u16x4 = transmute(vld1_dup_u16(&elem));
+        let e = u16x4::new(0, 1, 2, 42);
+        let r: u16x4 = transmute(vld1_lane_u16::<3>(&elem, transmute(a)));
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_u16() {
+    unsafe fn test_vld1q_lane_u16() {
+        let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let elem: u16 = 42;
-        let e = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
-        let r: u16x8 = transmute(vld1q_dup_u16(&elem));
+        let e = u16x8::new(0, 1, 2, 3, 4, 5, 6, 42);
+        let r: u16x8 = transmute(vld1q_lane_u16::<7>(&elem, transmute(a)));
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_u32() {
+    unsafe fn test_vld1_lane_u32() {
+        let a = u32x2::new(0, 1);
         let elem: u32 = 42;
-        let e = u32x2::new(42, 42);
-        let r: u32x2 = transmute(vld1_dup_u32(&elem));
+        let e = u32x2::new(0, 42);
+        let r: u32x2 = transmute(vld1_lane_u32::<1>(&elem, transmute(a)));
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_u32() {
+    unsafe fn test_vld1q_lane_u32() {
+        let a = u32x4::new(0, 1, 2, 3);
         let elem: u32 = 42;
-        let e = u32x4::new(42, 42, 42, 42);
-        let r: u32x4 = transmute(vld1q_dup_u32(&elem));
+        let e = u32x4::new(0, 1, 2, 42);
+        let r: u32x4 = transmute(vld1q_lane_u32::<3>(&elem, transmute(a)));
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_u64() {
+    unsafe fn test_vld1_lane_u64() {
+        let a = u64x1::new(0);
         let elem: u64 = 42;
         let e = u64x1::new(42);
-        let r: u64x1 = transmute(vld1_dup_u64(&elem));
+        let r: u64x1 = transmute(vld1_lane_u64::<0>(&elem, transmute(a)));
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_u64() {
+    unsafe fn test_vld1q_lane_u64() {
+        let a = u64x2::new(0, 1);
         let elem: u64 = 42;
-        let e = u64x2::new(42, 42);
-        let r: u64x2 = transmute(vld1q_dup_u64(&elem));
+        let e = u64x2::new(0, 42);
+        let r: u64x2 = transmute(vld1q_lane_u64::<1>(&elem, transmute(a)));
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_p8() {
+    unsafe fn test_vld1_lane_p8() {
+        let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let elem: p8 = 42;
-        let e = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
-        let r: u8x8 = transmute(vld1_dup_p8(&elem));
+        let e = u8x8::new(0, 1, 2, 3, 4, 5, 6, 42);
+        let r: u8x8 = transmute(vld1_lane_p8::<7>(&elem, transmute(a)));
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_p8() {
+    unsafe fn test_vld1q_lane_p8() {
+        let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let elem: p8 = 42;
-        let e = u8x16::new(
-            42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
-        );
-        let r: u8x16 = transmute(vld1q_dup_p8(&elem));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_p16() {
-        let elem: p16 = 42;
-        let e = u16x4::new(42, 42, 42, 42);
-        let r: u16x4 = transmute(vld1_dup_p16(&elem));
+        let e = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 42);
+        let r: u8x16 = transmute(vld1q_lane_p8::<15>(&elem, transmute(a)));
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_p16() {
+    unsafe fn test_vld1_lane_p16() {
+        let a = u16x4::new(0, 1, 2, 3);
         let elem: p16 = 42;
-        let e = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
-        let r: u16x8 = transmute(vld1q_dup_p16(&elem));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vld1_dup_p64() {
-        let elem: u64 = 42;
-        let e = u64x1::new(42);
-        let r: u64x1 = transmute(vld1_dup_p64(&elem));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vld1q_dup_p64() {
-        let elem: u64 = 42;
-        let e = u64x2::new(42, 42);
-        let r: u64x2 = transmute(vld1q_dup_p64(&elem));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_f32() {
-        let elem: f32 = 42.;
-        let e = f32x2::new(42., 42.);
-        let r: f32x2 = transmute(vld1_dup_f32(&elem));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_f32() {
-        let elem: f32 = 42.;
-        let e = f32x4::new(42., 42., 42., 42.);
-        let r: f32x4 = transmute(vld1q_dup_f32(&elem));
-        assert_eq!(r, e)
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_u8() {
-        let v = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r = vget_lane_u8::<1>(transmute(v));
-        assert_eq!(r, 2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_u32() {
-        let v = i32x4::new(1, 2, 3, 4);
-        let r = vgetq_lane_u32::<1>(transmute(v));
-        assert_eq!(r, 2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_s32() {
-        let v = i32x4::new(1, 2, 3, 4);
-        let r = vgetq_lane_s32::<1>(transmute(v));
-        assert_eq!(r, 2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_u64() {
-        let v: u64 = 1;
-        let r = vget_lane_u64::<0>(transmute(v));
-        assert_eq!(r, 1);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_u16() {
-        let v = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r = vgetq_lane_u16::<1>(transmute(v));
-        assert_eq!(r, 2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_s8() {
-        let v = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r = vget_lane_s8::<2>(transmute(v));
-        assert_eq!(r, 2);
-        let r = vget_lane_s8::<4>(transmute(v));
-        assert_eq!(r, 4);
-        let r = vget_lane_s8::<5>(transmute(v));
-        assert_eq!(r, 5);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_p8() {
-        let v = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r = vget_lane_p8::<2>(transmute(v));
-        assert_eq!(r, 2);
-        let r = vget_lane_p8::<3>(transmute(v));
-        assert_eq!(r, 3);
-        let r = vget_lane_p8::<5>(transmute(v));
-        assert_eq!(r, 5);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_p16() {
-        let v = u16x4::new(0, 1, 2, 3);
-        let r = vget_lane_p16::<2>(transmute(v));
-        assert_eq!(r, 2);
-        let r = vget_lane_p16::<3>(transmute(v));
-        assert_eq!(r, 3);
-        let r = vget_lane_p16::<0>(transmute(v));
-        assert_eq!(r, 0);
-        let r = vget_lane_p16::<1>(transmute(v));
-        assert_eq!(r, 1);
+        let e = u16x4::new(0, 1, 2, 42);
+        let r: u16x4 = transmute(vld1_lane_p16::<3>(&elem, transmute(a)));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_s16() {
-        let v = i16x4::new(0, 1, 2, 3);
-        let r = vget_lane_s16::<2>(transmute(v));
-        assert_eq!(r, 2);
-        let r = vget_lane_s16::<3>(transmute(v));
-        assert_eq!(r, 3);
-        let r = vget_lane_s16::<0>(transmute(v));
-        assert_eq!(r, 0);
-        let r = vget_lane_s16::<1>(transmute(v));
-        assert_eq!(r, 1);
+    unsafe fn test_vld1q_lane_p16() {
+        let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let elem: p16 = 42;
+        let e = u16x8::new(0, 1, 2, 3, 4, 5, 6, 42);
+        let r: u16x8 = transmute(vld1q_lane_p16::<7>(&elem, transmute(a)));
+        assert_eq!(r, e)
     }
 
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_u16() {
-        let v = u16x4::new(0, 1, 2, 3);
-        let r = vget_lane_u16::<2>(transmute(v));
-        assert_eq!(r, 2);
-        let r = vget_lane_u16::<3>(transmute(v));
-        assert_eq!(r, 3);
-        let r = vget_lane_u16::<0>(transmute(v));
-        assert_eq!(r, 0);
-        let r = vget_lane_u16::<1>(transmute(v));
-        assert_eq!(r, 1);
+    #[simd_test(enable = "neon,aes")]
+    unsafe fn test_vld1_lane_p64() {
+        let a = u64x1::new(0);
+        let elem: p64 = 42;
+        let e = u64x1::new(42);
+        let r: u64x1 = transmute(vld1_lane_p64::<0>(&elem, transmute(a)));
+        assert_eq!(r, e)
     }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_f32() {
-        let v = f32x2::new(0.0, 1.0);
-        let r = vget_lane_f32::<1>(transmute(v));
-        assert_eq!(r, 1.0);
-        let r = vget_lane_f32::<0>(transmute(v));
-        assert_eq!(r, 0.0);
+
+    #[simd_test(enable = "neon,aes")]
+    unsafe fn test_vld1q_lane_p64() {
+        let a = u64x2::new(0, 1);
+        let elem: p64 = 42;
+        let e = u64x2::new(0, 42);
+        let r: u64x2 = transmute(vld1q_lane_p64::<1>(&elem, transmute(a)));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_s32() {
-        let v = i32x2::new(0, 1);
-        let r = vget_lane_s32::<1>(transmute(v));
-        assert_eq!(r, 1);
-        let r = vget_lane_s32::<0>(transmute(v));
-        assert_eq!(r, 0);
+    unsafe fn test_vld1_lane_f32() {
+        let a = f32x2::new(0., 1.);
+        let elem: f32 = 42.;
+        let e = f32x2::new(0., 42.);
+        let r: f32x2 = transmute(vld1_lane_f32::<1>(&elem, transmute(a)));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_u32() {
-        let v = u32x2::new(0, 1);
-        let r = vget_lane_u32::<1>(transmute(v));
-        assert_eq!(r, 1);
-        let r = vget_lane_u32::<0>(transmute(v));
-        assert_eq!(r, 0);
+    unsafe fn test_vld1q_lane_f32() {
+        let a = f32x4::new(0., 1., 2., 3.);
+        let elem: f32 = 42.;
+        let e = f32x4::new(0., 1., 2., 42.);
+        let r: f32x4 = transmute(vld1q_lane_f32::<3>(&elem, transmute(a)));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_s64() {
-        let v = i64x1::new(1);
-        let r = vget_lane_s64::<0>(transmute(v));
-        assert_eq!(r, 1);
+    unsafe fn test_vld1_dup_s8() {
+        let elem: i8 = 42;
+        let e = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let r: i8x8 = transmute(vld1_dup_s8(&elem));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_p64() {
-        let v = u64x1::new(1);
-        let r = vget_lane_p64::<0>(transmute(v));
-        assert_eq!(r, 1);
+    unsafe fn test_vld1q_dup_s8() {
+        let elem: i8 = 42;
+        let e = i8x16::new(
+            42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+        );
+        let r: i8x16 = transmute(vld1q_dup_s8(&elem));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_s8() {
-        let v = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let r = vgetq_lane_s8::<7>(transmute(v));
-        assert_eq!(r, 7);
-        let r = vgetq_lane_s8::<13>(transmute(v));
-        assert_eq!(r, 13);
-        let r = vgetq_lane_s8::<3>(transmute(v));
-        assert_eq!(r, 3);
-        let r = vgetq_lane_s8::<0>(transmute(v));
-        assert_eq!(r, 0);
+    unsafe fn test_vld1_dup_s16() {
+        let elem: i16 = 42;
+        let e = i16x4::new(42, 42, 42, 42);
+        let r: i16x4 = transmute(vld1_dup_s16(&elem));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_p8() {
-        let v = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let r = vgetq_lane_p8::<7>(transmute(v));
-        assert_eq!(r, 7);
-        let r = vgetq_lane_p8::<13>(transmute(v));
-        assert_eq!(r, 13);
-        let r = vgetq_lane_p8::<3>(transmute(v));
-        assert_eq!(r, 3);
-        let r = vgetq_lane_p8::<0>(transmute(v));
-        assert_eq!(r, 0);
+    unsafe fn test_vld1q_dup_s16() {
+        let elem: i16 = 42;
+        let e = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let r: i16x8 = transmute(vld1q_dup_s16(&elem));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_u8() {
-        let v = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let r = vgetq_lane_u8::<7>(transmute(v));
-        assert_eq!(r, 7);
-        let r = vgetq_lane_u8::<13>(transmute(v));
-        assert_eq!(r, 13);
-        let r = vgetq_lane_u8::<3>(transmute(v));
-        assert_eq!(r, 3);
-        let r = vgetq_lane_u8::<0>(transmute(v));
-        assert_eq!(r, 0);
+    unsafe fn test_vld1_dup_s32() {
+        let elem: i32 = 42;
+        let e = i32x2::new(42, 42);
+        let r: i32x2 = transmute(vld1_dup_s32(&elem));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_s16() {
-        let v = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r = vgetq_lane_s16::<3>(transmute(v));
-        assert_eq!(r, 3);
-        let r = vgetq_lane_s16::<6>(transmute(v));
-        assert_eq!(r, 6);
-        let r = vgetq_lane_s16::<0>(transmute(v));
-        assert_eq!(r, 0);
+    unsafe fn test_vld1q_dup_s32() {
+        let elem: i32 = 42;
+        let e = i32x4::new(42, 42, 42, 42);
+        let r: i32x4 = transmute(vld1q_dup_s32(&elem));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_p16() {
-        let v = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r = vgetq_lane_p16::<3>(transmute(v));
-        assert_eq!(r, 3);
-        let r = vgetq_lane_p16::<7>(transmute(v));
-        assert_eq!(r, 7);
-        let r = vgetq_lane_p16::<1>(transmute(v));
-        assert_eq!(r, 1);
+    unsafe fn test_vld1_dup_s64() {
+        let elem: i64 = 42;
+        let e = i64x1::new(42);
+        let r: i64x1 = transmute(vld1_dup_s64(&elem));
+        assert_eq!(r, e)
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_f32() {
-        let v = f32x4::new(0.0, 1.0, 2.0, 3.0);
-        let r = vgetq_lane_f32::<3>(transmute(v));
-        assert_eq!(r, 3.0);
-        let r = vgetq_lane_f32::<0>(transmute(v));
-        assert_eq!(r, 0.0);
-        let r = vgetq_lane_f32::<2>(transmute(v));
-        assert_eq!(r, 2.0);
-        let r = vgetq_lane_f32::<1>(transmute(v));
-        assert_eq!(r, 1.0);
+    unsafe fn test_vld1q_dup_s64() {
+        let elem: i64 = 42;
+        let e = i64x2::new(42, 42);
+        let r: i64x2 = transmute(vld1q_dup_s64(&elem));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_s64() {
-        let v = i64x2::new(0, 1);
-        let r = vgetq_lane_s64::<1>(transmute(v));
-        assert_eq!(r, 1);
-        let r = vgetq_lane_s64::<0>(transmute(v));
-        assert_eq!(r, 0);
+    unsafe fn test_vld1_dup_u8() {
+        let elem: u8 = 42;
+        let e = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let r: u8x8 = transmute(vld1_dup_u8(&elem));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_p64() {
-        let v = u64x2::new(0, 1);
-        let r = vgetq_lane_p64::<1>(transmute(v));
-        assert_eq!(r, 1);
-        let r = vgetq_lane_p64::<0>(transmute(v));
-        assert_eq!(r, 0);
+    unsafe fn test_vld1q_dup_u8() {
+        let elem: u8 = 42;
+        let e = u8x16::new(
+            42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+        );
+        let r: u8x16 = transmute(vld1q_dup_u8(&elem));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vext_s64() {
-        let a: i64x1 = i64x1::new(0);
-        let b: i64x1 = i64x1::new(1);
-        let e: i64x1 = i64x1::new(0);
-        let r: i64x1 = transmute(vext_s64::<0>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
+    unsafe fn test_vld1_dup_u16() {
+        let elem: u16 = 42;
+        let e = u16x4::new(42, 42, 42, 42);
+        let r: u16x4 = transmute(vld1_dup_u16(&elem));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vext_u64() {
-        let a: u64x1 = u64x1::new(0);
-        let b: u64x1 = u64x1::new(1);
-        let e: u64x1 = u64x1::new(0);
-        let r: u64x1 = transmute(vext_u64::<0>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
+    unsafe fn test_vld1q_dup_u16() {
+        let elem: u16 = 42;
+        let e = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let r: u16x8 = transmute(vld1q_dup_u16(&elem));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_s8() {
-        let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e = i8x8::new(9, 10, 11, 12, 13, 14, 15, 16);
-        let r: i8x8 = transmute(vget_high_s8(transmute(a)));
-        assert_eq!(r, e);
+    unsafe fn test_vld1_dup_u32() {
+        let elem: u32 = 42;
+        let e = u32x2::new(42, 42);
+        let r: u32x2 = transmute(vld1_dup_u32(&elem));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_s16() {
-        let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e = i16x4::new(5, 6, 7, 8);
-        let r: i16x4 = transmute(vget_high_s16(transmute(a)));
-        assert_eq!(r, e);
+    unsafe fn test_vld1q_dup_u32() {
+        let elem: u32 = 42;
+        let e = u32x4::new(42, 42, 42, 42);
+        let r: u32x4 = transmute(vld1q_dup_u32(&elem));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_s32() {
-        let a = i32x4::new(1, 2, 3, 4);
-        let e = i32x2::new(3, 4);
-        let r: i32x2 = transmute(vget_high_s32(transmute(a)));
-        assert_eq!(r, e);
+    unsafe fn test_vld1_dup_u64() {
+        let elem: u64 = 42;
+        let e = u64x1::new(42);
+        let r: u64x1 = transmute(vld1_dup_u64(&elem));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_s64() {
-        let a = i64x2::new(1, 2);
-        let e = i64x1::new(2);
-        let r: i64x1 = transmute(vget_high_s64(transmute(a)));
-        assert_eq!(r, e);
+    unsafe fn test_vld1q_dup_u64() {
+        let elem: u64 = 42;
+        let e = u64x2::new(42, 42);
+        let r: u64x2 = transmute(vld1q_dup_u64(&elem));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_u8() {
-        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e = u8x8::new(9, 10, 11, 12, 13, 14, 15, 16);
-        let r: u8x8 = transmute(vget_high_u8(transmute(a)));
-        assert_eq!(r, e);
+    unsafe fn test_vld1_dup_p8() {
+        let elem: p8 = 42;
+        let e = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let r: u8x8 = transmute(vld1_dup_p8(&elem));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_u16() {
-        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e = u16x4::new(5, 6, 7, 8);
-        let r: u16x4 = transmute(vget_high_u16(transmute(a)));
-        assert_eq!(r, e);
+    unsafe fn test_vld1q_dup_p8() {
+        let elem: p8 = 42;
+        let e = u8x16::new(
+            42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+        );
+        let r: u8x16 = transmute(vld1q_dup_p8(&elem));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_u32() {
-        let a = u32x4::new(1, 2, 3, 4);
-        let e = u32x2::new(3, 4);
-        let r: u32x2 = transmute(vget_high_u32(transmute(a)));
-        assert_eq!(r, e);
+    unsafe fn test_vld1_dup_p16() {
+        let elem: p16 = 42;
+        let e = u16x4::new(42, 42, 42, 42);
+        let r: u16x4 = transmute(vld1_dup_p16(&elem));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_u64() {
-        let a = u64x2::new(1, 2);
-        let e = u64x1::new(2);
-        let r: u64x1 = transmute(vget_high_u64(transmute(a)));
-        assert_eq!(r, e);
+    unsafe fn test_vld1q_dup_p16() {
+        let elem: p16 = 42;
+        let e = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let r: u16x8 = transmute(vld1q_dup_p16(&elem));
+        assert_eq!(r, e)
     }
 
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_p8() {
-        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e = u8x8::new(9, 10, 11, 12, 13, 14, 15, 16);
-        let r: u8x8 = transmute(vget_high_p8(transmute(a)));
-        assert_eq!(r, e);
+    #[simd_test(enable = "neon,aes")]
+    unsafe fn test_vld1_dup_p64() {
+        let elem: p64 = 42;
+        let e = u64x1::new(42);
+        let r: u64x1 = transmute(vld1_dup_p64(&elem));
+        assert_eq!(r, e)
     }
 
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_p16() {
-        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e = u16x4::new(5, 6, 7, 8);
-        let r: u16x4 = transmute(vget_high_p16(transmute(a)));
-        assert_eq!(r, e);
+    #[simd_test(enable = "neon,aes")]
+    unsafe fn test_vld1q_dup_p64() {
+        let elem: p64 = 42;
+        let e = u64x2::new(42, 42);
+        let r: u64x2 = transmute(vld1q_dup_p64(&elem));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_f32() {
-        let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
-        let e = f32x2::new(3.0, 4.0);
-        let r: f32x2 = transmute(vget_high_f32(transmute(a)));
-        assert_eq!(r, e);
+    unsafe fn test_vld1_dup_f32() {
+        let elem: f32 = 42.;
+        let e = f32x2::new(42., 42.);
+        let r: f32x2 = transmute(vld1_dup_f32(&elem));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_s8() {
-        let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r: i8x8 = transmute(vget_low_s8(transmute(a)));
-        assert_eq!(r, e);
+    unsafe fn test_vld1q_dup_f32() {
+        let elem: f32 = 42.;
+        let e = f32x4::new(42., 42., 42., 42.);
+        let r: f32x4 = transmute(vld1q_dup_f32(&elem));
+        assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_s16() {
-        let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e = i16x4::new(1, 2, 3, 4);
-        let r: i16x4 = transmute(vget_low_s16(transmute(a)));
-        assert_eq!(r, e);
+    unsafe fn test_vget_lane_u8() {
+        let v = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let r = vget_lane_u8::<1>(transmute(v));
+        assert_eq!(r, 2);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_s32() {
-        let a = i32x4::new(1, 2, 3, 4);
-        let e = i32x2::new(1, 2);
-        let r: i32x2 = transmute(vget_low_s32(transmute(a)));
-        assert_eq!(r, e);
+    unsafe fn test_vgetq_lane_u32() {
+        let v = u32x4::new(1, 2, 3, 4);
+        let r = vgetq_lane_u32::<1>(transmute(v));
+        assert_eq!(r, 2);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_s64() {
-        let a = i64x2::new(1, 2);
-        let e = i64x1::new(1);
-        let r: i64x1 = transmute(vget_low_s64(transmute(a)));
-        assert_eq!(r, e);
+    unsafe fn test_vgetq_lane_s32() {
+        let v = i32x4::new(1, 2, 3, 4);
+        let r = vgetq_lane_s32::<1>(transmute(v));
+        assert_eq!(r, 2);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_u8() {
-        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r: u8x8 = transmute(vget_low_u8(transmute(a)));
-        assert_eq!(r, e);
+    unsafe fn test_vget_lane_u64() {
+        let v = u64x1::new(1);
+        let r = vget_lane_u64::<0>(transmute(v));
+        assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_u16() {
-        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e = u16x4::new(1, 2, 3, 4);
-        let r: u16x4 = transmute(vget_low_u16(transmute(a)));
-        assert_eq!(r, e);
+    unsafe fn test_vgetq_lane_u16() {
+        let v = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let r = vgetq_lane_u16::<1>(transmute(v));
+        assert_eq!(r, 2);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_u32() {
-        let a = u32x4::new(1, 2, 3, 4);
-        let e = u32x2::new(1, 2);
-        let r: u32x2 = transmute(vget_low_u32(transmute(a)));
-        assert_eq!(r, e);
+    unsafe fn test_vget_lane_s8() {
+        let v = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r = vget_lane_s8::<2>(transmute(v));
+        assert_eq!(r, 2);
+        let r = vget_lane_s8::<4>(transmute(v));
+        assert_eq!(r, 4);
+        let r = vget_lane_s8::<5>(transmute(v));
+        assert_eq!(r, 5);
     }
-
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_u64() {
-        let a = u64x2::new(1, 2);
-        let e = u64x1::new(1);
-        let r: u64x1 = transmute(vget_low_u64(transmute(a)));
-        assert_eq!(r, e);
+    unsafe fn test_vget_lane_p8() {
+        let v = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r = vget_lane_p8::<2>(transmute(v));
+        assert_eq!(r, 2);
+        let r = vget_lane_p8::<3>(transmute(v));
+        assert_eq!(r, 3);
+        let r = vget_lane_p8::<5>(transmute(v));
+        assert_eq!(r, 5);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_p8() {
-        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r: u8x8 = transmute(vget_low_p8(transmute(a)));
-        assert_eq!(r, e);
+    unsafe fn test_vget_lane_p16() {
+        let v = u16x4::new(0, 1, 2, 3);
+        let r = vget_lane_p16::<2>(transmute(v));
+        assert_eq!(r, 2);
+        let r = vget_lane_p16::<3>(transmute(v));
+        assert_eq!(r, 3);
+        let r = vget_lane_p16::<0>(transmute(v));
+        assert_eq!(r, 0);
+        let r = vget_lane_p16::<1>(transmute(v));
+        assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_p16() {
-        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e = u16x4::new(1, 2, 3, 4);
-        let r: u16x4 = transmute(vget_low_p16(transmute(a)));
-        assert_eq!(r, e);
+    unsafe fn test_vget_lane_s16() {
+        let v = i16x4::new(0, 1, 2, 3);
+        let r = vget_lane_s16::<2>(transmute(v));
+        assert_eq!(r, 2);
+        let r = vget_lane_s16::<3>(transmute(v));
+        assert_eq!(r, 3);
+        let r = vget_lane_s16::<0>(transmute(v));
+        assert_eq!(r, 0);
+        let r = vget_lane_s16::<1>(transmute(v));
+        assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_f32() {
-        let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
-        let e = f32x2::new(1.0, 2.0);
-        let r: f32x2 = transmute(vget_low_f32(transmute(a)));
-        assert_eq!(r, e);
+    unsafe fn test_vget_lane_u16() {
+        let v = u16x4::new(0, 1, 2, 3);
+        let r = vget_lane_u16::<2>(transmute(v));
+        assert_eq!(r, 2);
+        let r = vget_lane_u16::<3>(transmute(v));
+        assert_eq!(r, 3);
+        let r = vget_lane_u16::<0>(transmute(v));
+        assert_eq!(r, 0);
+        let r = vget_lane_u16::<1>(transmute(v));
+        assert_eq!(r, 1);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vget_lane_f32() {
+        let v = f32x2::new(0.0, 1.0);
+        let r = vget_lane_f32::<1>(transmute(v));
+        assert_eq!(r, 1.0);
+        let r = vget_lane_f32::<0>(transmute(v));
+        assert_eq!(r, 0.0);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_s8() {
-        let v: i8 = 42;
-        let e = i8x16::new(
-            42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
-        );
-        let r: i8x16 = transmute(vdupq_n_s8(v));
-        assert_eq!(r, e);
+    unsafe fn test_vget_lane_s32() {
+        let v = i32x2::new(0, 1);
+        let r = vget_lane_s32::<1>(transmute(v));
+        assert_eq!(r, 1);
+        let r = vget_lane_s32::<0>(transmute(v));
+        assert_eq!(r, 0);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_s16() {
-        let v: i16 = 64;
-        let e = i16x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: i16x8 = transmute(vdupq_n_s16(v));
-        assert_eq!(r, e);
+    unsafe fn test_vget_lane_u32() {
+        let v = u32x2::new(0, 1);
+        let r = vget_lane_u32::<1>(transmute(v));
+        assert_eq!(r, 1);
+        let r = vget_lane_u32::<0>(transmute(v));
+        assert_eq!(r, 0);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_s32() {
-        let v: i32 = 64;
-        let e = i32x4::new(64, 64, 64, 64);
-        let r: i32x4 = transmute(vdupq_n_s32(v));
-        assert_eq!(r, e);
+    unsafe fn test_vget_lane_s64() {
+        let v = i64x1::new(1);
+        let r = vget_lane_s64::<0>(transmute(v));
+        assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_s64() {
-        let v: i64 = 64;
-        let e = i64x2::new(64, 64);
-        let r: i64x2 = transmute(vdupq_n_s64(v));
-        assert_eq!(r, e);
+    unsafe fn test_vget_lane_p64() {
+        let v = u64x1::new(1);
+        let r = vget_lane_p64::<0>(transmute(v));
+        assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_u8() {
-        let v: u8 = 64;
-        let e = u8x16::new(
-            64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
-        );
-        let r: u8x16 = transmute(vdupq_n_u8(v));
-        assert_eq!(r, e);
+    unsafe fn test_vgetq_lane_s8() {
+        let v = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let r = vgetq_lane_s8::<7>(transmute(v));
+        assert_eq!(r, 7);
+        let r = vgetq_lane_s8::<13>(transmute(v));
+        assert_eq!(r, 13);
+        let r = vgetq_lane_s8::<3>(transmute(v));
+        assert_eq!(r, 3);
+        let r = vgetq_lane_s8::<0>(transmute(v));
+        assert_eq!(r, 0);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_u16() {
-        let v: u16 = 64;
-        let e = u16x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: u16x8 = transmute(vdupq_n_u16(v));
-        assert_eq!(r, e);
+    unsafe fn test_vgetq_lane_p8() {
+        let v = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let r = vgetq_lane_p8::<7>(transmute(v));
+        assert_eq!(r, 7);
+        let r = vgetq_lane_p8::<13>(transmute(v));
+        assert_eq!(r, 13);
+        let r = vgetq_lane_p8::<3>(transmute(v));
+        assert_eq!(r, 3);
+        let r = vgetq_lane_p8::<0>(transmute(v));
+        assert_eq!(r, 0);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_u32() {
-        let v: u32 = 64;
-        let e = u32x4::new(64, 64, 64, 64);
-        let r: u32x4 = transmute(vdupq_n_u32(v));
-        assert_eq!(r, e);
+    unsafe fn test_vgetq_lane_u8() {
+        let v = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let r = vgetq_lane_u8::<7>(transmute(v));
+        assert_eq!(r, 7);
+        let r = vgetq_lane_u8::<13>(transmute(v));
+        assert_eq!(r, 13);
+        let r = vgetq_lane_u8::<3>(transmute(v));
+        assert_eq!(r, 3);
+        let r = vgetq_lane_u8::<0>(transmute(v));
+        assert_eq!(r, 0);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_u64() {
-        let v: u64 = 64;
-        let e = u64x2::new(64, 64);
-        let r: u64x2 = transmute(vdupq_n_u64(v));
-        assert_eq!(r, e);
+    unsafe fn test_vgetq_lane_s16() {
+        let v = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r = vgetq_lane_s16::<3>(transmute(v));
+        assert_eq!(r, 3);
+        let r = vgetq_lane_s16::<6>(transmute(v));
+        assert_eq!(r, 6);
+        let r = vgetq_lane_s16::<0>(transmute(v));
+        assert_eq!(r, 0);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_p8() {
-        let v: p8 = 64;
-        let e = u8x16::new(
-            64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
-        );
-        let r: u8x16 = transmute(vdupq_n_p8(v));
-        assert_eq!(r, e);
+    unsafe fn test_vgetq_lane_p16() {
+        let v = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r = vgetq_lane_p16::<3>(transmute(v));
+        assert_eq!(r, 3);
+        let r = vgetq_lane_p16::<7>(transmute(v));
+        assert_eq!(r, 7);
+        let r = vgetq_lane_p16::<1>(transmute(v));
+        assert_eq!(r, 1);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vgetq_lane_f32() {
+        let v = f32x4::new(0.0, 1.0, 2.0, 3.0);
+        let r = vgetq_lane_f32::<3>(transmute(v));
+        assert_eq!(r, 3.0);
+        let r = vgetq_lane_f32::<0>(transmute(v));
+        assert_eq!(r, 0.0);
+        let r = vgetq_lane_f32::<2>(transmute(v));
+        assert_eq!(r, 2.0);
+        let r = vgetq_lane_f32::<1>(transmute(v));
+        assert_eq!(r, 1.0);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_p16() {
-        let v: p16 = 64;
-        let e = u16x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: u16x8 = transmute(vdupq_n_p16(v));
-        assert_eq!(r, e);
+    unsafe fn test_vgetq_lane_s64() {
+        let v = i64x2::new(0, 1);
+        let r = vgetq_lane_s64::<1>(transmute(v));
+        assert_eq!(r, 1);
+        let r = vgetq_lane_s64::<0>(transmute(v));
+        assert_eq!(r, 0);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_f32() {
-        let v: f32 = 64.0;
-        let e = f32x4::new(64.0, 64.0, 64.0, 64.0);
-        let r: f32x4 = transmute(vdupq_n_f32(v));
-        assert_eq!(r, e);
+    unsafe fn test_vgetq_lane_p64() {
+        let v = u64x2::new(0, 1);
+        let r = vgetq_lane_p64::<1>(transmute(v));
+        assert_eq!(r, 1);
+        let r = vgetq_lane_p64::<0>(transmute(v));
+        assert_eq!(r, 0);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_s8() {
-        let v: i8 = 64;
-        let e = i8x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: i8x8 = transmute(vdup_n_s8(v));
+    unsafe fn test_vext_s64() {
+        let a: i64x1 = i64x1::new(0);
+        let b: i64x1 = i64x1::new(1);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vext_s64::<0>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_s16() {
-        let v: i16 = 64;
-        let e = i16x4::new(64, 64, 64, 64);
-        let r: i16x4 = transmute(vdup_n_s16(v));
+    unsafe fn test_vext_u64() {
+        let a: u64x1 = u64x1::new(0);
+        let b: u64x1 = u64x1::new(1);
+        let e: u64x1 = u64x1::new(0);
+        let r: u64x1 = transmute(vext_u64::<0>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
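+
+    // vget_high tests: take the upper half of a 128-bit vector.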
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_s32() {
-        let v: i32 = 64;
-        let e = i32x2::new(64, 64);
-        let r: i32x2 = transmute(vdup_n_s32(v));
+    unsafe fn test_vget_high_s8() {
+        let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let e = i8x8::new(9, 10, 11, 12, 13, 14, 15, 16);
+        let r: i8x8 = transmute(vget_high_s8(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_s64() {
-        let v: i64 = 64;
-        let e = i64x1::new(64);
-        let r: i64x1 = transmute(vdup_n_s64(v));
+    unsafe fn test_vget_high_s16() {
+        let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e = i16x4::new(5, 6, 7, 8);
+        let r: i16x4 = transmute(vget_high_s16(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_u8() {
-        let v: u8 = 64;
-        let e = u8x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: u8x8 = transmute(vdup_n_u8(v));
+    unsafe fn test_vget_high_s32() {
+        let a = i32x4::new(1, 2, 3, 4);
+        let e = i32x2::new(3, 4);
+        let r: i32x2 = transmute(vget_high_s32(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_u16() {
-        let v: u16 = 64;
-        let e = u16x4::new(64, 64, 64, 64);
-        let r: u16x4 = transmute(vdup_n_u16(v));
+    unsafe fn test_vget_high_s64() {
+        let a = i64x2::new(1, 2);
+        let e = i64x1::new(2);
+        let r: i64x1 = transmute(vget_high_s64(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_u32() {
-        let v: u32 = 64;
-        let e = u32x2::new(64, 64);
-        let r: u32x2 = transmute(vdup_n_u32(v));
+    unsafe fn test_vget_high_u8() {
+        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let e = u8x8::new(9, 10, 11, 12, 13, 14, 15, 16);
+        let r: u8x8 = transmute(vget_high_u8(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_u64() {
-        let v: u64 = 64;
-        let e = u64x1::new(64);
-        let r: u64x1 = transmute(vdup_n_u64(v));
+    unsafe fn test_vget_high_u16() {
+        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e = u16x4::new(5, 6, 7, 8);
+        let r: u16x4 = transmute(vget_high_u16(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_p8() {
-        let v: p8 = 64;
-        let e = u8x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: u8x8 = transmute(vdup_n_p8(v));
+    unsafe fn test_vget_high_u32() {
+        let a = u32x4::new(1, 2, 3, 4);
+        let e = u32x2::new(3, 4);
+        let r: u32x2 = transmute(vget_high_u32(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_p16() {
-        let v: p16 = 64;
-        let e = u16x4::new(64, 64, 64, 64);
-        let r: u16x4 = transmute(vdup_n_p16(v));
+    unsafe fn test_vget_high_u64() {
+        let a = u64x2::new(1, 2);
+        let e = u64x1::new(2);
+        let r: u64x1 = transmute(vget_high_u64(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_f32() {
-        let v: f32 = 64.0;
-        let e = f32x2::new(64.0, 64.0);
-        let r: f32x2 = transmute(vdup_n_f32(v));
+    unsafe fn test_vget_high_p8() {
+        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let e = u8x8::new(9, 10, 11, 12, 13, 14, 15, 16);
+        let r: u8x8 = transmute(vget_high_p8(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vldrq_p128() {
-        let v: [p128; 2] = [1, 2];
-        let e: p128 = 2;
-        let r: p128 = vldrq_p128(v[1..].as_ptr());
+    unsafe fn test_vget_high_p16() {
+        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e = u16x4::new(5, 6, 7, 8);
+        let r: u16x4 = transmute(vget_high_p16(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vstrq_p128() {
-        let v: [p128; 2] = [1, 2];
-        let e: p128 = 2;
-        let mut r: p128 = 1;
-        vstrq_p128(&mut r, v[1]);
+    unsafe fn test_vget_high_f32() {
+        let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
+        let e = f32x2::new(3.0, 4.0);
+        let r: f32x2 = transmute(vget_high_f32(transmute(a)));
         assert_eq!(r, e);
     }
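+
+    // vget_low tests: take the lower half of a 128-bit vector.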
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_s8() {
-        let v: i8 = 64;
-        let e = i8x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: i8x8 = transmute(vmov_n_s8(v));
+    unsafe fn test_vget_low_s8() {
+        let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let r: i8x8 = transmute(vget_low_s8(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_s16() {
-        let v: i16 = 64;
-        let e = i16x4::new(64, 64, 64, 64);
-        let r: i16x4 = transmute(vmov_n_s16(v));
+    unsafe fn test_vget_low_s16() {
+        let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e = i16x4::new(1, 2, 3, 4);
+        let r: i16x4 = transmute(vget_low_s16(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_s32() {
-        let v: i32 = 64;
-        let e = i32x2::new(64, 64);
-        let r: i32x2 = transmute(vmov_n_s32(v));
+    unsafe fn test_vget_low_s32() {
+        let a = i32x4::new(1, 2, 3, 4);
+        let e = i32x2::new(1, 2);
+        let r: i32x2 = transmute(vget_low_s32(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_s64() {
-        let v: i64 = 64;
-        let e = i64x1::new(64);
-        let r: i64x1 = transmute(vmov_n_s64(v));
+    unsafe fn test_vget_low_s64() {
+        let a = i64x2::new(1, 2);
+        let e = i64x1::new(1);
+        let r: i64x1 = transmute(vget_low_s64(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_u8() {
-        let v: u8 = 64;
-        let e = u8x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: u8x8 = transmute(vmov_n_u8(v));
+    unsafe fn test_vget_low_u8() {
+        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let r: u8x8 = transmute(vget_low_u8(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_u16() {
-        let v: u16 = 64;
-        let e = u16x4::new(64, 64, 64, 64);
-        let r: u16x4 = transmute(vmov_n_u16(v));
+    unsafe fn test_vget_low_u16() {
+        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e = u16x4::new(1, 2, 3, 4);
+        let r: u16x4 = transmute(vget_low_u16(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_u32() {
-        let v: u32 = 64;
-        let e = u32x2::new(64, 64);
-        let r: u32x2 = transmute(vmov_n_u32(v));
+    unsafe fn test_vget_low_u32() {
+        let a = u32x4::new(1, 2, 3, 4);
+        let e = u32x2::new(1, 2);
+        let r: u32x2 = transmute(vget_low_u32(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_u64() {
-        let v: u64 = 64;
-        let e = u64x1::new(64);
-        let r: u64x1 = transmute(vmov_n_u64(v));
+    unsafe fn test_vget_low_u64() {
+        let a = u64x2::new(1, 2);
+        let e = u64x1::new(1);
+        let r: u64x1 = transmute(vget_low_u64(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_p8() {
-        let v: p8 = 64;
-        let e = u8x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: u8x8 = transmute(vmov_n_p8(v));
+    unsafe fn test_vget_low_p8() {
+        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let r: u8x8 = transmute(vget_low_p8(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_p16() {
-        let v: p16 = 64;
-        let e = u16x4::new(64, 64, 64, 64);
-        let r: u16x4 = transmute(vmov_n_p16(v));
+    unsafe fn test_vget_low_p16() {
+        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e = u16x4::new(1, 2, 3, 4);
+        let r: u16x4 = transmute(vget_low_p16(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_f32() {
-        let v: f32 = 64.0;
-        let e = f32x2::new(64.0, 64.0);
-        let r: f32x2 = transmute(vmov_n_f32(v));
+    unsafe fn test_vget_low_f32() {
+        let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
+        let e = f32x2::new(1.0, 2.0);
+        let r: f32x2 = transmute(vget_low_f32(transmute(a)));
         assert_eq!(r, e);
     }
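+
+    // vdupq_n tests: broadcast a scalar to every lane of a 128-bit vector.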
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_s8() {
-        let v: i8 = 64;
+    unsafe fn test_vdupq_n_s8() {
+        let v: i8 = 42;
         let e = i8x16::new(
-            64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+            42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
         );
-        let r: i8x16 = transmute(vmovq_n_s8(v));
+        let r: i8x16 = transmute(vdupq_n_s8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_s16() {
+    unsafe fn test_vdupq_n_s16() {
         let v: i16 = 64;
         let e = i16x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: i16x8 = transmute(vmovq_n_s16(v));
+        let r: i16x8 = transmute(vdupq_n_s16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_s32() {
+    unsafe fn test_vdupq_n_s32() {
         let v: i32 = 64;
         let e = i32x4::new(64, 64, 64, 64);
-        let r: i32x4 = transmute(vmovq_n_s32(v));
+        let r: i32x4 = transmute(vdupq_n_s32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_s64() {
+    unsafe fn test_vdupq_n_s64() {
         let v: i64 = 64;
         let e = i64x2::new(64, 64);
-        let r: i64x2 = transmute(vmovq_n_s64(v));
+        let r: i64x2 = transmute(vdupq_n_s64(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_u8() {
+    unsafe fn test_vdupq_n_u8() {
         let v: u8 = 64;
         let e = u8x16::new(
             64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
         );
-        let r: u8x16 = transmute(vmovq_n_u8(v));
+        let r: u8x16 = transmute(vdupq_n_u8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_u16() {
+    unsafe fn test_vdupq_n_u16() {
         let v: u16 = 64;
         let e = u16x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: u16x8 = transmute(vmovq_n_u16(v));
+        let r: u16x8 = transmute(vdupq_n_u16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_u32() {
+    unsafe fn test_vdupq_n_u32() {
         let v: u32 = 64;
         let e = u32x4::new(64, 64, 64, 64);
-        let r: u32x4 = transmute(vmovq_n_u32(v));
+        let r: u32x4 = transmute(vdupq_n_u32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_u64() {
+    unsafe fn test_vdupq_n_u64() {
         let v: u64 = 64;
         let e = u64x2::new(64, 64);
-        let r: u64x2 = transmute(vmovq_n_u64(v));
+        let r: u64x2 = transmute(vdupq_n_u64(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_p8() {
+    unsafe fn test_vdupq_n_p8() {
         let v: p8 = 64;
         let e = u8x16::new(
             64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
         );
-        let r: u8x16 = transmute(vmovq_n_p8(v));
+        let r: u8x16 = transmute(vdupq_n_p8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_p16() {
+    unsafe fn test_vdupq_n_p16() {
         let v: p16 = 64;
         let e = u16x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: u16x8 = transmute(vmovq_n_p16(v));
+        let r: u16x8 = transmute(vdupq_n_p16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_f32() {
+    unsafe fn test_vdupq_n_f32() {
         let v: f32 = 64.0;
         let e = f32x4::new(64.0, 64.0, 64.0, 64.0);
-        let r: f32x4 = transmute(vmovq_n_f32(v));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_u64() {
-        let v = i64x2::new(1, 2);
-        let r = vgetq_lane_u64::<1>(transmute(v));
-        assert_eq!(r, 2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_s8() {
-        test_ari_s8(
-            |i, j| vadd_s8(i, j),
-            |a: i8, b: i8| -> i8 { a.overflowing_add(b).0 },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddq_s8() {
-        testq_ari_s8(
-            |i, j| vaddq_s8(i, j),
-            |a: i8, b: i8| -> i8 { a.overflowing_add(b).0 },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_s16() {
-        test_ari_s16(
-            |i, j| vadd_s16(i, j),
-            |a: i16, b: i16| -> i16 { a.overflowing_add(b).0 },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddq_s16() {
-        testq_ari_s16(
-            |i, j| vaddq_s16(i, j),
-            |a: i16, b: i16| -> i16 { a.overflowing_add(b).0 },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_s32() {
-        test_ari_s32(
-            |i, j| vadd_s32(i, j),
-            |a: i32, b: i32| -> i32 { a.overflowing_add(b).0 },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddq_s32() {
-        testq_ari_s32(
-            |i, j| vaddq_s32(i, j),
-            |a: i32, b: i32| -> i32 { a.overflowing_add(b).0 },
-        );
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_u8() {
-        test_ari_u8(
-            |i, j| vadd_u8(i, j),
-            |a: u8, b: u8| -> u8 { a.overflowing_add(b).0 },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddq_u8() {
-        testq_ari_u8(
-            |i, j| vaddq_u8(i, j),
-            |a: u8, b: u8| -> u8 { a.overflowing_add(b).0 },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_u16() {
-        test_ari_u16(
-            |i, j| vadd_u16(i, j),
-            |a: u16, b: u16| -> u16 { a.overflowing_add(b).0 },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddq_u16() {
-        testq_ari_u16(
-            |i, j| vaddq_u16(i, j),
-            |a: u16, b: u16| -> u16 { a.overflowing_add(b).0 },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_u32() {
-        test_ari_u32(
-            |i, j| vadd_u32(i, j),
-            |a: u32, b: u32| -> u32 { a.overflowing_add(b).0 },
-        );
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddq_u32() {
-        testq_ari_u32(
-            |i, j| vaddq_u32(i, j),
-            |a: u32, b: u32| -> u32 { a.overflowing_add(b).0 },
-        );
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_f32() {
-        test_ari_f32(|i, j| vadd_f32(i, j), |a: f32, b: f32| -> f32 { a + b });
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddq_f32() {
-        testq_ari_f32(|i, j| vaddq_f32(i, j), |a: f32, b: f32| -> f32 { a + b });
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_s8() {
-        let v = i8::MAX;
-        let a = i8x8::new(v, v, v, v, v, v, v, v);
-        let v = 2 * (v as i16);
-        let e = i16x8::new(v, v, v, v, v, v, v, v);
-        let r: i16x8 = transmute(vaddl_s8(transmute(a), transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_s16() {
-        let v = i16::MAX;
-        let a = i16x4::new(v, v, v, v);
-        let v = 2 * (v as i32);
-        let e = i32x4::new(v, v, v, v);
-        let r: i32x4 = transmute(vaddl_s16(transmute(a), transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_s32() {
-        let v = i32::MAX;
-        let a = i32x2::new(v, v);
-        let v = 2 * (v as i64);
-        let e = i64x2::new(v, v);
-        let r: i64x2 = transmute(vaddl_s32(transmute(a), transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_u8() {
-        let v = u8::MAX;
-        let a = u8x8::new(v, v, v, v, v, v, v, v);
-        let v = 2 * (v as u16);
-        let e = u16x8::new(v, v, v, v, v, v, v, v);
-        let r: u16x8 = transmute(vaddl_u8(transmute(a), transmute(a)));
+        let r: f32x4 = transmute(vdupq_n_f32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_u16() {
-        let v = u16::MAX;
-        let a = u16x4::new(v, v, v, v);
-        let v = 2 * (v as u32);
-        let e = u32x4::new(v, v, v, v);
-        let r: u32x4 = transmute(vaddl_u16(transmute(a), transmute(a)));
+    unsafe fn test_vdup_n_s8() {
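+        // vdup_n_* broadcasts a scalar into every lane of a 64-bit vector.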
+        let v: i8 = 64;
+        let e = i8x8::new(64, 64, 64, 64, 64, 64, 64, 64);
+        let r: i8x8 = transmute(vdup_n_s8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_u32() {
-        let v = u32::MAX;
-        let a = u32x2::new(v, v);
-        let v = 2 * (v as u64);
-        let e = u64x2::new(v, v);
-        let r: u64x2 = transmute(vaddl_u32(transmute(a), transmute(a)));
+    unsafe fn test_vdup_n_s16() {
+        let v: i16 = 64;
+        let e = i16x4::new(64, 64, 64, 64);
+        let r: i16x4 = transmute(vdup_n_s16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_high_s8() {
-        let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let x = i8::MAX;
-        let b = i8x16::new(x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
-        let x = x as i16;
-        let e = i16x8::new(x + 8, x + 9, x + 10, x + 11, x + 12, x + 13, x + 14, x + 15);
-        let r: i16x8 = transmute(vaddl_high_s8(transmute(a), transmute(b)));
+    unsafe fn test_vdup_n_s32() {
+        let v: i32 = 64;
+        let e = i32x2::new(64, 64);
+        let r: i32x2 = transmute(vdup_n_s32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_high_s16() {
-        let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let x = i16::MAX;
-        let b = i16x8::new(x, x, x, x, x, x, x, x);
-        let x = x as i32;
-        let e = i32x4::new(x + 4, x + 5, x + 6, x + 7);
-        let r: i32x4 = transmute(vaddl_high_s16(transmute(a), transmute(b)));
+    unsafe fn test_vdup_n_s64() {
+        let v: i64 = 64;
+        let e = i64x1::new(64);
+        let r: i64x1 = transmute(vdup_n_s64(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_high_s32() {
-        let a = i32x4::new(0, 1, 2, 3);
-        let x = i32::MAX;
-        let b = i32x4::new(x, x, x, x);
-        let x = x as i64;
-        let e = i64x2::new(x + 2, x + 3);
-        let r: i64x2 = transmute(vaddl_high_s32(transmute(a), transmute(b)));
+    unsafe fn test_vdup_n_u8() {
+        let v: u8 = 64;
+        let e = u8x8::new(64, 64, 64, 64, 64, 64, 64, 64);
+        let r: u8x8 = transmute(vdup_n_u8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_high_u8() {
-        let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let x = u8::MAX;
-        let b = u8x16::new(x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
-        let x = x as u16;
-        let e = u16x8::new(x + 8, x + 9, x + 10, x + 11, x + 12, x + 13, x + 14, x + 15);
-        let r: u16x8 = transmute(vaddl_high_u8(transmute(a), transmute(b)));
+    unsafe fn test_vdup_n_u16() {
+        let v: u16 = 64;
+        let e = u16x4::new(64, 64, 64, 64);
+        let r: u16x4 = transmute(vdup_n_u16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_high_u16() {
-        let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let x = u16::MAX;
-        let b = u16x8::new(x, x, x, x, x, x, x, x);
-        let x = x as u32;
-        let e = u32x4::new(x + 4, x + 5, x + 6, x + 7);
-        let r: u32x4 = transmute(vaddl_high_u16(transmute(a), transmute(b)));
+    unsafe fn test_vdup_n_u32() {
+        let v: u32 = 64;
+        let e = u32x2::new(64, 64);
+        let r: u32x2 = transmute(vdup_n_u32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_high_u32() {
-        let a = u32x4::new(0, 1, 2, 3);
-        let x = u32::MAX;
-        let b = u32x4::new(x, x, x, x);
-        let x = x as u64;
-        let e = u64x2::new(x + 2, x + 3);
-        let r: u64x2 = transmute(vaddl_high_u32(transmute(a), transmute(b)));
+    unsafe fn test_vdup_n_u64() {
+        let v: u64 = 64;
+        let e = u64x1::new(64);
+        let r: u64x1 = transmute(vdup_n_u64(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_s8() {
-        let x = i16::MAX;
-        let a = i16x8::new(x, 1, 2, 3, 4, 5, 6, 7);
-        let y = i8::MAX;
-        let b = i8x8::new(y, y, y, y, y, y, y, y);
-        let y = y as i16;
-        let e = i16x8::new(
-            x.wrapping_add(y),
-            1 + y,
-            2 + y,
-            3 + y,
-            4 + y,
-            5 + y,
-            6 + y,
-            7 + y,
-        );
-        let r: i16x8 = transmute(vaddw_s8(transmute(a), transmute(b)));
+    unsafe fn test_vdup_n_p8() {
+        let v: p8 = 64;
+        let e = u8x8::new(64, 64, 64, 64, 64, 64, 64, 64);
+        let r: u8x8 = transmute(vdup_n_p8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_s16() {
-        let x = i32::MAX;
-        let a = i32x4::new(x, 1, 2, 3);
-        let y = i16::MAX;
-        let b = i16x4::new(y, y, y, y);
-        let y = y as i32;
-        let e = i32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y);
-        let r: i32x4 = transmute(vaddw_s16(transmute(a), transmute(b)));
+    unsafe fn test_vdup_n_p16() {
+        let v: p16 = 64;
+        let e = u16x4::new(64, 64, 64, 64);
+        let r: u16x4 = transmute(vdup_n_p16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_s32() {
-        let x = i64::MAX;
-        let a = i64x2::new(x, 1);
-        let y = i32::MAX;
-        let b = i32x2::new(y, y);
-        let y = y as i64;
-        let e = i64x2::new(x.wrapping_add(y), 1 + y);
-        let r: i64x2 = transmute(vaddw_s32(transmute(a), transmute(b)));
+    unsafe fn test_vdup_n_f32() {
+        let v: f32 = 64.0;
+        let e = f32x2::new(64.0, 64.0);
+        let r: f32x2 = transmute(vdup_n_f32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_u8() {
-        let x = u16::MAX;
-        let a = u16x8::new(x, 1, 2, 3, 4, 5, 6, 7);
-        let y = u8::MAX;
-        let b = u8x8::new(y, y, y, y, y, y, y, y);
-        let y = y as u16;
-        let e = u16x8::new(
-            x.wrapping_add(y),
-            1 + y,
-            2 + y,
-            3 + y,
-            4 + y,
-            5 + y,
-            6 + y,
-            7 + y,
-        );
-        let r: u16x8 = transmute(vaddw_u8(transmute(a), transmute(b)));
+    unsafe fn test_vldrq_p128() {
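+        // vldrq_p128 loads a 128-bit polynomial value through the pointer;
+        // here it reads the second array element.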
+        let v: [p128; 2] = [1, 2];
+        let e: p128 = 2;
+        let r: p128 = vldrq_p128(v[1..].as_ptr());
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_u16() {
-        let x = u32::MAX;
-        let a = u32x4::new(x, 1, 2, 3);
-        let y = u16::MAX;
-        let b = u16x4::new(y, y, y, y);
-        let y = y as u32;
-        let e = u32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y);
-        let r: u32x4 = transmute(vaddw_u16(transmute(a), transmute(b)));
+    unsafe fn test_vstrq_p128() {
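+        // vstrq_p128 stores the 128-bit value through the pointer, overwriting r.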
+        let v: [p128; 2] = [1, 2];
+        let e: p128 = 2;
+        let mut r: p128 = 1;
+        vstrq_p128(&mut r, v[1]);
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_u32() {
-        let x = u64::MAX;
-        let a = u64x2::new(x, 1);
-        let y = u32::MAX;
-        let b = u32x2::new(y, y);
-        let y = y as u64;
-        let e = u64x2::new(x.wrapping_add(y), 1 + y);
-        let r: u64x2 = transmute(vaddw_u32(transmute(a), transmute(b)));
+    unsafe fn test_vmov_n_s8() {
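+        // vmov_n_* performs the same scalar broadcast as vdup_n_*.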
+        let v: i8 = 64;
+        let e = i8x8::new(64, 64, 64, 64, 64, 64, 64, 64);
+        let r: i8x8 = transmute(vmov_n_s8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_high_s8() {
-        let x = i16::MAX;
-        let a = i16x8::new(x, 1, 2, 3, 4, 5, 6, 7);
-        let y = i8::MAX;
-        let b = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, y, y, y, y, y, y, y, y);
-        let y = y as i16;
-        let e = i16x8::new(
-            x.wrapping_add(y),
-            1 + y,
-            2 + y,
-            3 + y,
-            4 + y,
-            5 + y,
-            6 + y,
-            7 + y,
-        );
-        let r: i16x8 = transmute(vaddw_high_s8(transmute(a), transmute(b)));
+    unsafe fn test_vmov_n_s16() {
+        let v: i16 = 64;
+        let e = i16x4::new(64, 64, 64, 64);
+        let r: i16x4 = transmute(vmov_n_s16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_high_s16() {
-        let x = i32::MAX;
-        let a = i32x4::new(x, 1, 2, 3);
-        let y = i16::MAX;
-        let b = i16x8::new(0, 0, 0, 0, y, y, y, y);
-        let y = y as i32;
-        let e = i32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y);
-        let r: i32x4 = transmute(vaddw_high_s16(transmute(a), transmute(b)));
+    unsafe fn test_vmov_n_s32() {
+        let v: i32 = 64;
+        let e = i32x2::new(64, 64);
+        let r: i32x2 = transmute(vmov_n_s32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_high_s32() {
-        let x = i64::MAX;
-        let a = i64x2::new(x, 1);
-        let y = i32::MAX;
-        let b = i32x4::new(0, 0, y, y);
-        let y = y as i64;
-        let e = i64x2::new(x.wrapping_add(y), 1 + y);
-        let r: i64x2 = transmute(vaddw_high_s32(transmute(a), transmute(b)));
+    unsafe fn test_vmov_n_s64() {
+        let v: i64 = 64;
+        let e = i64x1::new(64);
+        let r: i64x1 = transmute(vmov_n_s64(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_high_u8() {
-        let x = u16::MAX;
-        let a = u16x8::new(x, 1, 2, 3, 4, 5, 6, 7);
-        let y = u8::MAX;
-        let b = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, y, y, y, y, y, y, y, y);
-        let y = y as u16;
-        let e = u16x8::new(
-            x.wrapping_add(y),
-            1 + y,
-            2 + y,
-            3 + y,
-            4 + y,
-            5 + y,
-            6 + y,
-            7 + y,
-        );
-        let r: u16x8 = transmute(vaddw_high_u8(transmute(a), transmute(b)));
+    unsafe fn test_vmov_n_u8() {
+        let v: u8 = 64;
+        let e = u8x8::new(64, 64, 64, 64, 64, 64, 64, 64);
+        let r: u8x8 = transmute(vmov_n_u8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_high_u16() {
-        let x = u32::MAX;
-        let a = u32x4::new(x, 1, 2, 3);
-        let y = u16::MAX;
-        let b = u16x8::new(0, 0, 0, 0, y, y, y, y);
-        let y = y as u32;
-        let e = u32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y);
-        let r: u32x4 = transmute(vaddw_high_u16(transmute(a), transmute(b)));
+    unsafe fn test_vmov_n_u16() {
+        let v: u16 = 64;
+        let e = u16x4::new(64, 64, 64, 64);
+        let r: u16x4 = transmute(vmov_n_u16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_high_u32() {
-        let x = u64::MAX;
-        let a = u64x2::new(x, 1);
-        let y = u32::MAX;
-        let b = u32x4::new(0, 0, y, y);
-        let y = y as u64;
-        let e = u64x2::new(x.wrapping_add(y), 1 + y);
-        let r: u64x2 = transmute(vaddw_high_u32(transmute(a), transmute(b)));
+    unsafe fn test_vmov_n_u32() {
+        let v: u32 = 64;
+        let e = u32x2::new(64, 64);
+        let r: u32x2 = transmute(vmov_n_u32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddhn_s16() {
-        let a = i16x8::new(
-            (0 << 8) + 1,
-            (1 << 8) + 1,
-            (2 << 8) + 1,
-            (3 << 8) + 1,
-            (4 << 8) + 1,
-            (5 << 8) + 1,
-            (6 << 8) + 1,
-            (7 << 8) + 1,
-        );
-        let e = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let r: i8x8 = transmute(vaddhn_s16(transmute(a), transmute(a)));
+    unsafe fn test_vmov_n_u64() {
+        let v: u64 = 64;
+        let e = u64x1::new(64);
+        let r: u64x1 = transmute(vmov_n_u64(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddhn_s32() {
-        let a = i32x4::new((0 << 16) + 1, (1 << 16) + 1, (2 << 16) + 1, (3 << 16) + 1);
-        let e = i16x4::new(0, 2, 4, 6);
-        let r: i16x4 = transmute(vaddhn_s32(transmute(a), transmute(a)));
+    unsafe fn test_vmov_n_p8() {
+        let v: p8 = 64;
+        let e = u8x8::new(64, 64, 64, 64, 64, 64, 64, 64);
+        let r: u8x8 = transmute(vmov_n_p8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddhn_s64() {
-        let a = i64x2::new((0 << 32) + 1, (1 << 32) + 1);
-        let e = i32x2::new(0, 2);
-        let r: i32x2 = transmute(vaddhn_s64(transmute(a), transmute(a)));
+    unsafe fn test_vmov_n_p16() {
+        let v: p16 = 64;
+        let e = u16x4::new(64, 64, 64, 64);
+        let r: u16x4 = transmute(vmov_n_p16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddhn_u16() {
-        let a = u16x8::new(
-            (0 << 8) + 1,
-            (1 << 8) + 1,
-            (2 << 8) + 1,
-            (3 << 8) + 1,
-            (4 << 8) + 1,
-            (5 << 8) + 1,
-            (6 << 8) + 1,
-            (7 << 8) + 1,
-        );
-        let e = u8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
-        let r: u8x8 = transmute(vaddhn_u16(transmute(a), transmute(a)));
+    unsafe fn test_vmov_n_f32() {
+        let v: f32 = 64.0;
+        let e = f32x2::new(64.0, 64.0);
+        let r: f32x2 = transmute(vmov_n_f32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddhn_u32() {
-        let a = u32x4::new((0 << 16) + 1, (1 << 16) + 1, (2 << 16) + 1, (3 << 16) + 1);
-        let e = u16x4::new(0, 2, 4, 6);
-        let r: u16x4 = transmute(vaddhn_u32(transmute(a), transmute(a)));
+    unsafe fn test_vmovq_n_s8() {
+        let v: i8 = 64;
+        let e = i8x16::new(
+            64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        );
+        let r: i8x16 = transmute(vmovq_n_s8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddhn_u64() {
-        let a = u64x2::new((0 << 32) + 1, (1 << 32) + 1);
-        let e = u32x2::new(0, 2);
-        let r: u32x2 = transmute(vaddhn_u64(transmute(a), transmute(a)));
+    unsafe fn test_vmovq_n_s16() {
+        let v: i16 = 64;
+        let e = i16x8::new(64, 64, 64, 64, 64, 64, 64, 64);
+        let r: i16x8 = transmute(vmovq_n_s16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddhn_high_s16() {
-        let r = i8x8::splat(42);
-        let a = i16x8::new(
-            (0 << 8) + 1,
-            (1 << 8) + 1,
-            (2 << 8) + 1,
-            (3 << 8) + 1,
-            (4 << 8) + 1,
-            (5 << 8) + 1,
-            (6 << 8) + 1,
-            (7 << 8) + 1,
-        );
-        let e = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 0, 2, 4, 6, 8, 10, 12, 14);
-        let r: i8x16 = transmute(vaddhn_high_s16(transmute(r), transmute(a), transmute(a)));
+    unsafe fn test_vmovq_n_s32() {
+        let v: i32 = 64;
+        let e = i32x4::new(64, 64, 64, 64);
+        let r: i32x4 = transmute(vmovq_n_s32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddhn_high_s32() {
-        let r = i16x4::splat(42);
-        let a = i32x4::new((0 << 16) + 1, (1 << 16) + 1, (2 << 16) + 1, (3 << 16) + 1);
-        let e = i16x8::new(42, 42, 42, 42, 0, 2, 4, 6);
-        let r: i16x8 = transmute(vaddhn_high_s32(transmute(r), transmute(a), transmute(a)));
+    unsafe fn test_vmovq_n_s64() {
+        let v: i64 = 64;
+        let e = i64x2::new(64, 64);
+        let r: i64x2 = transmute(vmovq_n_s64(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddhn_high_s64() {
-        let r = i32x2::splat(42);
-        let a = i64x2::new((0 << 32) + 1, (1 << 32) + 1);
-        let e = i32x4::new(42, 42, 0, 2);
-        let r: i32x4 = transmute(vaddhn_high_s64(transmute(r), transmute(a), transmute(a)));
+    unsafe fn test_vmovq_n_u8() {
+        let v: u8 = 64;
+        let e = u8x16::new(
+            64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
+        );
+        let r: u8x16 = transmute(vmovq_n_u8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddhn_high_u16() {
-        let r = u8x8::splat(42);
-        let a = u16x8::new(
-            (0 << 8) + 1,
-            (1 << 8) + 1,
-            (2 << 8) + 1,
-            (3 << 8) + 1,
-            (4 << 8) + 1,
-            (5 << 8) + 1,
-            (6 << 8) + 1,
-            (7 << 8) + 1,
-        );
-        let e = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 0, 2, 4, 6, 8, 10, 12, 14);
-        let r: u8x16 = transmute(vaddhn_high_u16(transmute(r), transmute(a), transmute(a)));
+    unsafe fn test_vmovq_n_u16() {
+        let v: u16 = 64;
+        let e = u16x8::new(64, 64, 64, 64, 64, 64, 64, 64);
+        let r: u16x8 = transmute(vmovq_n_u16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddhn_high_u32() {
-        let r = u16x4::splat(42);
-        let a = u32x4::new((0 << 16) + 1, (1 << 16) + 1, (2 << 16) + 1, (3 << 16) + 1);
-        let e = u16x8::new(42, 42, 42, 42, 0, 2, 4, 6);
-        let r: u16x8 = transmute(vaddhn_high_u32(transmute(r), transmute(a), transmute(a)));
+    unsafe fn test_vmovq_n_u32() {
+        let v: u32 = 64;
+        let e = u32x4::new(64, 64, 64, 64);
+        let r: u32x4 = transmute(vmovq_n_u32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddhn_high_u64() {
-        let r = u32x2::splat(42);
-        let a = u64x2::new((0 << 32) + 1, (1 << 32) + 1);
-        let e = u32x4::new(42, 42, 0, 2);
-        let r: u32x4 = transmute(vaddhn_high_u64(transmute(r), transmute(a), transmute(a)));
+    unsafe fn test_vmovq_n_u64() {
+        let v: u64 = 64;
+        let e = u64x2::new(64, 64);
+        let r: u64x2 = transmute(vmovq_n_u64(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vraddhn_s16() {
-        let round_constant: i16 = (1 << 8) - 1;
-        let a = i16x8::new(
-            0 << 8,
-            1 << 8,
-            2 << 8,
-            3 << 8,
-            4 << 8,
-            5 << 8,
-            6 << 8,
-            7 << 8,
-        );
-        let b = i16x8::new(
-            0 << 8,
-            (1 << 8) + round_constant,
-            2 << 8,
-            (3 << 8) + round_constant,
-            4 << 8,
-            (5 << 8) + round_constant,
-            6 << 8,
-            (7 << 8) + round_constant,
+    unsafe fn test_vmovq_n_p8() {
+        let v: p8 = 64;
+        let e = u8x16::new(
+            64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
         );
-        let e = i8x8::new(0, 3, 4, 7, 8, 11, 12, 15);
-        let r: i8x8 = transmute(vraddhn_s16(transmute(a), transmute(b)));
+        let r: u8x16 = transmute(vmovq_n_p8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vraddhn_s32() {
-        let round_constant: i32 = (1 << 16) - 1;
-        let a = i32x4::new(0 << 16, 1 << 16, 2 << 16, 3 << 16);
-        let b = i32x4::new(
-            0 << 16,
-            (1 << 16) + round_constant,
-            2 << 16,
-            (3 << 16) + round_constant,
-        );
-        let e = i16x4::new(0, 3, 4, 7);
-        let r: i16x4 = transmute(vraddhn_s32(transmute(a), transmute(b)));
+    unsafe fn test_vmovq_n_p16() {
+        let v: p16 = 64;
+        let e = u16x8::new(64, 64, 64, 64, 64, 64, 64, 64);
+        let r: u16x8 = transmute(vmovq_n_p16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vraddhn_s64() {
-        let round_constant: i64 = (1 << 32) - 1;
-        let a = i64x2::new(0 << 32, 1 << 32);
-        let b = i64x2::new(0 << 32, (1 << 32) + round_constant);
-        let e = i32x2::new(0, 3);
-        let r: i32x2 = transmute(vraddhn_s64(transmute(a), transmute(b)));
+    unsafe fn test_vmovq_n_f32() {
+        let v: f32 = 64.0;
+        let e = f32x4::new(64.0, 64.0, 64.0, 64.0);
+        let r: f32x4 = transmute(vmovq_n_f32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vraddhn_u16() {
-        let round_constant: u16 = (1 << 8) - 1;
-        let a = u16x8::new(
-            0 << 8,
-            1 << 8,
-            2 << 8,
-            3 << 8,
-            4 << 8,
-            5 << 8,
-            6 << 8,
-            7 << 8,
+    unsafe fn test_vgetq_lane_u64() {
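+        // Extract lane 1 (the second element) from the 128-bit vector.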
+        let v = u64x2::new(1, 2);
+        let r = vgetq_lane_u64::<1>(transmute(v));
+        assert_eq!(r, 2);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vadd_s8() {
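+        // test_ari_* compares the vector op lane-by-lane against the scalar
+        // closure; the reference uses overflowing_add since vadd wraps.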
+        test_ari_s8(
+            |i, j| vadd_s8(i, j),
+            |a: i8, b: i8| -> i8 { a.overflowing_add(b).0 },
         );
-        let b = u16x8::new(
-            0 << 8,
-            (1 << 8) + round_constant,
-            2 << 8,
-            (3 << 8) + round_constant,
-            4 << 8,
-            (5 << 8) + round_constant,
-            6 << 8,
-            (7 << 8) + round_constant,
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddq_s8() {
+        testq_ari_s8(
+            |i, j| vaddq_s8(i, j),
+            |a: i8, b: i8| -> i8 { a.overflowing_add(b).0 },
         );
-        let e = u8x8::new(0, 3, 4, 7, 8, 11, 12, 15);
-        let r: u8x8 = transmute(vraddhn_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
     }
-
     #[simd_test(enable = "neon")]
-    unsafe fn test_vraddhn_u32() {
-        let round_constant: u32 = (1 << 16) - 1;
-        let a = u32x4::new(0 << 16, 1 << 16, 2 << 16, 3 << 16);
-        let b = u32x4::new(
-            0 << 16,
-            (1 << 16) + round_constant,
-            2 << 16,
-            (3 << 16) + round_constant,
+    unsafe fn test_vadd_s16() {
+        test_ari_s16(
+            |i, j| vadd_s16(i, j),
+            |a: i16, b: i16| -> i16 { a.overflowing_add(b).0 },
         );
-        let e = u16x4::new(0, 3, 4, 7);
-        let r: u16x4 = transmute(vraddhn_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
     }
-
     #[simd_test(enable = "neon")]
-    unsafe fn test_vraddhn_u64() {
-        let round_constant: u64 = (1 << 32) - 1;
-        let a = u64x2::new(0 << 32, 1 << 32);
-        let b = u64x2::new(0 << 32, (1 << 32) + round_constant);
-        let e = u32x2::new(0, 3);
-        let r: u32x2 = transmute(vraddhn_u64(transmute(a), transmute(b)));
-        assert_eq!(r, e);
+    unsafe fn test_vaddq_s16() {
+        testq_ari_s16(
+            |i, j| vaddq_s16(i, j),
+            |a: i16, b: i16| -> i16 { a.overflowing_add(b).0 },
+        );
     }
-
     #[simd_test(enable = "neon")]
-    unsafe fn test_vraddhn_high_s16() {
-        let r = i8x8::splat(42);
-        let round_constant: i16 = (1 << 8) - 1;
-        let a = i16x8::new(
-            0 << 8,
-            1 << 8,
-            2 << 8,
-            3 << 8,
-            4 << 8,
-            5 << 8,
-            6 << 8,
-            7 << 8,
+    unsafe fn test_vadd_s32() {
+        test_ari_s32(
+            |i, j| vadd_s32(i, j),
+            |a: i32, b: i32| -> i32 { a.overflowing_add(b).0 },
         );
-        let b = i16x8::new(
-            0 << 8,
-            (1 << 8) + round_constant,
-            2 << 8,
-            (3 << 8) + round_constant,
-            4 << 8,
-            (5 << 8) + round_constant,
-            6 << 8,
-            (7 << 8) + round_constant,
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddq_s32() {
+        testq_ari_s32(
+            |i, j| vaddq_s32(i, j),
+            |a: i32, b: i32| -> i32 { a.overflowing_add(b).0 },
         );
-        let e = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 0, 3, 4, 7, 8, 11, 12, 15);
-        let r: i8x16 = transmute(vraddhn_high_s16(transmute(r), transmute(a), transmute(b)));
-        assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vraddhn_high_s32() {
-        let r = i16x4::splat(42);
-        let round_constant: i32 = (1 << 16) - 1;
-        let a = i32x4::new(0 << 16, 1 << 16, 2 << 16, 3 << 16);
-        let b = i32x4::new(
-            0 << 16,
-            (1 << 16) + round_constant,
-            2 << 16,
-            (3 << 16) + round_constant,
+    unsafe fn test_vadd_u8() {
+        test_ari_u8(
+            |i, j| vadd_u8(i, j),
+            |a: u8, b: u8| -> u8 { a.overflowing_add(b).0 },
         );
-        let e = i16x8::new(42, 42, 42, 42, 0, 3, 4, 7);
-        let r: i16x8 = transmute(vraddhn_high_s32(transmute(r), transmute(a), transmute(b)));
-        assert_eq!(r, e);
     }
-
     #[simd_test(enable = "neon")]
-    unsafe fn test_vraddhn_high_s64() {
-        let r = i32x2::splat(42);
-        let round_constant: i64 = (1 << 32) - 1;
-        let a = i64x2::new(0 << 32, 1 << 32);
-        let b = i64x2::new(0 << 32, (1 << 32) + round_constant);
-        let e = i32x4::new(42, 42, 0, 3);
-        let r: i32x4 = transmute(vraddhn_high_s64(transmute(r), transmute(a), transmute(b)));
-        assert_eq!(r, e);
+    unsafe fn test_vaddq_u8() {
+        testq_ari_u8(
+            |i, j| vaddq_u8(i, j),
+            |a: u8, b: u8| -> u8 { a.overflowing_add(b).0 },
+        );
     }
-
     #[simd_test(enable = "neon")]
-    unsafe fn test_vraddhn_high_u16() {
-        let r = u8x8::splat(42);
-        let round_constant: u16 = (1 << 8) - 1;
-        let a = u16x8::new(
-            0 << 8,
-            1 << 8,
-            2 << 8,
-            3 << 8,
-            4 << 8,
-            5 << 8,
-            6 << 8,
-            7 << 8,
+    unsafe fn test_vadd_u16() {
+        test_ari_u16(
+            |i, j| vadd_u16(i, j),
+            |a: u16, b: u16| -> u16 { a.overflowing_add(b).0 },
         );
-        let b = u16x8::new(
-            0 << 8,
-            (1 << 8) + round_constant,
-            2 << 8,
-            (3 << 8) + round_constant,
-            4 << 8,
-            (5 << 8) + round_constant,
-            6 << 8,
-            (7 << 8) + round_constant,
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddq_u16() {
+        testq_ari_u16(
+            |i, j| vaddq_u16(i, j),
+            |a: u16, b: u16| -> u16 { a.overflowing_add(b).0 },
         );
-        let e = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 0, 3, 4, 7, 8, 11, 12, 15);
-        let r: u8x16 = transmute(vraddhn_high_u16(transmute(r), transmute(a), transmute(b)));
-        assert_eq!(r, e);
     }
-
     #[simd_test(enable = "neon")]
-    unsafe fn test_vraddhn_high_u32() {
-        let r = u16x4::splat(42);
-        let round_constant: u32 = (1 << 16) - 1;
-        let a = u32x4::new(0 << 16, 1 << 16, 2 << 16, 3 << 16);
-        let b = u32x4::new(
-            0 << 16,
-            (1 << 16) + round_constant,
-            2 << 16,
-            (3 << 16) + round_constant,
+    unsafe fn test_vadd_u32() {
+        test_ari_u32(
+            |i, j| vadd_u32(i, j),
+            |a: u32, b: u32| -> u32 { a.overflowing_add(b).0 },
+        );
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddq_u32() {
+        testq_ari_u32(
+            |i, j| vaddq_u32(i, j),
+            |a: u32, b: u32| -> u32 { a.overflowing_add(b).0 },
         );
-        let e = u16x8::new(42, 42, 42, 42, 0, 3, 4, 7);
-        let r: u16x8 = transmute(vraddhn_high_s32(transmute(r), transmute(a), transmute(b)));
-        assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vraddhn_high_u64() {
-        let r = u32x2::splat(42);
-        let round_constant: u64 = (1 << 32) - 1;
-        let a = u64x2::new(0 << 32, 1 << 32);
-        let b = u64x2::new(0 << 32, (1 << 32) + round_constant);
-        let e = u32x4::new(42, 42, 0, 3);
-        let r: u32x4 = transmute(vraddhn_high_s64(transmute(r), transmute(a), transmute(b)));
-        assert_eq!(r, e);
+    unsafe fn test_vadd_f32() {
+        test_ari_f32(|i, j| vadd_f32(i, j), |a: f32, b: f32| -> f32 { a + b });
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddq_f32() {
+        testq_ari_f32(|i, j| vaddq_f32(i, j), |a: f32, b: f32| -> f32 { a + b });
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddl_s8() {
-        let a = i8x8::new(-4, -3, -2, -1, 0, 1, 2, 3);
-        let r: i16x4 = transmute(vpaddl_s8(transmute(a)));
-        let e = i16x4::new(-7, -3, 1, 5);
+    unsafe fn test_vaddl_s8() {
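+        // vaddl widens both operands to 16 bits before adding, so
+        // i8::MAX + i8::MAX cannot overflow.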
+        let v = i8::MAX;
+        let a = i8x8::new(v, v, v, v, v, v, v, v);
+        let v = 2 * (v as i16);
+        let e = i16x8::new(v, v, v, v, v, v, v, v);
+        let r: i16x8 = transmute(vaddl_s8(transmute(a), transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddl_s16() {
-        let a = i16x4::new(-2, -1, 0, 1);
-        let r: i32x2 = transmute(vpaddl_s16(transmute(a)));
-        let e = i32x2::new(-3, 1);
+    unsafe fn test_vaddl_s16() {
+        let v = i16::MAX;
+        let a = i16x4::new(v, v, v, v);
+        let v = 2 * (v as i32);
+        let e = i32x4::new(v, v, v, v);
+        let r: i32x4 = transmute(vaddl_s16(transmute(a), transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddl_s32() {
-        let a = i32x2::new(-1, 0);
-        let r: i64x1 = transmute(vpaddl_s32(transmute(a)));
-        let e = i64x1::new(-1);
+    unsafe fn test_vaddl_s32() {
+        let v = i32::MAX;
+        let a = i32x2::new(v, v);
+        let v = 2 * (v as i64);
+        let e = i64x2::new(v, v);
+        let r: i64x2 = transmute(vaddl_s32(transmute(a), transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddlq_s8() {
-        let a = i8x16::new(-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7);
-        let r: i16x8 = transmute(vpaddlq_s8(transmute(a)));
-        let e = i16x8::new(-15, -11, -7, -3, 1, 5, 9, 13);
+    unsafe fn test_vaddl_u8() {
+        let v = u8::MAX;
+        let a = u8x8::new(v, v, v, v, v, v, v, v);
+        let v = 2 * (v as u16);
+        let e = u16x8::new(v, v, v, v, v, v, v, v);
+        let r: u16x8 = transmute(vaddl_u8(transmute(a), transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddlq_s16() {
-        let a = i16x8::new(-4, -3, -2, -1, 0, 1, 2, 3);
-        let r: i32x4 = transmute(vpaddlq_s16(transmute(a)));
-        let e = i32x4::new(-7, -3, 1, 5);
+    unsafe fn test_vaddl_u16() {
+        let v = u16::MAX;
+        let a = u16x4::new(v, v, v, v);
+        let v = 2 * (v as u32);
+        let e = u32x4::new(v, v, v, v);
+        let r: u32x4 = transmute(vaddl_u16(transmute(a), transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddlq_s32() {
-        let a = i32x4::new(-2, -1, 0, 1);
-        let r: i64x2 = transmute(vpaddlq_s32(transmute(a)));
-        let e = i64x2::new(-3, 1);
+    unsafe fn test_vaddl_u32() {
+        let v = u32::MAX;
+        let a = u32x2::new(v, v);
+        let v = 2 * (v as u64);
+        let e = u64x2::new(v, v);
+        let r: u64x2 = transmute(vaddl_u32(transmute(a), transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddl_u8() {
-        let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, u8::MAX);
-        let r: u16x4 = transmute(vpaddl_u8(transmute(a)));
-        let e = u16x4::new(1, 5, 9, u8::MAX as u16 + 6);
+    unsafe fn test_vaddl_high_s8() {
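+        // The _high variants widen and add only the upper eight lanes of each input.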
+        let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let x = i8::MAX;
+        let b = i8x16::new(x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
+        let x = x as i16;
+        let e = i16x8::new(x + 8, x + 9, x + 10, x + 11, x + 12, x + 13, x + 14, x + 15);
+        let r: i16x8 = transmute(vaddl_high_s8(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddl_u16() {
-        let a = u16x4::new(0, 1, 2, u16::MAX);
-        let r: u32x2 = transmute(vpaddl_u16(transmute(a)));
-        let e = u32x2::new(1, u16::MAX as u32 + 2);
+    unsafe fn test_vaddl_high_s16() {
+        let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let x = i16::MAX;
+        let b = i16x8::new(x, x, x, x, x, x, x, x);
+        let x = x as i32;
+        let e = i32x4::new(x + 4, x + 5, x + 6, x + 7);
+        let r: i32x4 = transmute(vaddl_high_s16(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddl_u32() {
-        let a = u32x2::new(1, u32::MAX);
-        let r: u64x1 = transmute(vpaddl_u32(transmute(a)));
-        let e = u64x1::new(u32::MAX as u64 + 1);
+    unsafe fn test_vaddl_high_s32() {
+        let a = i32x4::new(0, 1, 2, 3);
+        let x = i32::MAX;
+        let b = i32x4::new(x, x, x, x);
+        let x = x as i64;
+        let e = i64x2::new(x + 2, x + 3);
+        let r: i64x2 = transmute(vaddl_high_s32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddlq_u8() {
-        let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, u8::MAX);
-        let r: u16x8 = transmute(vpaddlq_u8(transmute(a)));
-        let e = u16x8::new(1, 5, 9, 13, 17, 21, 25, u8::MAX as u16 + 14);
+    unsafe fn test_vaddl_high_u8() {
+        let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let x = u8::MAX;
+        let b = u8x16::new(x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
+        let x = x as u16;
+        let e = u16x8::new(x + 8, x + 9, x + 10, x + 11, x + 12, x + 13, x + 14, x + 15);
+        let r: u16x8 = transmute(vaddl_high_u8(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddlq_u16() {
-        let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, u16::MAX);
-        let r: u32x4 = transmute(vpaddlq_u16(transmute(a)));
-        let e = u32x4::new(1, 5, 9, u16::MAX as u32 + 6);
+    unsafe fn test_vaddl_high_u16() {
+        let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let x = u16::MAX;
+        let b = u16x8::new(x, x, x, x, x, x, x, x);
+        let x = x as u32;
+        let e = u32x4::new(x + 4, x + 5, x + 6, x + 7);
+        let r: u32x4 = transmute(vaddl_high_u16(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpaddlq_u32() {
-        let a = u32x4::new(0, 1, 2, u32::MAX);
-        let r: u64x2 = transmute(vpaddlq_u32(transmute(a)));
-        let e = u64x2::new(1, u32::MAX as u64 + 2);
+    unsafe fn test_vaddl_high_u32() {
+        let a = u32x4::new(0, 1, 2, 3);
+        let x = u32::MAX;
+        let b = u32x4::new(x, x, x, x);
+        let x = x as u64;
+        let e = u64x2::new(x + 2, x + 3);
+        let r: u64x2 = transmute(vaddl_high_u32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpadal_s8() {
-        let a = i16x4::new(42, 42, 42, 42);
-        let b = i8x8::new(-4, -3, -2, -1, 0, 1, 2, 3);
-        let r: i16x4 = transmute(vpadal_s8(transmute(a), transmute(b)));
-        let e = i16x4::new(35, 39, 43, 47);
+    unsafe fn test_vaddw_s8() {
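+        // vaddw widens only the second operand; lane 0 wraps because
+        // i16::MAX + i8::MAX overflows i16.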
+        let x = i16::MAX;
+        let a = i16x8::new(x, 1, 2, 3, 4, 5, 6, 7);
+        let y = i8::MAX;
+        let b = i8x8::new(y, y, y, y, y, y, y, y);
+        let y = y as i16;
+        let e = i16x8::new(
+            x.wrapping_add(y),
+            1 + y,
+            2 + y,
+            3 + y,
+            4 + y,
+            5 + y,
+            6 + y,
+            7 + y,
+        );
+        let r: i16x8 = transmute(vaddw_s8(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpadal_s16() {
-        let a = i32x2::new(42, 42);
-        let b = i16x4::new(-2, -1, 0, 1);
-        let r: i32x2 = transmute(vpadal_s16(transmute(a), transmute(b)));
-        let e = i32x2::new(39, 43);
+    unsafe fn test_vaddw_s16() {
+        let x = i32::MAX;
+        let a = i32x4::new(x, 1, 2, 3);
+        let y = i16::MAX;
+        let b = i16x4::new(y, y, y, y);
+        let y = y as i32;
+        let e = i32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y);
+        let r: i32x4 = transmute(vaddw_s16(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpadal_s32() {
-        let a = i64x1::new(42);
-        let b = i32x2::new(-1, 0);
-        let r: i64x1 = transmute(vpadal_s32(transmute(a), transmute(b)));
-        let e = i64x1::new(41);
+    unsafe fn test_vaddw_s32() {
+        let x = i64::MAX;
+        let a = i64x2::new(x, 1);
+        let y = i32::MAX;
+        let b = i32x2::new(y, y);
+        let y = y as i64;
+        let e = i64x2::new(x.wrapping_add(y), 1 + y);
+        let r: i64x2 = transmute(vaddw_s32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpadalq_s8() {
-        let a = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
-        let b = i8x16::new(-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7);
-        let r: i16x8 = transmute(vpadalq_s8(transmute(a), transmute(b)));
-        let e = i16x8::new(27, 31, 35, 39, 43, 47, 51, 55);
+    unsafe fn test_vaddw_u8() {
+        let x = u16::MAX;
+        let a = u16x8::new(x, 1, 2, 3, 4, 5, 6, 7);
+        let y = u8::MAX;
+        let b = u8x8::new(y, y, y, y, y, y, y, y);
+        let y = y as u16;
+        let e = u16x8::new(
+            x.wrapping_add(y),
+            1 + y,
+            2 + y,
+            3 + y,
+            4 + y,
+            5 + y,
+            6 + y,
+            7 + y,
+        );
+        let r: u16x8 = transmute(vaddw_u8(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpadalq_s16() {
-        let a = i32x4::new(42, 42, 42, 42);
-        let b = i16x8::new(-4, -3, -2, -1, 0, 1, 2, 3);
-        let r: i32x4 = transmute(vpadalq_s16(transmute(a), transmute(b)));
-        let e = i32x4::new(35, 39, 43, 47);
+    unsafe fn test_vaddw_u16() {
+        let x = u32::MAX;
+        let a = u32x4::new(x, 1, 2, 3);
+        let y = u16::MAX;
+        let b = u16x4::new(y, y, y, y);
+        let y = y as u32;
+        let e = u32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y);
+        let r: u32x4 = transmute(vaddw_u16(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpadalq_s32() {
-        let a = i64x2::new(42, 42);
-        let b = i32x4::new(-2, -1, 0, 1);
-        let r: i64x2 = transmute(vpadalq_s32(transmute(a), transmute(b)));
-        let e = i64x2::new(39, 43);
+    unsafe fn test_vaddw_u32() {
+        let x = u64::MAX;
+        let a = u64x2::new(x, 1);
+        let y = u32::MAX;
+        let b = u32x2::new(y, y);
+        let y = y as u64;
+        let e = u64x2::new(x.wrapping_add(y), 1 + y);
+        let r: u64x2 = transmute(vaddw_u32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpadal_u8() {
-        let a = u16x4::new(42, 42, 42, 42);
-        let b = u8x8::new(0, 1, 2, 3, 4, 5, 6, u8::MAX);
-        let r: u16x4 = transmute(vpadal_u8(transmute(a), transmute(b)));
-        let e = u16x4::new(43, 47, 51, u8::MAX as u16 + 48);
+    unsafe fn test_vaddw_high_s8() {
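+        // Only the upper half of b (lanes 8..16) is widened and added, so the
+        // zeros in the lower half never participate.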
+        let x = i16::MAX;
+        let a = i16x8::new(x, 1, 2, 3, 4, 5, 6, 7);
+        let y = i8::MAX;
+        let b = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, y, y, y, y, y, y, y, y);
+        let y = y as i16;
+        let e = i16x8::new(
+            x.wrapping_add(y),
+            1 + y,
+            2 + y,
+            3 + y,
+            4 + y,
+            5 + y,
+            6 + y,
+            7 + y,
+        );
+        let r: i16x8 = transmute(vaddw_high_s8(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpadal_u16() {
-        let a = u32x2::new(42, 42);
-        let b = u16x4::new(0, 1, 2, u16::MAX);
-        let r: u32x2 = transmute(vpadal_u16(transmute(a), transmute(b)));
-        let e = u32x2::new(43, u16::MAX as u32 + 44);
+    unsafe fn test_vaddw_high_s16() {
+        let x = i32::MAX;
+        let a = i32x4::new(x, 1, 2, 3);
+        let y = i16::MAX;
+        let b = i16x8::new(0, 0, 0, 0, y, y, y, y);
+        let y = y as i32;
+        let e = i32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y);
+        let r: i32x4 = transmute(vaddw_high_s16(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpadal_u32() {
-        let a = u64x1::new(42);
-        let b = u32x2::new(1, u32::MAX);
-        let r: u64x1 = transmute(vpadal_u32(transmute(a), transmute(b)));
-        let e = u64x1::new(u32::MAX as u64 + 43);
+    unsafe fn test_vaddw_high_s32() {
+        let x = i64::MAX;
+        let a = i64x2::new(x, 1);
+        let y = i32::MAX;
+        let b = i32x4::new(0, 0, y, y);
+        let y = y as i64;
+        let e = i64x2::new(x.wrapping_add(y), 1 + y);
+        let r: i64x2 = transmute(vaddw_high_s32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpadalq_u8() {
-        let a = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
-        let b = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, u8::MAX);
-        let r: u16x8 = transmute(vpadalq_u8(transmute(a), transmute(b)));
-        let e = u16x8::new(43, 47, 51, 55, 59, 63, 67, u8::MAX as u16 + 56);
+    unsafe fn test_vaddw_high_u8() {
+        let x = u16::MAX;
+        let a = u16x8::new(x, 1, 2, 3, 4, 5, 6, 7);
+        let y = u8::MAX;
+        let b = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, y, y, y, y, y, y, y, y);
+        let y = y as u16;
+        let e = u16x8::new(
+            x.wrapping_add(y),
+            1 + y,
+            2 + y,
+            3 + y,
+            4 + y,
+            5 + y,
+            6 + y,
+            7 + y,
+        );
+        let r: u16x8 = transmute(vaddw_high_u8(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpadalq_u16() {
-        let a = u32x4::new(42, 42, 42, 42);
-        let b = u16x8::new(0, 1, 2, 3, 4, 5, 6, u16::MAX);
-        let r: u32x4 = transmute(vpadalq_u16(transmute(a), transmute(b)));
-        let e = u32x4::new(43, 47, 51, u16::MAX as u32 + 48);
+    unsafe fn test_vaddw_high_u16() {
+        let x = u32::MAX;
+        let a = u32x4::new(x, 1, 2, 3);
+        let y = u16::MAX;
+        let b = u16x8::new(0, 0, 0, 0, y, y, y, y);
+        let y = y as u32;
+        let e = u32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y);
+        let r: u32x4 = transmute(vaddw_high_u16(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vpadalq_u32() {
-        let a = u64x2::new(42, 42);
-        let b = u32x4::new(0, 1, 2, u32::MAX);
-        let r: u64x2 = transmute(vpadalq_u32(transmute(a), transmute(b)));
-        let e = u64x2::new(43, u32::MAX as u64 + 44);
+    unsafe fn test_vaddw_high_u32() {
+        let x = u64::MAX;
+        let a = u64x2::new(x, 1);
+        let y = u32::MAX;
+        let b = u32x4::new(0, 0, y, y);
+        let y = y as u64;
+        let e = u64x2::new(x.wrapping_add(y), 1 + y);
+        let r: u64x2 = transmute(vaddw_high_u32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
@@ -13490,132 +10827,6 @@ mod tests {
         assert_eq!(r, e);
     }
 
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmin_s8() {
-        let a = i8x8::new(1, -2, 3, -4, 5, 6, 7, 8);
-        let b = i8x8::new(0, 3, 2, 5, 4, 7, 6, 9);
-        let e = i8x8::new(-2, -4, 5, 7, 0, 2, 4, 6);
-        let r: i8x8 = transmute(vpmin_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmin_s16() {
-        let a = i16x4::new(1, 2, 3, -4);
-        let b = i16x4::new(0, 3, 2, 5);
-        let e = i16x4::new(1, -4, 0, 2);
-        let r: i16x4 = transmute(vpmin_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmin_s32() {
-        let a = i32x2::new(1, -2);
-        let b = i32x2::new(0, 3);
-        let e = i32x2::new(-2, 0);
-        let r: i32x2 = transmute(vpmin_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmin_u8() {
-        let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b = u8x8::new(0, 3, 2, 5, 4, 7, 6, 9);
-        let e = u8x8::new(1, 3, 5, 7, 0, 2, 4, 6);
-        let r: u8x8 = transmute(vpmin_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmin_u16() {
-        let a = u16x4::new(1, 2, 3, 4);
-        let b = u16x4::new(0, 3, 2, 5);
-        let e = u16x4::new(1, 3, 0, 2);
-        let r: u16x4 = transmute(vpmin_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmin_u32() {
-        let a = u32x2::new(1, 2);
-        let b = u32x2::new(0, 3);
-        let e = u32x2::new(1, 0);
-        let r: u32x2 = transmute(vpmin_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmin_f32() {
-        let a = f32x2::new(1., -2.);
-        let b = f32x2::new(0., 3.);
-        let e = f32x2::new(-2., 0.);
-        let r: f32x2 = transmute(vpmin_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmax_s8() {
-        let a = i8x8::new(1, -2, 3, -4, 5, 6, 7, 8);
-        let b = i8x8::new(0, 3, 2, 5, 4, 7, 6, 9);
-        let e = i8x8::new(1, 3, 6, 8, 3, 5, 7, 9);
-        let r: i8x8 = transmute(vpmax_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmax_s16() {
-        let a = i16x4::new(1, 2, 3, -4);
-        let b = i16x4::new(0, 3, 2, 5);
-        let e = i16x4::new(2, 3, 3, 5);
-        let r: i16x4 = transmute(vpmax_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmax_s32() {
-        let a = i32x2::new(1, -2);
-        let b = i32x2::new(0, 3);
-        let e = i32x2::new(1, 3);
-        let r: i32x2 = transmute(vpmax_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmax_u8() {
-        let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b = u8x8::new(0, 3, 2, 5, 4, 7, 6, 9);
-        let e = u8x8::new(2, 4, 6, 8, 3, 5, 7, 9);
-        let r: u8x8 = transmute(vpmax_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmax_u16() {
-        let a = u16x4::new(1, 2, 3, 4);
-        let b = u16x4::new(0, 3, 2, 5);
-        let e = u16x4::new(2, 4, 3, 5);
-        let r: u16x4 = transmute(vpmax_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmax_u32() {
-        let a = u32x2::new(1, 2);
-        let b = u32x2::new(0, 3);
-        let e = u32x2::new(2, 3);
-        let r: u32x2 = transmute(vpmax_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpmax_f32() {
-        let a = f32x2::new(1., -2.);
-        let b = f32x2::new(0., 3.);
-        let e = f32x2::new(1., 3.);
-        let r: f32x2 = transmute(vpmax_f32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
     #[simd_test(enable = "neon")]
     unsafe fn test_vand_s8() {
         test_bit_s8(|i, j| vand_s8(i, j), |a: i8, b: i8| -> i8 { a & b });
@@ -14831,49 +12042,6 @@ mod tests {
         );
     }
 
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vabs_s8() {
-        let a = i8x8::new(-1, 0, 1, -2, 0, 2, -128, 127);
-        let r: i8x8 = transmute(vabs_s8(transmute(a)));
-        let e = i8x8::new(1, 0, 1, 2, 0, 2, -128, 127);
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vabsq_s8() {
-        let a = i8x16::new(-1, 0, 1, -2, 0, 2, -128, 127, -1, 0, 1, -2, 0, 2, -128, 127);
-        let r: i8x16 = transmute(vabsq_s8(transmute(a)));
-        let e = i8x16::new(1, 0, 1, 2, 0, 2, -128, 127, 1, 0, 1, 2, 0, 2, -128, 127);
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vabs_s16() {
-        let a = i16x4::new(-1, 0, i16::MIN, i16::MAX);
-        let r: i16x4 = transmute(vabs_s16(transmute(a)));
-        let e = i16x4::new(1, 0, i16::MIN, i16::MAX);
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vabsq_s16() {
-        let a = i16x8::new(-1, 0, i16::MIN, i16::MAX, -1, 0, i16::MIN, i16::MAX);
-        let r: i16x8 = transmute(vabsq_s16(transmute(a)));
-        let e = i16x8::new(1, 0, i16::MIN, i16::MAX, 1, 0, i16::MIN, i16::MAX);
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vabs_s32() {
-        let a = i32x2::new(i32::MIN, i32::MIN + 1);
-        let r: i32x2 = transmute(vabs_s32(transmute(a)));
-        let e = i32x2::new(i32::MIN, i32::MAX);
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vabsq_s32() {
-        let a = i32x4::new(i32::MIN, i32::MIN + 1, 0, -1);
-        let r: i32x4 = transmute(vabsq_s32(transmute(a)));
-        let e = i32x4::new(i32::MIN, i32::MAX, 0, 1);
-        assert_eq!(r, e);
-    }
-
     #[simd_test(enable = "neon")]
     unsafe fn test_vaba_s8() {
         let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
@@ -14987,117 +12155,6 @@ mod tests {
         assert_eq!(r, e);
     }
 
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpadd_s16() {
-        let a = i16x4::new(1, 2, 3, 4);
-        let b = i16x4::new(0, -1, -2, -3);
-        let r: i16x4 = transmute(vpadd_s16(transmute(a), transmute(b)));
-        let e = i16x4::new(3, 7, -1, -5);
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpadd_s32() {
-        let a = i32x2::new(1, 2);
-        let b = i32x2::new(0, -1);
-        let r: i32x2 = transmute(vpadd_s32(transmute(a), transmute(b)));
-        let e = i32x2::new(3, -1);
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpadd_s8() {
-        let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b = i8x8::new(0, -1, -2, -3, -4, -5, -6, -7);
-        let r: i8x8 = transmute(vpadd_s8(transmute(a), transmute(b)));
-        let e = i8x8::new(3, 7, 11, 15, -1, -5, -9, -13);
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpadd_u16() {
-        let a = u16x4::new(1, 2, 3, 4);
-        let b = u16x4::new(30, 31, 32, 33);
-        let r: u16x4 = transmute(vpadd_u16(transmute(a), transmute(b)));
-        let e = u16x4::new(3, 7, 61, 65);
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpadd_u32() {
-        let a = u32x2::new(1, 2);
-        let b = u32x2::new(30, 31);
-        let r: u32x2 = transmute(vpadd_u32(transmute(a), transmute(b)));
-        let e = u32x2::new(3, 61);
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vpadd_u8() {
-        let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b = u8x8::new(30, 31, 32, 33, 34, 35, 36, 37);
-        let r: u8x8 = transmute(vpadd_u8(transmute(a), transmute(b)));
-        let e = u8x8::new(3, 7, 11, 15, 61, 65, 69, 73);
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcnt_s8() {
-        let a: i8x8 = transmute(u8x8::new(
-            0b11001000, 0b11111111, 0b00000000, 0b11011111, 0b10000001, 0b10101001, 0b00001000,
-            0b00111111,
-        ));
-        let e = i8x8::new(3, 8, 0, 7, 2, 4, 1, 6);
-        let r: i8x8 = transmute(vcnt_s8(transmute(a)));
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcntq_s8() {
-        let a: i8x16 = transmute(u8x16::new(
-            0b11001000, 0b11111111, 0b00000000, 0b11011111, 0b10000001, 0b10101001, 0b00001000,
-            0b00111111, 0b11101110, 0b00000000, 0b11111111, 0b00100001, 0b11111111, 0b10010111,
-            0b11100000, 0b00010000,
-        ));
-        let e = i8x16::new(3, 8, 0, 7, 2, 4, 1, 6, 6, 0, 8, 2, 8, 5, 3, 1);
-        let r: i8x16 = transmute(vcntq_s8(transmute(a)));
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcnt_u8() {
-        let a = u8x8::new(
-            0b11001000, 0b11111111, 0b00000000, 0b11011111, 0b10000001, 0b10101001, 0b00001000,
-            0b00111111,
-        );
-        let e = u8x8::new(3, 8, 0, 7, 2, 4, 1, 6);
-        let r: u8x8 = transmute(vcnt_u8(transmute(a)));
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcntq_u8() {
-        let a = u8x16::new(
-            0b11001000, 0b11111111, 0b00000000, 0b11011111, 0b10000001, 0b10101001, 0b00001000,
-            0b00111111, 0b11101110, 0b00000000, 0b11111111, 0b00100001, 0b11111111, 0b10010111,
-            0b11100000, 0b00010000,
-        );
-        let e = u8x16::new(3, 8, 0, 7, 2, 4, 1, 6, 6, 0, 8, 2, 8, 5, 3, 1);
-        let r: u8x16 = transmute(vcntq_u8(transmute(a)));
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcnt_p8() {
-        let a = u8x8::new(
-            0b11001000, 0b11111111, 0b00000000, 0b11011111, 0b10000001, 0b10101001, 0b00001000,
-            0b00111111,
-        );
-        let e = u8x8::new(3, 8, 0, 7, 2, 4, 1, 6);
-        let r: u8x8 = transmute(vcnt_p8(transmute(a)));
-        assert_eq!(r, e);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcntq_p8() {
-        let a = u8x16::new(
-            0b11001000, 0b11111111, 0b00000000, 0b11011111, 0b10000001, 0b10101001, 0b00001000,
-            0b00111111, 0b11101110, 0b00000000, 0b11111111, 0b00100001, 0b11111111, 0b10010111,
-            0b11100000, 0b00010000,
-        );
-        let e = u8x16::new(3, 8, 0, 7, 2, 4, 1, 6, 6, 0, 8, 2, 8, 5, 3, 1);
-        let r: u8x16 = transmute(vcntq_p8(transmute(a)));
-        assert_eq!(r, e);
-    }
     #[simd_test(enable = "neon")]
     unsafe fn test_vrev16_s8() {
         let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
@@ -15350,35 +12407,6 @@ mod tests {
         let e: u16x8 = transmute(vrev64q_p16(transmute(a)));
         assert_eq!(r, e);
     }
-    #[simd_test(enable = "neon,i8mm")]
-    unsafe fn test_vmmlaq_s32() {
-        let a = i32x4::new(1, 3, 4, -0x10000);
-        let b = i8x16::new(1, 21, 31, 14, 5, 6, -128, 8, 9, 13, 15, 12, 13, -1, 20, 16);
-        let c = i8x16::new(12, 22, 3, 4, -1, 56, 7, 8, 91, 10, -128, 15, 13, 14, 17, 16);
-        let e = i32x4::new(123, -5353, 690, -65576);
-        let r: i32x4 = transmute(vmmlaq_s32(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,i8mm")]
-    unsafe fn test_vmmlaq_u32() {
-        let a = u32x4::new(1, 3, 4, 0xffff0000);
-        let b = u8x16::new(1, 21, 31, 14, 5, 6, 128, 8, 9, 13, 15, 12, 13, 255, 20, 16);
-        let c = u8x16::new(12, 22, 3, 4, 255, 56, 7, 8, 91, 10, 128, 15, 13, 14, 17, 16);
-        let e = u32x4::new(3195, 6935, 18354, 4294909144);
-        let r: u32x4 = transmute(vmmlaq_u32(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon,i8mm")]
-    unsafe fn test_vusmmlaq_s32() {
-        let a = i32x4::new(1, 3, 4, -0x10000);
-        let b = u8x16::new(1, 21, 31, 14, 5, 6, 128, 8, 9, 13, 15, 12, 13, 255, 20, 16);
-        let c = i8x16::new(12, 22, 3, 4, -1, 56, 7, 8, 91, 10, -128, 15, 13, 14, 17, 16);
-        let e = i32x4::new(1915, -1001, 15026, -61992);
-        let r: i32x4 = transmute(vusmmlaq_s32(transmute(a), transmute(b), transmute(c)));
-        assert_eq!(r, e);
-    }
 
     macro_rules! test_vcombine {
         ($test_id:ident => $fn_id:ident ([$($a:expr),*], [$($b:expr),*])) => {
diff --git a/crates/core_arch/src/arm_shared/neon/shift_and_insert_tests.rs b/crates/core_arch/src/arm_shared/neon/shift_and_insert_tests.rs
index 04ebe8ab3b..cfb1a2843a 100644
--- a/crates/core_arch/src/arm_shared/neon/shift_and_insert_tests.rs
+++ b/crates/core_arch/src/arm_shared/neon/shift_and_insert_tests.rs
@@ -50,8 +50,8 @@ test_vsli!(test_vsli_n_p8, i8 => vsli_n_p8([3, 44, 127, 56, 0, 24, 97, 10], [127
 test_vsli!(test_vsliq_n_p8, i8 => vsliq_n_p8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2));
 test_vsli!(test_vsli_n_p16, i16 => vsli_n_p16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7));
 test_vsli!(test_vsliq_n_p16, i16 => vsliq_n_p16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14));
-test_vsli!(test_vsli_n_p64, i64 => vsli_n_p64([333333], [1028], 45));
-test_vsli!(test_vsliq_n_p64, i64 => vsliq_n_p64([333333, 52023], [1028, 99814], 33));
+//test_vsli!(test_vsli_n_p64, i64 => vsli_n_p64([333333], [1028], 45));
+//test_vsli!(test_vsliq_n_p64, i64 => vsliq_n_p64([333333, 52023], [1028, 99814], 33));
 
 macro_rules! test_vsri {
     ($test_id:ident, $t:ty => $fn_id:ident ([$($a:expr),*], [$($b:expr),*], $n:expr)) => {
@@ -89,5 +89,5 @@ test_vsri!(test_vsri_n_p8, i8 => vsri_n_p8([3, 44, 127, 56, 0, 24, 97, 10], [127
 test_vsri!(test_vsriq_n_p8, i8 => vsriq_n_p8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2));
 test_vsri!(test_vsri_n_p16, i16 => vsri_n_p16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7));
 test_vsri!(test_vsriq_n_p16, i16 => vsriq_n_p16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14));
-test_vsri!(test_vsri_n_p64, i64 => vsri_n_p64([333333], [1028], 45));
-test_vsri!(test_vsriq_n_p64, i64 => vsriq_n_p64([333333, 52023], [1028, 99814], 33));
+//test_vsri!(test_vsri_n_p64, i64 => vsri_n_p64([333333], [1028], 45));
+//test_vsri!(test_vsriq_n_p64, i64 => vsriq_n_p64([333333, 52023], [1028, 99814], 33));
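Reviewer note: the disabled p64 cases above exercise the NEON shift-and-insert instructions. As an aid, here is a scalar model of one SLI lane — a semantics sketch under the usual SLI definition, not stdarch code:

```rust
// Scalar model of one lane of "shift left and insert" (SLI), the operation
// test_vsli! exercises: keep the low n bits of `a`, overwrite the rest with
// `b << n`. A sketch of the semantics, not the stdarch implementation.
fn sli_lane(a: u64, b: u64, n: u32) -> u64 {
    let mask = if n == 0 { 0 } else { (1u64 << n) - 1 };
    (b << n) | (a & mask)
}

fn main() {
    // Mirrors the disabled p64 case above: insert 1028 << 45 over 333333.
    assert_eq!(sli_lane(333333, 1028, 45), (1028 << 45) | 333333);
}
```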
diff --git a/crates/intrinsic-test/Cargo.toml b/crates/intrinsic-test/Cargo.toml
index a358bea4b8..06051abc8d 100644
--- a/crates/intrinsic-test/Cargo.toml
+++ b/crates/intrinsic-test/Cargo.toml
@@ -4,7 +4,9 @@ version = "0.1.0"
 authors = ["Jamie Cunliffe <Jamie.Cunliffe@arm.com>",
     "James McGregor <James.McGregor2@arm.com",
     "Adam Gemmell <Adam.Gemmell@arm.com",
-    "Jacob Bramley <jacob.bramley@arm.com>"]
+    "Jacob Bramley <jacob.bramley@arm.com>",
+    "James Barford-Evans <james.barford-evans@arm.com>"
+    ]
 license = "MIT OR Apache-2.0"
 edition = "2024"
 
diff --git a/crates/intrinsic-test/src/argument.rs b/crates/intrinsic-test/src/argument.rs
index ebabe31927..3011bbf4a3 100644
--- a/crates/intrinsic-test/src/argument.rs
+++ b/crates/intrinsic-test/src/argument.rs
@@ -209,13 +209,13 @@ impl ArgumentList {
     /// Creates a line for each argument that initializes the argument from an array `[arg]_vals` at
     /// an offset `i` using a load intrinsic, in C.
     /// e.g `uint8x8_t a = vld1_u8(&a_vals[i]);`
-    pub fn load_values_c(&self, indentation: Indentation, p64_armv7_workaround: bool) -> String {
+    pub fn load_values_c(&self, indentation: Indentation, target: &str) -> String {
         self.iter()
             .filter_map(|arg| {
                 // The ACLE doesn't support 64-bit polynomial loads on Armv7
                 // This and the cast are a workaround for this
                 let armv7_p64 = if let TypeKind::Poly = arg.ty.kind() {
-                    p64_armv7_workaround
+                    target.contains("v7")
                 } else {
                     false
                 };
@@ -226,7 +226,7 @@ impl ArgumentList {
                         ty = arg.to_c_type(),
                         name = arg.name,
                         load = if arg.is_simd() {
-                            arg.ty.get_load_function(p64_armv7_workaround)
+                            arg.ty.get_load_function(armv7_p64)
                         } else {
                             "*".to_string()
                         },
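The boolean flag is now derived from the target triple; a quick sanity check of the substring test (hypothetical helper name, same predicate as `load_values_c` uses above):

```rust
// The predicate load_values_c now uses: Armv7 triples contain "v7", the
// AArch64 triples (little- and big-endian) do not.
fn is_armv7(target: &str) -> bool {
    target.contains("v7")
}

fn main() {
    assert!(is_armv7("armv7-unknown-linux-gnueabihf"));
    assert!(!is_armv7("aarch64-unknown-linux-gnu"));
    assert!(!is_armv7("aarch64_be-unknown-linux-gnu"));
}
```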
diff --git a/crates/intrinsic-test/src/intrinsic.rs b/crates/intrinsic-test/src/intrinsic.rs
index b83c371ea4..b5c1071777 100644
--- a/crates/intrinsic-test/src/intrinsic.rs
+++ b/crates/intrinsic-test/src/intrinsic.rs
@@ -91,7 +91,7 @@ impl Intrinsic {
         indentation: Indentation,
         additional: &str,
         passes: u32,
-        p64_armv7_workaround: bool,
+        target: &str,
     ) -> String {
         let body_indentation = indentation.nested();
         format!(
@@ -100,9 +100,7 @@ impl Intrinsic {
                 {body_indentation}auto __return_value = {intrinsic_call}({args});\n\
                 {print_result}\n\
             {indentation}}}",
-            loaded_args = self
-                .arguments
-                .load_values_c(body_indentation, p64_armv7_workaround),
+            loaded_args = self.arguments.load_values_c(body_indentation, target),
             intrinsic_call = self.name,
             args = self.arguments.as_call_param_c(),
             print_result = self.print_result_c(body_indentation, additional)
diff --git a/crates/intrinsic-test/src/main.rs b/crates/intrinsic-test/src/main.rs
index 58966d230c..d9577db3c6 100644
--- a/crates/intrinsic-test/src/main.rs
+++ b/crates/intrinsic-test/src/main.rs
@@ -37,7 +37,7 @@ fn gen_code_c(
     intrinsic: &Intrinsic,
     constraints: &[&Argument],
     name: String,
-    p64_armv7_workaround: bool,
+    target: &str,
 ) -> String {
     if let Some((current, constraints)) = constraints.split_last() {
         let range = current
@@ -62,13 +62,13 @@ fn gen_code_c(
                         intrinsic,
                         constraints,
                         format!("{name}-{i}"),
-                        p64_armv7_workaround
+                        target,
                     )
                 )
             })
             .join("\n")
     } else {
-        intrinsic.generate_loop_c(indentation, &name, PASSES, p64_armv7_workaround)
+        intrinsic.generate_loop_c(indentation, &name, PASSES, target)
     }
 }
 
@@ -76,7 +76,7 @@ fn generate_c_program(
     notices: &str,
     header_files: &[&str],
     intrinsic: &Intrinsic,
-    p64_armv7_workaround: bool,
+    target: &str,
 ) -> String {
     let constraints = intrinsic
         .arguments
@@ -131,7 +131,7 @@ int main(int argc, char **argv) {{
             intrinsic,
             constraints.as_slice(),
             Default::default(),
-            p64_armv7_workaround
+            target,
         ),
     )
 }
@@ -174,7 +174,7 @@ fn gen_code_rust(
     }
 }
 
-fn generate_rust_program(notices: &str, intrinsic: &Intrinsic, a32: bool) -> String {
+fn generate_rust_program(notices: &str, intrinsic: &Intrinsic, target: &str) -> String {
     let constraints = intrinsic
         .arguments
         .iter()
@@ -201,7 +201,11 @@ fn main() {{
 {passes}
 }}
 "#,
-        target_arch = if a32 { "arm" } else { "aarch64" },
+        target_arch = if target.contains("v7") {
+            "arm"
+        } else {
+            "aarch64"
+        },
         arglists = intrinsic
             .arguments
             .gen_arglists_rust(indentation.nested(), PASSES),
@@ -214,22 +218,70 @@ fn main() {{
     )
 }
 
-fn compile_c(c_filename: &str, intrinsic: &Intrinsic, compiler: &str, a32: bool) -> bool {
+fn compile_c(
+    c_filename: &str,
+    intrinsic: &Intrinsic,
+    compiler: &str,
+    target: &str,
+    cxx_toolchain_dir: Option<&str>,
+) -> bool {
     let flags = std::env::var("CPPFLAGS").unwrap_or("".into());
+    let arch_flags = if target.contains("v7") {
+        "-march=armv8.6-a+crypto+crc+dotprod"
+    } else {
+        "-march=armv8.6-a+crypto+sha3+crc+dotprod"
+    };
 
-    let output = Command::new("sh")
-        .arg("-c")
-        .arg(format!(
-            // -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations
-            "{cpp} {cppflags} {arch_flags} -ffp-contract=off -Wno-narrowing -O2 -target {target} -o c_programs/{intrinsic} {filename}",
-            target = if a32 { "armv7-unknown-linux-gnueabihf" } else { "aarch64-unknown-linux-gnu" },
-            arch_flags = if a32 { "-march=armv8.6-a+crypto+crc+dotprod" } else { "-march=armv8.6-a+crypto+sha3+crc+dotprod" },
-            filename = c_filename,
-            intrinsic = intrinsic.name,
-            cpp = compiler,
-            cppflags = flags,
-        ))
-        .output();
+    let intrinsic_name = &intrinsic.name;
+
+    let compiler_command = if target == "aarch64_be-unknown-linux-gnu" {
+        let Some(cxx_toolchain_dir) = cxx_toolchain_dir else {
+            panic!(
+                "When setting `--target aarch64_be-unknown-linux-gnu`, the C++ compiler's toolchain directory must be set with `--cxx-toolchain-dir <dest>`"
+            );
+        };
+
+        /* clang++ cannot link an aarch64_be object file, so we invoke
+         * aarch64_be-unknown-linux-gnu's C++ linker instead. This ensures
+         * that we are still testing the intrinsics against LLVM.
+         *
+         * Note: setting `--sysroot=<...>`, which would be the obvious
+         * approach, does not work: it trips over `#include_next <stdlib.h>`
+         * not existing. */
+        format!(
+            "{compiler} {flags} {arch_flags} \
+            -ffp-contract=off \
+            -Wno-narrowing \
+            -O2 \
+            --target=aarch64_be-unknown-linux-gnu \
+            -I{cxx_toolchain_dir}/include \
+            -I{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include \
+            -I{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.2.1 \
+            -I{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.2.1/aarch64_be-none-linux-gnu \
+            -I{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/include/c++/14.2.1/backward \
+            -I{cxx_toolchain_dir}/aarch64_be-none-linux-gnu/libc/usr/include \
+            -c {c_filename} \
+            -o c_programs/{intrinsic_name}.o && \
+            {cxx_toolchain_dir}/bin/aarch64_be-none-linux-gnu-g++ c_programs/{intrinsic_name}.o -o c_programs/{intrinsic_name} && \
+            rm c_programs/{intrinsic_name}.o",
+        )
+    } else {
+        // -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations
+        let base_compiler_command = format!(
+            "{compiler} {flags} {arch_flags} -o c_programs/{intrinsic_name} {c_filename} -ffp-contract=off -Wno-narrowing -O2"
+        );
+
+        /* `-target` can be passed to some C++ compilers; however, if we use
+         * a C++ compiler that does not support this flag, we must not pass
+         * it. */
+        if compiler.contains("clang") {
+            format!("{base_compiler_command} -target {target}")
+        } else {
+            format!("{base_compiler_command} -flax-vector-conversions")
+        }
+    };
+
+    let output = Command::new("sh").arg("-c").arg(compiler_command).output();
     if let Ok(output) = output {
         if output.status.success() {
             true
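The non-big-endian branch boils down to this flag selection (a sketch with illustrative strings; per the comment above, only clang-family compilers receive `-target`):

```rust
// Sketch of the else-branch above: clang accepts `-target <triple>`; other
// compilers (e.g. g++) instead get `-flax-vector-conversions` and no
// target flag.
fn finalize_command(base: &str, compiler: &str, target: &str) -> String {
    if compiler.contains("clang") {
        format!("{base} -target {target}")
    } else {
        format!("{base} -flax-vector-conversions")
    }
}

fn main() {
    let cmd = finalize_command("c++ -O2 -o out in.cpp", "clang++-19", "aarch64-unknown-linux-gnu");
    assert!(cmd.ends_with("-target aarch64-unknown-linux-gnu"));
}
```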
@@ -258,7 +310,13 @@ fn build_notices(line_prefix: &str) -> String {
     )
 }
 
-fn build_c(notices: &str, intrinsics: &Vec<Intrinsic>, compiler: Option<&str>, a32: bool) -> bool {
+fn build_c(
+    notices: &str,
+    intrinsics: &Vec<Intrinsic>,
+    compiler: Option<&str>,
+    target: &str,
+    cxx_toolchain_dir: Option<&str>,
+) -> bool {
     let _ = std::fs::create_dir("c_programs");
     intrinsics
         .par_iter()
@@ -266,25 +324,31 @@ fn build_c(notices: &str, intrinsics: &Vec<Intrinsic>, compiler: Option<&str>, a
             let c_filename = format!(r#"c_programs/{}.cpp"#, i.name);
             let mut file = File::create(&c_filename).unwrap();
 
-            let c_code = generate_c_program(notices, &["arm_neon.h", "arm_acle.h"], i, a32);
+            let c_code = generate_c_program(notices, &["arm_neon.h", "arm_acle.h"], i, target);
             file.write_all(c_code.into_bytes().as_slice()).unwrap();
             match compiler {
                 None => true,
-                Some(compiler) => compile_c(&c_filename, i, compiler, a32),
+                Some(compiler) => compile_c(&c_filename, i, compiler, target, cxx_toolchain_dir),
             }
         })
         .find_any(|x| !x)
         .is_none()
 }
 
-fn build_rust(notices: &str, intrinsics: &[Intrinsic], toolchain: Option<&str>, a32: bool) -> bool {
+fn build_rust(
+    notices: &str,
+    intrinsics: &[Intrinsic],
+    toolchain: Option<&str>,
+    target: &str,
+    linker: Option<&str>,
+) -> bool {
     intrinsics.iter().for_each(|i| {
         let rust_dir = format!(r#"rust_programs/{}"#, i.name);
         let _ = std::fs::create_dir_all(&rust_dir);
         let rust_filename = format!(r#"{rust_dir}/main.rs"#);
         let mut file = File::create(&rust_filename).unwrap();
 
-        let c_code = generate_rust_program(notices, i, a32);
+        let c_code = generate_rust_program(notices, i, target);
         file.write_all(c_code.into_bytes().as_slice()).unwrap();
     });
 
@@ -330,26 +394,39 @@ path = "{intrinsic}/main.rs""#,
         Some(t) => t,
     };
 
-    let output = Command::new("sh")
+    /* If a linker has been explicitly set on the command line, we pass it
+     * through via RUSTFLAGS. */
+
+    let cargo_command =
+        format!("cargo {toolchain} build --target {target} --release");
+
+    let mut command = Command::new("sh");
+    command
         .current_dir("rust_programs")
         .arg("-c")
-        .arg(format!(
-            "cargo {toolchain} build --target {target} --release",
-            toolchain = toolchain,
-            target = if a32 {
-                "armv7-unknown-linux-gnueabihf"
-            } else {
-                "aarch64-unknown-linux-gnu"
-            },
-        ))
-        .env("RUSTFLAGS", "-Cdebuginfo=0")
-        .output();
+        .arg(cargo_command);
+
+    let mut rust_flags = "-Cdebuginfo=0".to_string();
+    if let Some(linker) = linker {
+        rust_flags.push_str(" -C linker=");
+        rust_flags.push_str(linker);
+        rust_flags.push_str(" -C link-args=-static");
+
+        command.env("CPPFLAGS", "-fuse-ld=lld");
+    }
+
+    command.env("RUSTFLAGS", rust_flags);
+    let output = command.output();
+
     if let Ok(output) = output {
         if output.status.success() {
             true
         } else {
             error!(
-                "Failed to compile code for intrinsics\n\nstdout:\n{}\n\nstderr:\n{}",
+                "Failed to compile code for Rust intrinsics\n\nstdout:\n{}\n\nstderr:\n{}",
                 std::str::from_utf8(&output.stdout).unwrap_or(""),
                 std::str::from_utf8(&output.stderr).unwrap_or("")
             );
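For reference, the RUSTFLAGS value assembled above when `--linker` is supplied (the linker path here is illustrative):

```rust
// Reproduces the RUSTFLAGS construction above with a placeholder linker
// path; without --linker only `-Cdebuginfo=0` is passed.
fn main() {
    let linker: Option<&str> = Some("/opt/be-toolchain/bin/aarch64_be-none-linux-gnu-gcc");
    let mut rust_flags = "-Cdebuginfo=0".to_string();
    if let Some(linker) = linker {
        rust_flags.push_str(" -C linker=");
        rust_flags.push_str(linker);
        rust_flags.push_str(" -C link-args=-static");
    }
    assert!(rust_flags.starts_with("-Cdebuginfo=0 -C linker="));
    assert!(rust_flags.ends_with(" -C link-args=-static"));
}
```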
@@ -387,13 +464,21 @@ struct Cli {
     #[arg(long)]
     skip: Option<PathBuf>,
 
-    /// Run tests for A32 instrinsics instead of A64
-    #[arg(long)]
-    a32: bool,
-
     /// Regenerate test programs, but don't build or run them
     #[arg(long)]
     generate_only: bool,
+
+    /// Pass a target to the test suite
+    #[arg(long, default_value_t = String::from("aarch64-unknown-linux-gnu"))]
+    target: String,
+
+    /// Set the linker
+    #[arg(long)]
+    linker: Option<String>,
+
+    /// Set the toolchain directory for the C++ compiler
+    #[arg(long)]
+    cxx_toolchain_dir: Option<String>,
 }
 
 fn main() {
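With these flags, a big-endian run of the test suite would look roughly like `intrinsic-test <intrinsics.json> --target aarch64_be-unknown-linux-gnu --linker <path-to-be-gcc> --cxx-toolchain-dir <toolchain-dir> --runner "qemu-aarch64_be -L <libc>"` — all paths and the input file name are placeholders. Omitting `--target` keeps the previous little-endian aarch64 default.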
@@ -403,6 +488,10 @@ fn main() {
 
     let filename = args.input;
     let c_runner = args.runner.unwrap_or_default();
+    let target: &str = args.target.as_str();
+    let linker = args.linker.as_deref();
+    let cxx_toolchain_dir = args.cxx_toolchain_dir;
+
     let skip = if let Some(filename) = args.skip {
         let data = std::fs::read_to_string(&filename).expect("Failed to open file");
         data.lines()
@@ -413,7 +502,7 @@ fn main() {
     } else {
         Default::default()
     };
-    let a32 = args.a32;
+    let a32 = target.contains("v7");
     let mut intrinsics = get_neon_intrinsics(&filename).expect("Error parsing input file");
 
     intrinsics.sort_by(|a, b| a.name.cmp(&b.name));
@@ -450,16 +539,22 @@ fn main() {
 
     let notices = build_notices("// ");
 
-    if !build_c(&notices, &intrinsics, cpp_compiler.as_deref(), a32) {
+    if !build_c(
+        &notices,
+        &intrinsics,
+        cpp_compiler.as_deref(),
+        target,
+        cxx_toolchain_dir.as_deref(),
+    ) {
         std::process::exit(2);
     }
 
-    if !build_rust(&notices, &intrinsics, toolchain.as_deref(), a32) {
+    if !build_rust(&notices, &intrinsics, toolchain.as_deref(), target, linker) {
         std::process::exit(3);
     }
 
     if let Some(ref toolchain) = toolchain {
-        if !compare_outputs(&intrinsics, toolchain, &c_runner, a32) {
+        if !compare_outputs(&intrinsics, toolchain, &c_runner, target) {
             std::process::exit(1)
         }
     }
@@ -471,7 +566,12 @@ enum FailureReason {
     Difference(String, String, String),
 }
 
-fn compare_outputs(intrinsics: &Vec<Intrinsic>, toolchain: &str, runner: &str, a32: bool) -> bool {
+fn compare_outputs(
+    intrinsics: &Vec<Intrinsic>,
+    toolchain: &str,
+    runner: &str,
+    target: &str,
+) -> bool {
     let intrinsics = intrinsics
         .par_iter()
         .filter_map(|intrinsic| {
@@ -483,21 +583,30 @@ fn compare_outputs(intrinsics: &Vec<Intrinsic>, toolchain: &str, runner: &str, a
                     intrinsic = intrinsic.name,
                 ))
                 .output();
-            let rust = Command::new("sh")
-                .current_dir("rust_programs")
-                .arg("-c")
-                .arg(format!(
-                    "cargo {toolchain} run --target {target} --bin {intrinsic} --release",
-                    intrinsic = intrinsic.name,
-                    toolchain = toolchain,
-                    target = if a32 {
-                        "armv7-unknown-linux-gnueabihf"
-                    } else {
-                        "aarch64-unknown-linux-gnu"
-                    },
-                ))
-                .env("RUSTFLAGS", "-Cdebuginfo=0")
-                .output();
+
+            let rust = if target != "aarch64_be-unknown-linux-gnu" {
+                Command::new("sh")
+                    .current_dir("rust_programs")
+                    .arg("-c")
+                    .arg(format!(
+                        "cargo {toolchain} run --target {target} --bin {intrinsic} --release",
+                        intrinsic = intrinsic.name,
+                        toolchain = toolchain,
+                        target = target
+                    ))
+                    .env("RUSTFLAGS", "-Cdebuginfo=0")
+                    .output()
+            } else {
+                Command::new("sh")
+                    .arg("-c")
+                    .arg(format!(
+                        "{runner} ./rust_programs/target/{target}/release/{intrinsic}",
+                        runner = runner,
+                        target = target,
+                        intrinsic = intrinsic.name,
+                    ))
+                    .output()
+            };
 
             let (c, rust) = match (c, rust) {
                 (Ok(c), Ok(rust)) => (c, rust),
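For the big-endian target the Rust binary is executed directly under the configured runner rather than through `cargo run`; the command string assembled above has this shape (all values are placeholders for what the CLI supplies at runtime):

```rust
// Shape of the direct-runner invocation used for aarch64_be.
fn main() {
    let runner = "qemu-aarch64_be -L <libc-dir>";
    let target = "aarch64_be-unknown-linux-gnu";
    let intrinsic = "vaddq_s32";
    let cmd = format!("{runner} ./rust_programs/target/{target}/release/{intrinsic}");
    assert_eq!(
        cmd,
        "qemu-aarch64_be -L <libc-dir> ./rust_programs/target/aarch64_be-unknown-linux-gnu/release/vaddq_s32"
    );
}
```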
diff --git a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index 6f622b18be..3ce3e4fcb4 100644
--- a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -2,6 +2,8 @@ arch_cfgs:
   - arch_name: aarch64
     target_feature: [neon]
     llvm_prefix: llvm.aarch64.neon
+# Generate big-endian shuffles
+auto_big_endian: true
 
 # Repeatedly used anchors 
 # #[stable(feature = "neon_intrinsics", since = "1.59.0")]
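`auto_big_endian: true` asks the generator to emit lane-order fixups for big-endian targets. As a rough model of the idea — not the generator's actual output, which operates on NEON vector types:

```rust
// Plain-Rust model of a big-endian lane fixup: reverse lane order on
// big-endian targets so results keep little-endian lane numbering. Arrays
// stand in for NEON vectors here.
fn be_fixup(mut lanes: [i8; 8]) -> [i8; 8] {
    if cfg!(target_endian = "big") {
        lanes.reverse();
    }
    lanes
}

fn main() {
    let v = be_fixup([0, 1, 2, 3, 4, 5, 6, 7]);
    if cfg!(target_endian = "little") {
        assert_eq!(v, [0, 1, 2, 3, 4, 5, 6, 7]); // identity on little-endian
    }
}
```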
@@ -66,6 +68,9 @@ neon-unstable-i8mm: &neon-unstable-i8mm
 neon-unstable-fcma: &neon-unstable-fcma
   FnCall: [unstable, ['feature = "stdarch_neon_fcma"', 'issue = "117222"']]
 
+aarch64-crc-stable: &aarch64-crc-stable
+  FnCall: [stable, ['feature = "stdarch_aarch64_crc32"', 'since = "1.80.0"']]
+
 intrinsics:
   - name: "vaddd_{type}"
     doc: Add
@@ -1841,10 +1846,11 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [poly64x2_t, ' static_assert_uimm_bits!(N, 1); match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
-      - [float64x2_t, ' static_assert_uimm_bits!(N, 1); match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
+      - [poly64x2_t, ' static_assert_uimm_bits!(N, 1);', 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
+      - [float64x2_t, ' static_assert_uimm_bits!(N, 1);', 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
     compose:
       - Identifier: ["{type[1]}", Symbol]
+      - Identifier: ["{type[2]}", Symbol]
 
   - name: "vmla{neon_type.no}"
     doc: "Floating-point multiply-add to accumulator"
@@ -4631,6 +4637,7 @@ intrinsics:
           links:
             - link: "llvm.aarch64.neon.uaddlv.{type[2]}.{neon_type[0]}"
               arch: aarch64,arm64ec
+      - FnCall: ['_vaddlv{neon_type[0].no}', ['a.as_signed()']]
 
   - name: "vaddlv{neon_type[0].no}"
     doc: Unsigned Add Long across Vector
@@ -4648,6 +4655,7 @@ intrinsics:
           links:
             - link: "llvm.aarch64.neon.uaddlv.{type[2]}.{neon_type[0]}"
               arch: aarch64,arm64ec
+      - FnCall: ['_vaddlv{neon_type[0].no}', ['a.as_signed()']]
 
   - name: "vsubw_high{neon_type[1].noq}"
     doc: Signed Subtract Wide
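The added `FnCall` lines make the unsigned `vaddlv` variants reinterpret their input before calling the signed-typed LLVM intrinsic. `as_signed` is a bitwise reinterpretation, modelled here on plain arrays (a sketch, not the stdarch helper):

```rust
// as_signed changes the element type, not the bits: u8 lanes become i8
// lanes with identical bit patterns, which is what the signed-typed LLVM
// declaration expects.
fn as_signed(a: [u8; 8]) -> [i8; 8] {
    a.map(|x| x as i8) // `as` between same-width integers is bit-preserving
}

fn main() {
    assert_eq!(as_signed([0xFF; 8])[0], -1);
    assert_eq!(as_signed([0x80; 8])[0], i8::MIN);
}
```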
@@ -7213,19 +7221,37 @@ intrinsics:
       - [poly8x8_t, poly8x8_t, poly8x8_t, '3', '3', ' match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
       - [poly16x4_t, poly16x4_t, poly16x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
       - [float32x2_t, float32x2_t, float32x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [int8x16_t, int8x8_t, int8x16_t, '4', '3', ' let b: int8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [int16x8_t, int16x4_t, int16x8_t, '3', '2', ' let b: int16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [int32x4_t, int32x2_t, int32x4_t, '2', '1', ' let b: int32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]); match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [uint8x16_t, uint8x8_t, uint8x16_t, '4', '3', ' let b: uint8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [uint16x8_t, uint16x4_t, uint16x8_t, '3', '2', ' let b: uint16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [uint32x4_t, uint32x2_t, uint32x4_t, '2', '1', ' let b: uint32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]); match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [poly8x16_t, poly8x8_t, poly8x16_t, '4', '3', ' let b: poly8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [poly16x8_t, poly16x4_t, poly16x8_t, '3', '2', ' let b: poly16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]); match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']]
       - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
       - Identifier: ["{type[5]}", Symbol]
 
+  - name: "vcopy{neon_type[0].lane_nox}"
+    doc: "Insert vector element from another vector element"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1', '3']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const LANE1: i32, const LANE2: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x16_t, int8x8_t, int8x16_t, '4', '3', ' let b: int8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [int16x8_t, int16x4_t, int16x8_t, '3', '2', ' let b: int16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [int32x4_t, int32x2_t, int32x4_t, '2', '1', ' let b: int32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [uint8x16_t, uint8x8_t, uint8x16_t, '4', '3', ' let b: uint8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [uint16x8_t, uint16x4_t, uint16x8_t, '3', '2', ' let b: uint16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [uint32x4_t, uint32x2_t, uint32x4_t, '2', '1', ' let b: uint32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [poly8x16_t, poly8x8_t, poly8x16_t, '4', '3', ' let b: poly8x16_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [poly16x8_t, poly16x4_t, poly16x8_t, '3', '2', ' let b: poly16x8_t = simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']]
+      - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
+      - Identifier: ["{type[5]}", Symbol]
+      - Identifier: ["{type[6]}", Symbol]
+
   - name: "vcopy{neon_type[0].laneq_nox}"
     doc: "Insert vector element from another vector element"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
@@ -7251,20 +7277,38 @@ intrinsics:
       - [poly64x2_t, poly64x2_t, poly64x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
       - [float32x4_t, float32x4_t, float32x4_t, '2', '2', ' match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
       - [float64x2_t, float64x2_t, float64x2_t, '1', '1', ' match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [int8x8_t, int8x16_t, int8x8_t, '3', '4', ' let a: int8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [int16x4_t, int16x8_t, int16x4_t, '2', '3', ' let a: int16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [int32x2_t, int32x4_t, int32x2_t, '1', '2', ' let a: int32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [uint8x8_t, uint8x16_t, uint8x8_t, '3', '4', ' let a: uint8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [uint16x4_t, uint16x8_t, uint16x4_t, '2', '3', ' let a: uint16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [uint32x2_t, uint32x4_t, uint32x2_t, '1', '2', 'let a: uint32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [poly8x8_t, poly8x16_t, poly8x8_t, '3', '4', ' let a: poly8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [poly16x4_t, poly16x8_t, poly16x4_t, '2', '3', ' let a: poly16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [float32x2_t, float32x4_t, float32x2_t, '1', '2', ' let a: float32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]); match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']]
       - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
       - Identifier: ["{type[5]}", Symbol]
 
+  - name: "vcopy{neon_type[0].laneq_nox}"
+    doc: "Insert vector element from another vector element"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1', '3']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    static_defs: ['const LANE1: i32, const LANE2: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, int8x16_t, int8x8_t, '3', '4', ' let a: int8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [int16x4_t, int16x8_t, int16x4_t, '2', '3', ' let a: int16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [int32x2_t, int32x4_t, int32x2_t, '1', '2', ' let a: int32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [uint8x8_t, uint8x16_t, uint8x8_t, '3', '4', ' let a: uint8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [uint16x4_t, uint16x8_t, uint16x4_t, '2', '3', ' let a: uint16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [uint32x2_t, uint32x4_t, uint32x2_t, '1', '2', 'let a: uint32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [poly8x8_t, poly8x16_t, poly8x8_t, '3', '4', ' let a: poly8x16_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);', 'match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [poly16x4_t, poly16x8_t, poly16x4_t, '2', '3', ' let a: poly16x8_t = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [float32x2_t, float32x4_t, float32x2_t, '1', '2', ' let a: float32x4_t = simd_shuffle!(a, a, [0, 1, 2, 3]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']]
+      - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
+      - Identifier: ["{type[5]}", Symbol]
+      - Identifier: ["{type[6]}", Symbol]
+
   - name: "vcopyq_lane_{neon_type[0]}"
     doc: "Insert vector element from another vector element"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
@@ -7277,14 +7321,15 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [int64x2_t, int64x1_t, 'let b: int64x2_t = simd_shuffle!(b, b, [0, 1]); match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [uint64x2_t, uint64x1_t, 'let b: uint64x2_t = simd_shuffle!(b, b, [0, 1]); match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [poly64x2_t, poly64x1_t, 'let b: poly64x2_t = simd_shuffle!(b, b, [0, 1]); match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
-      - [float64x2_t, float64x1_t, ' let b: float64x2_t = simd_shuffle!(b, b, [0, 1]); match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [int64x2_t, int64x1_t, 'let b: int64x2_t = simd_shuffle!(b, b, [0, 1]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [uint64x2_t, uint64x1_t, 'let b: uint64x2_t = simd_shuffle!(b, b, [0, 1]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [poly64x2_t, poly64x1_t, 'let b: poly64x2_t = simd_shuffle!(b, b, [0, 1]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [float64x2_t, float64x1_t, 'let b: float64x2_t = simd_shuffle!(b, b, [0, 1]);', 'match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), }']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE1, '1']]
       - FnCall: [static_assert!, ['LANE2 == 0']]
       - Identifier: ['{type[2]}', Symbol]
+      - Identifier: ['{type[3]}', Symbol]
 
   - name: "vcopyq_lane_f32"
     doc: "Insert vector element from another vector element"
@@ -7298,11 +7343,12 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [float32x4_t, float32x2_t, ' let b: float32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]); match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
+      - [float32x4_t, float32x2_t, 'let b: float32x4_t = simd_shuffle!(b, b, [0, 1, 2, 3]);', 'match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), }']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE1, 2]]
       - FnCall: [static_assert_uimm_bits!, [LANE2, 1]]
       - Identifier: ["{type[2]}", Symbol]
+      - Identifier: ["{type[3]}", Symbol]
 
   - name: "vcreate_f64"
     doc: "Insert vector element from another vector element"
@@ -7646,6 +7692,73 @@ intrinsics:
             - link: "llvm.aarch64.crypto.sha512su1"
               arch: aarch64,arm64ec
 
+  - name: "vsm3tt{type[0]}"
+    doc: "{type[3]}"
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,sm4"']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, ['{type[2]}', 'IMM2 = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['3']]
+      - FnCall: [unstable, ['feature = "stdarch_neon_sm4"', 'issue = "117226"']]
+    static_defs: ["const IMM2: i32"]
+    safety:
+      unsafe: [neon]
+    types:
+      - ['1aq_u32', uint32x4_t, 'sm3tt1a', 'SM3TT1A']
+      - ['1bq_u32', uint32x4_t, 'sm3tt1b', 'SM3TT1B']
+      - ['2aq_u32', uint32x4_t, 'sm3tt2a', 'SM3TT2A']
+      - ['2bq_u32', uint32x4_t, 'sm3tt2b', 'SM3TT2B']
+    compose:
+      - FnCall: ["static_assert_uimm_bits!", [IMM2, "2"]]
+      - LLVMLink:
+          name: "_vsm3tt{type[0]}"
+          arguments:
+            - "a: {neon_type[1]}"
+            - "b: {neon_type[1]}"
+            - "c: {neon_type[1]}"
+            - "n: i64"
+          links:
+            - link: "llvm.aarch64.crypto.{type[2]}"
+              arch: aarch64,arm64ec
+      - FnCall:
+          - "_vsm3tt{type[0]}"
+          - - "a.as_signed()"
+            - "b.as_signed()"
+            - "c.as_signed()"
+            - "IMM2 as i64"
+
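+  # Hedged sketch of the '1aq_u32' row (the extern block is an assumption
+  # about how LLVMLink lowers; the generator converts unsigned vectors to
+  # signed at the FFI boundary, hence the as_signed()/as_unsigned() pair):
+  #
+  #   pub unsafe fn vsm3tt1aq_u32<const IMM2: i32>(
+  #       a: uint32x4_t, b: uint32x4_t, c: uint32x4_t,
+  #   ) -> uint32x4_t {
+  #       static_assert_uimm_bits!(IMM2, 2);
+  #       extern "unadjusted" {
+  #           #[link_name = "llvm.aarch64.crypto.sm3tt1a"]
+  #           fn _vsm3tt1aq_u32(a: int32x4_t, b: int32x4_t, c: int32x4_t, n: i64) -> int32x4_t;
+  #       }
+  #       _vsm3tt1aq_u32(a.as_signed(), b.as_signed(), c.as_signed(), IMM2 as i64).as_unsigned()
+  #   }
+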
+  - name: "vxarq_u64"
+    doc: "Exclusive OR and rotate"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,sha3"']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, ['xar', 'IMM6 = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - FnCall: [stable, ['feature = "stdarch_neon_sha3"', 'since = "1.79.0"']]
+    static_defs: ["const IMM6: i32"]
+    safety:
+      unsafe: [neon]
+    types:
+      - uint64x2_t
+    compose:
+      - FnCall: ["static_assert_uimm_bits!", [IMM6, "6"]]
+      - LLVMLink:
+          name: "_vxarq_u64"
+          arguments:
+            - "a: {neon_type}"
+            - "b: {neon_type}"
+            - "n: i64"
+          links:
+            - link: "llvm.aarch64.crypto.xar"
+              arch: aarch64,arm64ec
+      - FnCall:
+          - "_vxarq_u64"
+          - - "a.as_signed()"
+            - "b.as_signed()"
+            - "IMM6 as i64"
+
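+  # Semantics note (hedged): XAR rotates the XOR of its operands, so each
+  # 64-bit lane of the result is (a ^ b) rotated right by IMM6. A scalar
+  # model of a single lane:
+  #
+  #   fn xar_lane(a: u64, b: u64, imm6: u32) -> u64 {
+  #       (a ^ b).rotate_right(imm6)
+  #   }
+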
   - name: "vrnd32x{neon_type.no}"
     doc: "Floating-point round to 32-bit integer, using current rounding mode"
     arguments: ["a: {neon_type}"]
@@ -9850,3 +9963,1690 @@ intrinsics:
           - transmute
           - - FnCall: ["vld4{type[2]}", [{FnCall: [transmute, [a]]}]]
 
+  - name: "vtbx4{neon_type[0].no}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, int8x8x4_t]
+    compose:
+      - FnCall:
+          - "vqtbx2"
+          - - FnCall: [transmute, [a]]
+            - FnCall:
+                - transmute
+                - - FnCall: ["vcombine{neon_type[0].noq}", ["b.0", "b.1"]]
+            - FnCall:
+                - transmute
+                - - FnCall: ["vcombine{neon_type[0].noq}", ["b.2", "b.3"]]
+            - FnCall: [transmute, [c]]
+
+  - name: "vtbx4{neon_type[0].no}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t, uint8x8x4_t, uint8x8_t]
+      - [poly8x8_t, poly8x8x4_t, uint8x8_t]
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - "vqtbx2"
+                - - FnCall: [transmute, [a]]
+                  - FnCall:
+                      - transmute
+                      - - FnCall: ["vcombine{neon_type[0].noq}", ["b.0", "b.1"]]
+                  - FnCall:
+                      - transmute
+                      - - FnCall: ["vcombine{neon_type[0].noq}", ["b.2", "b.3"]]
+                  - c
+
+  - name: "vtbl1{neon_type[0].no}"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, 'int8x8_t', 'transmute(b)']
+      - [uint8x8_t, 'uint8x8_t', 'b']
+      - [poly8x8_t, 'uint8x8_t', 'b']
+    compose:
+      - FnCall:
+          - 'vqtbl1{neon_type[0].no}'
+          - - FnCall:
+                - 'vcombine{neon_type[0].no}'
+                - - a
+                  - 'crate::mem::zeroed()'
+            - Identifier: ['{type[2]}', Symbol]
+
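+  # Hedged sketch of the int8x8_t row above: the 64-bit table is widened to
+  # 128 bits with a zeroed upper half and handed to the AArch64 TBL form;
+  # out-of-range indices read the zero padding, matching VTBL semantics.
+  #
+  #   pub unsafe fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+  #       vqtbl1_s8(vcombine_s8(a, crate::mem::zeroed()), transmute(b))
+  #   }
+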
+  - name: "vtbl2{neon_type[1].noq}"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8x2_t, 'int8x8_t']
+    compose:
+      - FnCall:
+          - vqtbl1
+          - - FnCall:
+                - transmute
+                - - FnCall:
+                      - 'vcombine{neon_type[1].noq}'
+                      - - 'a.0'
+                        - 'a.1'
+            - FnCall: [transmute, [b]]
+
+  - name: "vtbl2{neon_type[2].no}"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8x2_t, 'uint8x8_t', 'uint8x8_t']
+      - [poly8x8x2_t, 'uint8x8_t', 'poly8x8_t']
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - vqtbl1
+                - - FnCall:
+                      - transmute
+                      - - FnCall:
+                            - 'vcombine{neon_type[2].noq}'
+                            - - 'a.0'
+                              - 'a.1'
+                  - b
+
+  - name: "vtbl3{neon_type[1].no}"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8x3_t, 'int8x8_t', 'int8x16x2']
+    compose:
+      - Let:
+          - x
+          - FnCall:
+              - '{type[2]}_t'
+              - - FnCall: ['vcombine{neon_type[1].no}', ['a.0', 'a.1']]
+                - FnCall: ['vcombine{neon_type[1].no}', ['a.2', 'crate::mem::zeroed()']]
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - vqtbl2
+                - - FnCall: [transmute, ['x.0']]
+                  - FnCall: [transmute, ['x.1']]
+                  - FnCall: [transmute, [b]]
+
+  - name: "vtbl3{neon_type[3].no}"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[3]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8x3_t, 'uint8x8_t', 'uint8x16x2', 'uint8x8_t']
+      - [poly8x8x3_t, 'uint8x8_t', 'poly8x16x2', 'poly8x8_t']
+    big_endian_inverse: true
+    compose:
+      - Let:
+          - x
+          - FnCall:
+              - '{type[2]}_t'
+              - - FnCall: ['vcombine{neon_type[3].no}', ['a.0', 'a.1']]
+                - FnCall: ['vcombine{neon_type[3].no}', ['a.2', 'crate::mem::zeroed()']]
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - vqtbl2
+                - - FnCall: [transmute, ['x.0']]
+                  - FnCall: [transmute, ['x.1']]
+                  - b
+
+  - name: "vtbl4{neon_type[1].no}"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8x4_t, 'int8x8_t', 'int8x16x2']
+    compose:
+      - Let:
+          - x
+          - FnCall:
+              - '{type[2]}_t'
+              - - FnCall: ['vcombine{neon_type[1].no}', ['a.0', 'a.1']]
+                - FnCall: ['vcombine{neon_type[1].no}', ['a.2', 'a.3']]
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - 'vqtbl2'
+                - - FnCall: [transmute, ['x.0']]
+                  - FnCall: [transmute, ['x.1']]
+                  - FnCall: [transmute, [b]]
+
+  - name: "vtbl4{neon_type[3].no}"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[3]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8x4_t, 'uint8x8_t', 'uint8x16x2', 'uint8x8_t']
+      - [poly8x8x4_t, 'uint8x8_t', 'poly8x16x2', 'poly8x8_t']
+    big_endian_inverse: true
+    compose:
+      - Let:
+          - x
+          - FnCall:
+              - '{type[2]}_t'
+              - - FnCall: ['vcombine{neon_type[3].no}', ['a.0', 'a.1']]
+                - FnCall: ['vcombine{neon_type[3].no}', ['a.2', 'a.3']]
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - 'vqtbl2'
+                - - FnCall: [transmute, ['x.0']]
+                  - FnCall: [transmute, ['x.1']]
+                  - b
+
+  - name: "vqtbx1{neon_type[0].no}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, int8x16_t, uint8x8_t, vqtbx1]
+      - [int8x16_t, int8x16_t, uint8x16_t, vqtbx1q]
+    compose:
+      - FnCall: ['{type[3]}', [a, b, c]]
+
+  - name: "vqtbx1{type[4]}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t, "uint8x16_t", uint8x8_t, "vqtbx1", "_u8"]
+      - [poly8x8_t, "poly8x16_t", uint8x8_t, "vqtbx1", "_p8"]
+      - [uint8x16_t, "uint8x16_t", uint8x16_t, "vqtbx1q", "q_u8"]
+      - [poly8x16_t, "poly8x16_t", uint8x16_t, "vqtbx1q", "q_p8"]
+    compose:
+      - Let:
+        - x
+        - FnCall:
+            - transmute
+            - - FnCall:
+                  - "{type[3]}"
+                  - - FnCall: [transmute, [a]]
+                    - FnCall: [transmute, [b]]
+                    - c
+      - Identifier: [x, Symbol]
+
+  - name: "vtbx1{neon_type[0].no}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, "int8x8_t", "transmute(c)", "i8x8::splat(8)", "int8x8"]
+      - [uint8x8_t, "uint8x8_t", "c", "u8x8::splat(8)", "uint8x8"]
+      - [poly8x8_t, "uint8x8_t", "c", "u8x8::splat(8)", "uint8x8"]
+    compose:
+      - FnCall:
+          - simd_select
+          - - FnCall:
+                - "simd_lt::<{type[4]}_t, int8x8_t>"
+                - - c
+                  - FnCall: [transmute, ["{type[3]}"]]
+            - FnCall:
+                - transmute
+                - - FnCall:
+                      - "vqtbx1"
+                      - - "transmute(a)"
+                        - FnCall:
+                            - transmute
+                            - - FnCall: ["vcombine{neon_type[0].no}", [b, "crate::mem::zeroed()"]]
+                        - "{type[2]}"
+            - a
+
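+  # Hedged sketch of the uint8x8_t row: TBX must keep the destination lane
+  # when an index is out of range, so lanes with c < 8 take the table result
+  # and the simd_select falls back to `a` everywhere else.
+  #
+  #   pub unsafe fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
+  #       simd_select(
+  #           simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(8))),
+  #           transmute(vqtbx1(
+  #               transmute(a),
+  #               transmute(vcombine_u8(b, crate::mem::zeroed())),
+  #               c,
+  #           )),
+  #           a,
+  #       )
+  #   }
+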
+  - name: "vtbx2{neon_type[0].no}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, 'int8x8x2_t']
+    compose:
+      - FnCall:
+          - vqtbx1
+          - - FnCall: [transmute, [a]]
+            - FnCall:
+                - transmute
+                - - FnCall: ["vcombine{neon_type[0].no}", ['b.0', 'b.1']]
+            - FnCall: [transmute, [c]]
+
+  - name: "vtbx2{neon_type[0].no}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t, 'uint8x8x2_t', uint8x8_t]
+      - [poly8x8_t, 'poly8x8x2_t', uint8x8_t]
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - vqtbx1
+                - - FnCall: [transmute, [a]]
+                  - FnCall:
+                      - transmute
+                      - - FnCall: ["vcombine{neon_type[0].no}", ['b.0', 'b.1']]
+                  - c
+
+  - name: "vtbx3{neon_type[0].no}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, 'int8x8x3_t', 'int8x16x2', 'i8x8::splat(24)', 'int8x8']
+    compose:
+      - Let:
+         - x
+         - FnCall:
+             - '{type[2]}_t'
+             - - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']]
+               - FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'crate::mem::zeroed()']]
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - simd_select
+                - - FnCall:
+                      - 'simd_lt::<{type[4]}_t, int8x8_t>'
+                      - - FnCall: [transmute, [c]]
+                        - FnCall: [transmute, ['{type[3]}']]
+                  - FnCall:
+                      - transmute
+                      - - FnCall:
+                            - 'vqtbx2'
+                            - - FnCall: [transmute, [a]]
+                              - FnCall: [transmute, ['x.0']]
+                              - FnCall: [transmute, ['x.1']]
+                              - FnCall: [transmute, [c]]
+                  - a
+
+  - name: "vtbx3{neon_type[0].no}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: uint8x8_t"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t, 'uint8x8x3_t', 'uint8x16x2', 'u8x8::splat(24)', 'uint8x8']
+      - [poly8x8_t, 'poly8x8x3_t', 'poly8x16x2', 'u8x8::splat(24)', 'poly8x8']
+    big_endian_inverse: true
+    compose:
+      - Let:
+         - x
+         - FnCall:
+             - '{type[2]}_t'
+             - - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']]
+               - FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'crate::mem::zeroed()']]
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - simd_select
+                - - FnCall:
+                      - 'simd_lt::<{type[4]}_t, int8x8_t>'
+                      - - FnCall: [transmute, [c]]
+                        - FnCall: [transmute, ['{type[3]}']]
+                  - FnCall:
+                      - transmute
+                      - - FnCall:
+                            - 'vqtbx2'
+                            - - FnCall: [transmute, [a]]
+                              - FnCall: [transmute, ['x.0']]
+                              - FnCall: [transmute, ['x.1']]
+                              - c
+                  - a
+
+  - name: "vqtbl1{neon_type[3].no}"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[3]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - ['int8x16_t', uint8x8_t, 'vqtbl1', 'int8x8_t']
+      - ['int8x16_t', uint8x16_t, 'vqtbl1q', 'int8x16_t']
+    compose:
+      - FnCall: ['{type[2]}', ['a', b]]
+
+  - name: "vqtbl1{neon_type[3].no}"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[3]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - ['uint8x16_t', uint8x8_t, 'vqtbl1', 'uint8x8_t']
+      - ['poly8x16_t', uint8x8_t, 'vqtbl1', 'poly8x8_t']
+      - ['uint8x16_t', uint8x16_t, 'vqtbl1q', 'uint8x16_t']
+      - ['poly8x16_t', uint8x16_t, 'vqtbl1q', 'poly8x16_t']
+    compose:
+      - Let:
+          - x
+          - FnCall:
+              - transmute
+              - - FnCall:
+                    - '{type[2]}'
+                    - - FnCall: [transmute, ['a']]
+                      - b
+      - Identifier: [x, Symbol]
+
+  - name: "vqtbl2{neon_type[3].no}"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[3]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - ['int8x16x2_t', uint8x8_t, 'vqtbl2', 'int8x8_t']
+      - ['int8x16x2_t', uint8x16_t, 'vqtbl2q', 'int8x16_t']
+    compose:
+      - FnCall: ['{type[2]}', ['a.0', 'a.1', b]]
+
+  - name: "vqtbl2{neon_type[3].no}"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[3]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - ['uint8x16x2_t', uint8x8_t, 'vqtbl2', 'uint8x8_t']
+      - ['uint8x16x2_t', uint8x16_t, 'vqtbl2q', 'uint8x16_t']
+      - ['poly8x16x2_t', uint8x8_t, 'vqtbl2', 'poly8x8_t']
+      - ['poly8x16x2_t', uint8x16_t, 'vqtbl2q', 'poly8x16_t']
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - '{type[2]}'
+                - - FnCall: [transmute, ['a.0']]
+                  - FnCall: [transmute, ['a.1']]
+                  - b
+
+  - name: "vqtbx2{neon_type[0].no}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, 'int8x16x2_t', uint8x8_t, 'vqtbx2']
+      - [int8x16_t, 'int8x16x2_t', uint8x16_t, 'vqtbx2q']
+    compose:
+      - FnCall: ['{type[3]}', [a, 'b.0', 'b.1', c]]
+
+  - name: "vqtbx2{neon_type[0].no}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t, 'uint8x16x2_t', uint8x8_t, 'vqtbx2']
+      - [uint8x16_t, 'uint8x16x2_t', uint8x16_t, 'vqtbx2q']
+      - [poly8x8_t, 'poly8x16x2_t', uint8x8_t, 'vqtbx2']
+      - [poly8x16_t, 'poly8x16x2_t', uint8x16_t, 'vqtbx2q']
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - '{type[3]}'
+                - - FnCall: [transmute, [a]]
+                  - FnCall: [transmute, ['b.0']]
+                  - FnCall: [transmute, ['b.1']]
+                  - c
+
+  - name: "vqtbl3{neon_type[0].no}"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - ['int8x8_t', 'int8x16x3_t', uint8x8_t, 'vqtbl3']
+      - ['int8x16_t', 'int8x16x3_t', uint8x16_t, 'vqtbl3q']
+    compose:
+      - FnCall: ['{type[3]}', ['a.0', 'a.1', 'a.2', b]]
+
+  - name: "vqtbl3{neon_type[0].no}"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - ['uint8x8_t', 'uint8x16x3_t', uint8x8_t, 'vqtbl3']
+      - ['uint8x16_t','uint8x16x3_t', uint8x16_t, 'vqtbl3q']
+      - ['poly8x8_t', 'poly8x16x3_t', uint8x8_t, 'vqtbl3']
+      - ['poly8x16_t','poly8x16x3_t', uint8x16_t, 'vqtbl3q']
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - '{type[3]}'
+                - - FnCall: [transmute, ['a.0']]
+                  - FnCall: [transmute, ['a.1']]
+                  - FnCall: [transmute, ['a.2']]
+                  - b
+
+  - name: "vqtbx3{neon_type[0].no}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, 'int8x16x3_t', uint8x8_t, 'vqtbx3']
+      - [int8x16_t, 'int8x16x3_t', uint8x16_t, 'vqtbx3q']
+    compose:
+      - FnCall: ['{type[3]}', [a, 'b.0', 'b.1', 'b.2', c]]
+
+  - name: "vqtbx3{neon_type[0].no}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t, 'uint8x16x3_t', uint8x8_t, 'vqtbx3']
+      - [uint8x16_t, 'uint8x16x3_t', uint8x16_t, 'vqtbx3q']
+      - [poly8x8_t, 'poly8x16x3_t', uint8x8_t, 'vqtbx3']
+      - [poly8x16_t, 'poly8x16x3_t', uint8x16_t, 'vqtbx3q']
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - '{type[3]}'
+                - - FnCall: [transmute, [a]]
+                  - FnCall: [transmute, ['b.0']]
+                  - FnCall: [transmute, ['b.1']]
+                  - FnCall: [transmute, ['b.2']]
+                  - c
+
+  - name: "vqtbl4{neon_type[3].no}"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[3]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - ['int8x16x4_t', uint8x8_t, 'vqtbl4', 'int8x8_t']
+      - ['int8x16x4_t', uint8x16_t, 'vqtbl4q', 'int8x16_t']
+    compose:
+      - FnCall: ['{type[2]}', ['a.0', 'a.1', 'a.2', 'a.3', b]]
+
+  - name: "vqtbl4{neon_type[3].no}"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[3]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - ['uint8x16x4_t', uint8x8_t, 'vqtbl4', 'uint8x8_t']
+      - ['uint8x16x4_t', uint8x16_t, 'vqtbl4q', 'uint8x16_t']
+      - ['poly8x16x4_t', uint8x8_t, 'vqtbl4', 'poly8x8_t']
+      - ['poly8x16x4_t', uint8x16_t, 'vqtbl4q', 'poly8x16_t']
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - '{type[2]}'
+                - - FnCall: [transmute, ['a.0']]
+                  - FnCall: [transmute, ['a.1']]
+                  - FnCall: [transmute, ['a.2']]
+                  - FnCall: [transmute, ['a.3']]
+                  - b
+
+  - name: "vqtbx4{neon_type[0].no}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, 'int8x16x4_t', uint8x8_t, 'vqtbx4']
+      - [int8x16_t, 'int8x16x4_t', uint8x16_t, 'vqtbx4q']
+    compose:
+      - FnCall: ['{type[3]}', [a, 'b.0', 'b.1', 'b.2', 'b.3', c]]
+
+  - name: "vqtbx4{neon_type[0].no}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t, 'uint8x16x4_t', uint8x8_t, 'vqtbx4']
+      - [uint8x16_t, 'uint8x16x4_t', uint8x16_t, 'vqtbx4q']
+      - [poly8x8_t, 'poly8x16x4_t', uint8x8_t, 'vqtbx4']
+      - [poly8x16_t, 'poly8x16x4_t', uint8x16_t, 'vqtbx4q']
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - '{type[3]}'
+                - - FnCall: [transmute, [a]]
+                  - FnCall: [transmute, ['b.0']]
+                  - FnCall: [transmute, ['b.1']]
+                  - FnCall: [transmute, ['b.2']]
+                  - FnCall: [transmute, ['b.3']]
+                  - c
+
+  - name: "{type[0]}"
+    visibility: private
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"]
+    return_type: "{neon_type[3]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - ["vqtbl1", "int8x16_t", "uint8x8_t", "int8x8_t"]
+      - ["vqtbl1q", "int8x16_t", "uint8x16_t", "int8x16_t"]
+    compose:
+      - LLVMLink:
+          name: "_{type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.tbl1.{neon_type[3]}"
+              arch: aarch64,arm64ec
+
+  - name: "{type[0]}"
+    visibility: private
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[3]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - ["vqtbl2", "int8x16_t", "uint8x8_t", "int8x8_t"]
+      - ["vqtbl2q", "int8x16_t", "uint8x16_t", "int8x16_t"]
+    compose:
+      - LLVMLink:
+          name: "_{type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.tbl2.{neon_type[3]}"
+              arch: aarch64,arm64ec
+
+  - name: "{type[0]}"
+    visibility: private
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[1]}", "d: {neon_type[2]}"]
+    return_type: "{neon_type[3]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - ["vqtbl3", int8x16_t, uint8x8_t, int8x8_t]
+      - ["vqtbl3q", int8x16_t, uint8x16_t, int8x16_t]
+    compose:
+      - LLVMLink:
+          name: "_{type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.tbl3.{neon_type[3]}"
+              arch: aarch64,arm64ec
+
+  - name: "{type[0]}"
+    visibility: private
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[1]}", "d: {neon_type[1]}", "e: {neon_type[2]}"]
+    return_type: "{neon_type[3]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - ["vqtbl4", int8x16_t, uint8x8_t, int8x8_t]
+      - ["vqtbl4q", int8x16_t, uint8x16_t, int8x16_t]
+    compose:
+      - LLVMLink:
+          name: "_{type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.tbl4.{neon_type[3]}"
+              arch: aarch64,arm64ec
+
+  - name: "{type[0]}"
+    visibility: private
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}", "c: {neon_type[3]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [vqtbx1, "int8x8_t", "int8x16_t", "uint8x8_t"]
+      - [vqtbx1q, "int8x16_t", "int8x16_t", "uint8x16_t"]
+    compose:
+      - LLVMLink:
+          name: "_{type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.tbx1.{neon_type[1]}"
+              arch: aarch64,arm64ec
+
+  - name: "{type[0]}"
+    visibility: private
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}",  "c: {neon_type[2]}", "d: {neon_type[3]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [vqtbx2, "int8x8_t", "int8x16_t", "uint8x8_t"]
+      - [vqtbx2q, "int8x16_t", "int8x16_t", "uint8x16_t"]
+    compose:
+      - LLVMLink:
+          name: "_{type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.tbx2.{neon_type[1]}"
+              arch: aarch64,arm64ec
+
+  - name: "{type[0]}"
+    visibility: private
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}",  "c: {neon_type[2]}", "d: {neon_type[2]}", "e: {neon_type[3]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [vqtbx3, "int8x8_t", "int8x16_t", "uint8x8_t"]
+      - [vqtbx3q, "int8x16_t", "int8x16_t", "uint8x16_t"]
+    compose:
+      - LLVMLink:
+          name: "_{type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.tbx3.{neon_type[1]}"
+              arch: aarch64,arm64ec
+
+  - name: "{type[0]}"
+    visibility: private
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}", "c: {neon_type[2]}", "d: {neon_type[2]}", "e: {neon_type[2]}", "f: {neon_type[3]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [vqtbx4, "int8x8_t", "int8x16_t", "uint8x8_t"]
+      - [vqtbx4q, "int8x16_t", "int8x16_t", "uint8x16_t"]
+    compose:
+      - LLVMLink:
+          name: "_{type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.tbx4.{neon_type[1]}"
+              arch: aarch64,arm64ec
+
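+  # Hedged sketch of how one private LLVMLink entry above lowers (shown for
+  # the vqtbx4 row; the v8i8 mangling of {neon_type[1]} is an assumption
+  # about the generator's type naming):
+  #
+  #   unsafe fn vqtbx4(a: int8x8_t, b: int8x16_t, c: int8x16_t,
+  #                    d: int8x16_t, e: int8x16_t, f: uint8x8_t) -> int8x8_t {
+  #       extern "unadjusted" {
+  #           #[link_name = "llvm.aarch64.neon.tbx4.v8i8"]
+  #           fn _vqtbx4(a: int8x8_t, b: int8x16_t, c: int8x16_t,
+  #                      d: int8x16_t, e: int8x16_t, f: uint8x8_t) -> int8x8_t;
+  #       }
+  #       _vqtbx4(a, b, c, d, e, f)
+  #   }
+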
+  - name: "vld1{neon_type[1].no}"
+    doc: "Load multiple single-element structures to one, two, three, or four registers"
+    arguments: ["ptr: {type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "{type[2]}"']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ldr]]}]]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - ['*const i8', int8x8_t, "neon"]
+      - ['*const i8', int8x16_t, "neon"]
+      - ['*const i16', int16x4_t, "neon"]
+      - ['*const i16', int16x8_t, "neon"]
+      - ['*const i32', int32x2_t, "neon"]
+      - ['*const i32', int32x4_t, "neon"]
+      - ['*const i64', int64x1_t, "neon"]
+      - ['*const i64', int64x2_t, "neon"]
+      - ['*const u8', uint8x8_t, "neon"]
+      - ['*const u8', uint8x16_t, "neon"]
+      - ['*const u16', uint16x4_t, "neon"]
+      - ['*const u16', uint16x8_t, "neon"]
+      - ['*const u32', uint32x2_t, "neon"]
+      - ['*const u32', uint32x4_t, "neon"]
+      - ['*const u64', uint64x1_t, "neon"]
+      - ['*const u64', uint64x2_t, "neon"]
+      - ['*const p8', poly8x8_t, "neon"]
+      - ['*const p8', poly8x16_t, "neon"]
+      - ['*const p16', poly16x4_t, "neon"]
+      - ['*const p16', poly16x8_t, "neon"]
+      - ['*const p64', poly64x1_t, "neon,aes"]
+      - ['*const p64', poly64x2_t, "neon,aes"]
+      - ['*const f32', float32x2_t, "neon"]
+      - ['*const f32', float32x4_t, "neon"]
+      - ['*const f64', float64x1_t, "neon"]
+      - ['*const f64', float64x2_t, "neon"]
+    compose:
+      - FnCall:
+          - 'crate::ptr::read_unaligned'
+          - - MethodCall:
+                - ptr
+                - cast
+                - []
+
+  - name: "vst1{neon_type[1].no}"
+    doc: "Store multiple single-element structures from one, two, three, or four registers."
+    arguments: ["ptr: {type[0]}", "a: {neon_type[1]}"]
+    attr:
+      - FnCall: [target_feature, ['enable = "{type[2]}"']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [str]]}]]
+      - FnCall: [allow, ['clippy::cast_ptr_alignment']]
+      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - ['*mut i8', int8x8_t, "neon"]
+      - ['*mut i8', int8x16_t, "neon"]
+      - ['*mut i16', int16x4_t, "neon"]
+      - ['*mut i16', int16x8_t, "neon"]
+      - ['*mut i32', int32x2_t, "neon"]
+      - ['*mut i32', int32x4_t, "neon"]
+      - ['*mut i64', int64x1_t, "neon"]
+      - ['*mut i64', int64x2_t, "neon"]
+      - ['*mut u8', uint8x8_t, "neon"]
+      - ['*mut u8', uint8x16_t, "neon"]
+      - ['*mut u16', uint16x4_t, "neon"]
+      - ['*mut u16', uint16x8_t, "neon"]
+      - ['*mut u32', uint32x2_t, "neon"]
+      - ['*mut u32', uint32x4_t, "neon"]
+      - ['*mut u64', uint64x1_t, "neon"]
+      - ['*mut u64', uint64x2_t, "neon"]
+      - ['*mut p8', poly8x8_t, "neon"]
+      - ['*mut p8', poly8x16_t, "neon"]
+      - ['*mut p16', poly16x4_t, "neon"]
+      - ['*mut p16', poly16x8_t, "neon"]
+      - ['*mut p64', poly64x1_t, "neon,aes"]
+      - ['*mut p64', poly64x2_t, "neon,aes"]
+      - ['*mut f32', float32x2_t, "neon"]
+      - ['*mut f32', float32x4_t, "neon"]
+      - ['*mut f64', float64x1_t, "neon"]
+      - ['*mut f64', float64x2_t, "neon"]
+    compose:
+      - FnCall:
+          - 'crate::ptr::write_unaligned'
+          - - MethodCall:
+                - ptr
+                - cast
+                - []
+            - a
+
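+  # Hedged usage sketch: because vld1/vst1 are specified as unaligned
+  # pointer reads/writes, a load/store round-trip works for any alignment
+  # (inside an unsafe block on a neon-enabled target):
+  #
+  #   let mut buf = [0u8; 16];
+  #   let v = vld1q_u8(buf.as_ptr());
+  #   vst1q_u8(buf.as_mut_ptr(), v);
+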
+  - name: "__crc32d"
+    doc: "CRC32 single round checksum for quad words (64 bits)."
+    arguments: ["crc: {type[0]}", "data: {type[1]}"]
+    return_type: "{type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "crc"']]
+      - *target-not-arm
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32x"]] }]]
+      - *aarch64-crc-stable
+    safety:
+      unsafe: [neon]
+    types:
+      - [u32, u64]
+    compose:
+      - LLVMLink:
+          name: "crc32x"
+          arguments:
+            - "crc: u32"
+            - "data: u64"
+          links:
+            - link: "llvm.aarch64.crc32x"
+              arch: aarch64,arm64ec
+
+  - name: "__crc32cd"
+    doc: "CRC32-C single round checksum for quad words (64 bits)."
+    arguments: ["crc: {type[0]}", "data: {type[1]}"]
+    return_type: "{type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "crc"']]
+      - *target-not-arm
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32cx"]] }]]
+      - *aarch64-crc-stable
+    safety:
+      unsafe: [neon]
+    types:
+      - [u32, u64]
+    compose:
+      - LLVMLink:
+          name: "crc32cx"
+          arguments:
+            - "crc: u32"
+            - "data: u64"
+          links:
+            - link: "llvm.aarch64.crc32cx"
+              arch: aarch64,arm64ec
+
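+  # Hedged usage sketch: each call folds one quad word into the running
+  # 32-bit checksum, so a byte buffer (`data: &[u8]` is assumed here) is
+  # processed 8 bytes at a time:
+  #
+  #   let mut crc: u32 = 0xFFFF_FFFF;
+  #   for chunk in data.chunks_exact(8) {
+  #       crc = __crc32d(crc, u64::from_le_bytes(chunk.try_into().unwrap()));
+  #   }
+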
+  - name: "{type[0]}"
+    doc: "Absolute Value (wrapping)."
+    arguments: ["a: {type[1]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-stable
+    assert_instr: [abs]
+    safety:
+      unsafe: [neon]
+    types:
+      - ['vabsd_s64', i64, i64]
+      - ['vabs_s64', int64x1_t, v1i64]
+      - ['vabsq_s64', int64x2_t, v2i64]
+    compose:
+      - LLVMLink:
+          name: "{type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.abs.{type[2]}"
+              arch: aarch64,arm64ec
+
+  - name: "vuqadd{neon_type[0].no}"
+    doc: "Signed saturating Accumulate of Unsigned value."
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *neon-stable
+    assert_instr: [suqadd]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, uint8x8_t]
+      - [int8x16_t, uint8x16_t]
+      - [int16x4_t, uint16x4_t]
+      - [int16x8_t, uint16x8_t]
+      - [int32x2_t, uint32x2_t]
+      - [int32x4_t, uint32x4_t]
+      - [int64x1_t, uint64x1_t]
+      - [int64x2_t, uint64x2_t]
+    compose:
+      - LLVMLink:
+          name: "vuqadd{neon_type[0].no}"
+          links:
+            - link: "llvm.aarch64.neon.suqadd.{neon_type[0]}"
+              arch: aarch64,arm64ec
+
+  - name: "vsqadd{neon_type[0].no}"
+    doc: "Unsigned saturating Accumulate of Signed value."
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *neon-stable
+    assert_instr: [usqadd]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t, int8x8_t]
+      - [uint8x16_t, int8x16_t]
+      - [uint16x4_t, int16x4_t]
+      - [uint16x8_t, int16x8_t]
+      - [uint32x2_t, int32x2_t]
+      - [uint32x4_t, int32x4_t]
+      - [uint64x1_t, int64x1_t]
+      - [uint64x2_t, int64x2_t]
+    compose:
+      - LLVMLink:
+          name: "vsqadd{neon_type[0].no}"
+          links:
+            - link: "llvm.aarch64.neon.usqadd.{neon_type[1]}"
+              arch: aarch64,arm64ec
+
+  - name: "vpadd{neon_type.no}"
+    doc: "Add Pairwise"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-stable
+    assert_instr: [addp]
+    safety:
+      unsafe: [neon]
+    types:
+      - int8x16_t
+      - int16x8_t
+      - int32x4_t
+      - int64x2_t
+    compose:
+      - LLVMLink:
+          name: "vpadd{neon_type.no}"
+          links:
+            - link: "llvm.aarch64.neon.addp.{neon_type}"
+              arch: aarch64,arm64ec
+
+  - name: "vpadd{neon_type[0].no}"
+    doc: "Add Pairwise"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *neon-stable
+    assert_instr: [addp]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x16_t, int8x16_t]
+      - [uint16x8_t, int16x8_t]
+      - [uint32x4_t, int32x4_t]
+      - [uint64x2_t, int64x2_t]
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - 'vpadd{neon_type[1].no}'
+                - - FnCall: [transmute, [a]]
+                  - FnCall: [transmute, [b]]
+
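+  # Note (hedged): pairwise addition is bit-pattern identical for signed and
+  # unsigned lanes, so the unsigned rows reuse the signed addp link through
+  # transmutes, e.g. for the uint16x8_t row:
+  #
+  #   pub unsafe fn vpaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+  #       transmute(vpaddq_s16(transmute(a), transmute(b)))
+  #   }
+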
+  - name: "vpaddd_s64"
+    doc: "Add Pairwise"
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-stable
+    assert_instr: [addp]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int64x2_t, i64]
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - "vaddvq_u64"
+                - - FnCall: [transmute, [a]]
+
+  - name: "vpaddd_u64"
+    doc: "Add Pairwise"
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-stable
+    assert_instr: [addp]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint64x2_t, u64]
+    compose:
+      - FnCall: [vaddvq_u64, [a]]
+
+  - name: "vaddv{neon_type[0].no}"
+    doc: "Add across vector"
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-stable
+    assert_instr: [addv]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, i8, i32]
+      - [int16x4_t, i16, i32]
+      - [int8x16_t, i8, i32]
+      - [int16x8_t, i16, i32]
+      - [int32x4_t, i32, i32]
+    compose:
+      - LLVMLink:
+          name: "vaddv{neon_type[0].no}"
+          links:
+            - link: "llvm.aarch64.neon.saddv.{type[2]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
+
+  - name: "vaddv{neon_type[0].no}"
+    doc: "Add across vector"
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-stable
+    assert_instr: [addp]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int32x2_t, i32]
+    compose:
+      - LLVMLink:
+          name: "vaddv{neon_type[0].no}"
+          links:
+            - link: "llvm.aarch64.neon.saddv.i32.{neon_type[0]}"
+              arch: aarch64,arm64ec
+
+  - name: "vaddv{neon_type[0].no}"
+    doc: "Add across vector"
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-stable
+    assert_instr: [addp]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int64x2_t, i64]
+    compose:
+      - LLVMLink:
+          name: "vaddv{neon_type[0].no}"
+          links:
+            - link: "llvm.aarch64.neon.saddv.i64.{neon_type[0]}"
+              arch: aarch64,arm64ec
+
+  - name: "vaddv{neon_type[0].no}"
+    doc: "Add across vector"
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-stable
+    assert_instr: [addv]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t,  u8, i32]
+      - [uint16x4_t, u16, i32]
+      - [uint8x16_t, u8, i32]
+      - [uint16x8_t, u16, i32]
+      - [uint32x4_t, u32, i32]
+    compose:
+      - LLVMLink:
+          name: "vaddv{neon_type[0].no}"
+          links:
+            - link: "llvm.aarch64.neon.uaddv.{type[2]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
+
+  - name: "vaddv{neon_type[0].no}"
+    doc: "Add across vector"
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-stable
+    assert_instr: [addp]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint32x2_t, u32, i32]
+    compose:
+      - LLVMLink:
+          name: "vaddv{neon_type[0].no}"
+          links:
+            - link: "llvm.aarch64.neon.uaddv.{type[2]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
+
+  - name: "vaddv{neon_type[0].no}"
+    doc: "Add across vector"
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-stable
+    assert_instr: [addp]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint64x2_t, u64, i64]
+    compose:
+      - LLVMLink:
+          name: "vaddv{neon_type[0].no}"
+          links:
+            - link: "llvm.aarch64.neon.uaddv.{type[2]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
+
+  - name: "vaddlv{neon_type[0].no}"
+    doc: "Signed Add Long across Vector"
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-stable
+    assert_instr: [saddlv]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, i16]
+      - [int8x16_t, i16]
+    compose:
+      - LLVMLink:
+          name: "vaddlv{neon_type[0].no}"
+          return_type: "i32"
+          links:
+            - link: "llvm.aarch64.neon.saddlv.i32.{neon_type[0]}"
+              arch: aarch64,arm64ec
+      - Identifier: ["_vaddlv{neon_type[0].no}(a) as i16", Symbol]
+
+  - name: "vaddlv{neon_type[0].no}"
+    doc: "Unsigned Add Long across Vector"
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-stable
+    assert_instr: [uaddlv]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t, u16]
+      - [uint8x16_t, u16]
+    compose:
+      - LLVMLink:
+          name: "vaddlv{neon_type[0].no}"
+          return_type: "i32"
+          links:
+            - link: "llvm.aarch64.neon.uaddlv.i32.{neon_type[0]}"
+              arch: aarch64,arm64ec
+      - Identifier: ["_vaddlv{neon_type[0].no}(a.as_signed()).as_unsigned() as u16", Symbol]
+
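+  # Hedged sketch of the uint8x8_t row: the uaddlv link only comes in an
+  # i32-returning form, so the spec truncates after the call (the v8i8
+  # mangling is assumed):
+  #
+  #   pub unsafe fn vaddlv_u8(a: uint8x8_t) -> u16 {
+  #       extern "unadjusted" {
+  #           #[link_name = "llvm.aarch64.neon.uaddlv.i32.v8i8"]
+  #           fn _vaddlv_u8(a: int8x8_t) -> i32;
+  #       }
+  #       _vaddlv_u8(a.as_signed()).as_unsigned() as u16
+  #   }
+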
+  - name: "vmaxv{neon_type[0].no}"
+    doc: "Horizontal vector max."
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-stable
+    assert_instr: ['{type[2]}']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, i8, 'smaxv']
+      - [int16x4_t, i16, 'smaxv']
+      - [int32x2_t, i32, 'smaxp']
+      - [int8x16_t, i8, 'smaxv']
+      - [int16x8_t, i16, 'smaxv']
+      - [int32x4_t, i32, 'smaxv']
+    compose:
+      - LLVMLink:
+          name: "vmaxv{neon_type[0].no}"
+          links:
+            - link: "llvm.aarch64.neon.smaxv.{type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
+
+  - name: "vmaxv{neon_type[0].no}"
+    doc: "Horizontal vector max."
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-stable
+    assert_instr: ['{type[2]}']
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t, u8, 'umaxv']
+      - [uint16x4_t, u16, 'umaxv']
+      - [uint32x2_t, u32, 'umaxp']
+      - [uint8x16_t, u8, 'umaxv']
+      - [uint16x8_t, u16, 'umaxv']
+      - [uint32x4_t, u32, 'umaxv']
+    compose:
+      - LLVMLink:
+          name: "vmaxv{neon_type[0].no}"
+          links:
+            - link: "llvm.aarch64.neon.umaxv.{type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
+
+  - name: "vmaxv{neon_type[0].no}"
+    doc: "Horizontal vector max."
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-stable
+    assert_instr: ['{type[2]}']
+    safety:
+      unsafe: [neon]
+    types:
+      - [float32x2_t, f32, 'fmaxp']
+      - [float32x4_t, f32, 'fmaxv']
+      - [float64x2_t, f64, 'fmaxp']
+    compose:
+      - LLVMLink:
+          name: "vmaxv{neon_type[0].no}"
+          links:
+            - link: "llvm.aarch64.neon.fmaxv.{type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
+
+  - name: "vminv{neon_type[0].no}"
+    doc: "Horizontal vector min."
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-stable
+    assert_instr: ['{type[2]}']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, i8, 'sminv']
+      - [int16x4_t, i16, 'sminv']
+      - [int32x2_t, i32, 'sminp']
+      - [int8x16_t, i8, 'sminv']
+      - [int16x8_t, i16, 'sminv']
+      - [int32x4_t, i32, 'sminv']
+    compose:
+      - LLVMLink:
+          name: "vminv{neon_type[0].no}"
+          links:
+            - link: "llvm.aarch64.neon.sminv.{type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
+
+  - name: "vminv{neon_type[0].no}"
+    doc: "Horizontal vector min."
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-stable
+    assert_instr: ['{type[2]}']
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t, u8, 'uminv']
+      - [uint16x4_t, u16, 'uminv']
+      - [uint32x2_t, u32, 'uminp']
+      - [uint8x16_t, u8, 'uminv']
+      - [uint16x8_t, u16, 'uminv']
+      - [uint32x4_t, u32, 'uminv']
+    compose:
+      - LLVMLink:
+          name: "vminv{neon_type[0].no}"
+          links:
+            - link: "llvm.aarch64.neon.uminv.{type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
+
+  - name: "vminv{neon_type[0].no}"
+    doc: "Horizontal vector min."
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-stable
+    assert_instr: ['{type[2]}']
+    safety:
+      unsafe: [neon]
+    types:
+      - [float32x2_t, f32, 'fminp']
+      - [float32x4_t, f32, 'fminv']
+      - [float64x2_t, f64, 'fminp']
+    compose:
+      - LLVMLink:
+          name: "vminv{neon_type[0].no}"
+          links:
+            - link: "llvm.aarch64.neon.fminv.{type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
+
+  - name: "vpmin{neon_type.no}"
+    doc: "Folding minimum of adjacent pairs"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-stable
+    assert_instr: ['sminp']
+    safety:
+      unsafe: [neon]
+    types:
+      - int8x16_t
+      - int16x8_t
+      - int32x4_t
+    compose:
+      - LLVMLink:
+          name: "vpmin{neon_type.no}"
+          links:
+            - link: "llvm.aarch64.neon.sminp.{neon_type}"
+              arch: aarch64,arm64ec
+
+  - name: "vpmin{neon_type.no}"
+    doc: "Folding minimum of adjacent pairs"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-stable
+    assert_instr: ['uminp']
+    safety:
+      unsafe: [neon]
+    types:
+      - uint8x16_t
+      - uint16x8_t
+      - uint32x4_t
+    compose:
+      - LLVMLink:
+          name: "vpmin{neon_type.no}"
+          links:
+            - link: "llvm.aarch64.neon.uminp.{neon_type}"
+              arch: aarch64,arm64ec
+
+  - name: "vpmin{neon_type.no}"
+    doc: "Folding minimum of adjacent pairs"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-stable
+    assert_instr: ['fminp']
+    safety:
+      unsafe: [neon]
+    types:
+      - float32x4_t
+      - float64x2_t
+    compose:
+      - LLVMLink:
+          name: "vpmin{neon_type.no}"
+          links:
+            - link: "llvm.aarch64.neon.fminp.{neon_type}"
+              arch: aarch64,arm64ec
+
+  - name: "vpmax{neon_type.no}"
+    doc: "Folding maximum of adjacent pairs"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-stable
+    assert_instr: ['smaxp']
+    safety:
+      unsafe: [neon]
+    types:
+      - int8x16_t
+      - int16x8_t
+      - int32x4_t
+    compose:
+      - LLVMLink:
+          name: "vpmax{neon_type.no}"
+          links:
+            - link: "llvm.aarch64.neon.smaxp.{neon_type}"
+              arch: aarch64,arm64ec
+
+  - name: "vpmax{neon_type.no}"
+    doc: "Folding maximum of adjacent pairs"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-stable
+    assert_instr: ['umaxp']
+    safety:
+      unsafe: [neon]
+    types:
+      - uint8x16_t
+      - uint16x8_t
+      - uint32x4_t
+    compose:
+      - LLVMLink:
+          name: "vpmax{neon_type.no}"
+          links:
+            - link: "llvm.aarch64.neon.umaxp.{neon_type}"
+              arch: aarch64,arm64ec
+
+  - name: "vpmax{neon_type.no}"
+    doc: "Folding maximum of adjacent pairs"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-stable
+    assert_instr: ['fmaxp']
+    safety:
+      unsafe: [neon]
+    types:
+      - float32x4_t
+      - float64x2_t
+    compose:
+      - LLVMLink:
+          name: "vpmax{neon_type.no}"
+          links:
+            - link: "llvm.aarch64.neon.fmaxp.{neon_type}"
+              arch: aarch64,arm64ec
+
+  - name: "vsli{neon_type[0].N}"
+    doc: "Shift Left and Insert (immediate)"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sli, 'N = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - *neon-stable
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t,  'static_assert_uimm_bits!', 'N, 3']
+      - [int8x16_t, 'static_assert_uimm_bits!', 'N, 3']
+      - [int16x4_t, 'static_assert_uimm_bits!', 'N, 4']
+      - [int16x8_t, 'static_assert_uimm_bits!', 'N, 4']
+      - [int32x2_t, 'static_assert!', 'N >= 0 && N <= 31']
+      - [int32x4_t, 'static_assert!', 'N >= 0 && N <= 31']
+      - [int64x1_t, 'static_assert!', 'N >= 0 && N <= 63']
+      - [int64x2_t, 'static_assert!', 'N >= 0 && N <= 63']
+    compose:
+      - FnCall: ['{type[1]}', ['{type[2]}']]
+      - LLVMLink:
+          name: "vsli{neon_type[0].N}"
+          arguments:
+            - "a: {neon_type[0]}"
+            - "b: {neon_type[0]}"
+            - "n: i32"
+          links:
+            - link: "llvm.aarch64.neon.vsli.{neon_type[0]}"
+              arch: aarch64,arm64ec
+      - FnCall: ["_vsli{neon_type[0].N}", [a, b, N]]
+
+  - name: "vsli{neon_type[0].N}"
+    doc: "Shift Left and Insert (immediate)"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "{type[4]}"']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sli, 'N = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - *neon-stable
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t,  int8x8_t,  'static_assert_uimm_bits!', 'N, 3', "neon"]
+      - [uint8x16_t, int8x16_t, 'static_assert_uimm_bits!', 'N, 3', "neon"]
+      - [uint16x4_t, int16x4_t, 'static_assert_uimm_bits!', 'N, 4', "neon"]
+      - [uint16x8_t, int16x8_t, 'static_assert_uimm_bits!', 'N, 4', "neon"]
+      - [uint32x2_t, int32x2_t, 'static_assert!', 'N >= 0 && N <= 31', "neon"]
+      - [uint32x4_t, int32x4_t, 'static_assert!', 'N >= 0 && N <= 31', "neon"]
+      - [uint64x1_t, int64x1_t, 'static_assert!', 'N >= 0 && N <= 63', "neon"]
+      - [uint64x2_t, int64x2_t, 'static_assert!', 'N >= 0 && N <= 63', "neon"]
+      - [poly8x8_t,  int8x8_t,  'static_assert_uimm_bits!', 'N, 3', "neon"]
+      - [poly8x16_t, int8x16_t, 'static_assert_uimm_bits!', 'N, 3', "neon"]
+      - [poly16x4_t, int16x4_t, 'static_assert_uimm_bits!', 'N, 4', "neon"]
+      - [poly16x8_t, int16x8_t, 'static_assert_uimm_bits!', 'N, 4', "neon"]
+      - [poly64x1_t, int64x1_t, 'static_assert!', 'N >= 0 && N <= 63', "neon,aes"]
+      - [poly64x2_t, int64x2_t, 'static_assert!', 'N >= 0 && N <= 63', "neon,aes"]
+    compose:
+      - FnCall: ['{type[2]}', ['{type[3]}']]
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - 'vsli{neon_type[1].N}::<N>'
+                - - FnCall:
+                      - transmute
+                      - - a
+                  - FnCall:
+                      - transmute
+                      - - b
+
+  - name: "vsri{neon_type[0].N}"
+    doc: "Shift Right and Insert (immediate)"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sri, 'N = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - *neon-stable
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t,  'N >= 1 && N <= 8']
+      - [int8x16_t, 'N >= 1 && N <= 8']
+      - [int16x4_t, 'N >= 1 && N <= 16']
+      - [int16x8_t, 'N >= 1 && N <= 16']
+      - [int32x2_t, 'N >= 1 && N <= 32']
+      - [int32x4_t, 'N >= 1 && N <= 32']
+      - [int64x1_t, 'N >= 1 && N <= 64']
+      - [int64x2_t, 'N >= 1 && N <= 64']
+    compose:
+      - FnCall: ['static_assert!', ['{type[1]}']]
+      - LLVMLink:
+          name: "vsri{neon_type[0].N}"
+          arguments:
+            - "a: {neon_type[0]}"
+            - "b: {neon_type[0]}"
+            - "n: i32"
+          links:
+            - link: "llvm.aarch64.neon.vsri.{neon_type[0]}"
+              arch: aarch64,arm64ec
+      - FnCall: ["_vsri{neon_type[0].N}", [a, b, N]]
+
+  - name: "vsri{neon_type[0].N}"
+    doc: "Shift Right and Insert (immediate)"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "{type[3]}"']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sri, 'N = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+      - *neon-stable
+    static_defs: ['const N: i32']
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t,  int8x8_t,  'N >= 1 && N <= 8', "neon"]
+      - [uint8x16_t, int8x16_t, 'N >= 1 && N <= 8', "neon"]
+      - [uint16x4_t, int16x4_t, 'N >= 1 && N <= 16', "neon"]
+      - [uint16x8_t, int16x8_t, 'N >= 1 && N <= 16', "neon"]
+      - [uint32x2_t, int32x2_t, 'N >= 1 && N <= 32', "neon"]
+      - [uint32x4_t, int32x4_t, 'N >= 1 && N <= 32', "neon"]
+      - [uint64x1_t, int64x1_t, 'N >= 1 && N <= 64', "neon"]
+      - [uint64x2_t, int64x2_t, 'N >= 1 && N <= 64', "neon"]
+      - [poly8x8_t,  int8x8_t,  'N >= 1 && N <= 8', "neon"]
+      - [poly8x16_t, int8x16_t, 'N >= 1 && N <= 8', "neon"]
+      - [poly16x4_t, int16x4_t, 'N >= 1 && N <= 16', "neon"]
+      - [poly16x8_t, int16x8_t, 'N >= 1 && N <= 16', "neon"]
+      - [poly64x1_t, int64x1_t, 'N >= 1 && N <= 64', "neon,aes"]
+      - [poly64x2_t, int64x2_t, 'N >= 1 && N <= 64', "neon,aes"]
+    compose:
+      - FnCall: ['static_assert!', ['{type[2]}']]
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - 'vsri{neon_type[1].N}::<N>'
+                - - FnCall:
+                      - transmute
+                      - - a
+                  - FnCall:
+                      - transmute
+                      - - b
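+
+  # As with vsli, the unsigned and poly variants above delegate to the signed
+  # implementation via transmute; both instructions operate on raw bit
+  # patterns, so reinterpreting the lanes does not change the result.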
diff --git a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index a3c26a709b..5d20bfc90c 100644
--- a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -2,6 +2,8 @@ arch_cfgs:
   - arch_name: aarch64
     target_feature: [neon]
     llvm_prefix: llvm.aarch64.neon
+# Generate big-endian shuffles
+auto_big_endian: true
 
 # Repeatedly used anchors 
 # #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -31,6 +33,9 @@ target-is-arm: &target-is-arm
 target-not-arm: &target-not-arm
   FnCall: [cfg, [{ FnCall: [not, ['target_arch = "arm"']]}]]
 
+not-arm: &not-arm
+  FnCall: [not, ['target_arch = "arm"']]
+
 neon-target-aarch64-arm64ec: &neon-target-aarch64-arm64ec
   FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]
 
@@ -66,6 +71,12 @@ neon-unstable-i8mm: &neon-unstable-i8mm
 neon-unstable-fcma: &neon-unstable-fcma
   FnCall: [unstable, ['feature = "stdarch_neon_fcma"', 'issue = "117222"']]
 
+arm-crc-unstable: &arm-crc-unstable
+  FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_aarch32_crc32"', 'issue = "125085"']]}]]
+
+aarch64-crc-stable: &aarch64-crc-stable
+  FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "stdarch_aarch64_crc32"', 'since = "1.80.0"']]}]]
+
 intrinsics:
   - name: "vand{neon_type.no}"
     doc: Vector bitwise and
@@ -612,24 +623,6 @@ intrinsics:
           - "vcls{neon_type[1].no}"
           - - FnCall: [transmute, [a]]
 
-  - name: "vclz{neon_type.no}"
-    doc: "Count leading zero bits"
-    arguments: ["a: {neon_type}"]
-    return_type: "{neon_type}"
-    attr:
-      - FnCall: [cfg_attr, [target_arch = "arm", {FnCall: [target_feature, ['enable = "v7"']]}]]
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, 'target_arch = "arm"']]}, {FnCall: [assert_instr, ['"vclz.i8"']]}]]
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}]]}, {FnCall: [assert_instr, [clz]]}]]
-      - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
-      - FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
-    safety:
-      unsafe: [neon]
-    types:
-      - int8x8_t
-      - int8x16_t
-    compose:
-      - FnCall: ["vclz{neon_type.no}_", [a]]
-
   - name: "vclz{neon_type[0].no}"
     doc: "Count leading zero bits"
     arguments: ["a: {neon_type[0]}"]
@@ -649,7 +642,7 @@ intrinsics:
       - FnCall:
           - transmute
           - - FnCall:
-                - "vclz{neon_type[1].no}_"
+                - "vclz{neon_type[1].no}"
                 - - FnCall: [transmute, [a]]
 
   - name: "vclz{neon_type[0].no}"
@@ -665,12 +658,20 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
+      - [int8x8_t, '.i8']
+      - [int8x16_t, '.i8']
       - [int16x4_t, '.i16']
       - [int16x8_t, '.i16']
       - [int32x2_t, '.i32']
       - [int32x4_t, '.i32']
     compose:
-      - FnCall: ["vclz{neon_type[0].no}_", [a]]
+      - LLVMLink:
+          name: "vclz{neon_type[0].no}"
+          links:
+            - link: "llvm.ctlz.{neon_type[0]}"
+              arch: arm
+            - link: "llvm.ctlz.{neon_type[0]}"
+              arch: aarch64,arm64ec
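+      # llvm.ctlz is LLVM's target-independent count-leading-zeros intrinsic,
+      # hence the identical link name for both the arm and aarch64 arches.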
 
   - name: "vclz{neon_type[0].no}"
     doc: "Count leading zero bits"
@@ -693,7 +694,7 @@ intrinsics:
       - FnCall:
           - transmute
           - - FnCall:
-                - "vclz{neon_type[2].no}_"
+                - "vclz{neon_type[2].no}"
                 - - FnCall: [transmute, [a]]
 
   - name: "vcagt{neon_type[0].no}"
@@ -1281,14 +1282,15 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [int8x8_t, ' static_assert_uimm_bits!(N, 3); match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
-      - [int16x8_t, ' static_assert_uimm_bits!(N, 3); match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
-      - [uint8x8_t, ' static_assert_uimm_bits!(N, 3); match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
-      - [uint16x8_t, ' static_assert_uimm_bits!(N, 3); match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
-      - [poly8x8_t, ' static_assert_uimm_bits!(N, 3); match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
-      - [poly16x8_t, ' static_assert_uimm_bits!(N, 3); match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
+      - [int8x8_t, ' static_assert_uimm_bits!(N, 3);', 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
+      - [int16x8_t, ' static_assert_uimm_bits!(N, 3);', 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
+      - [uint8x8_t, ' static_assert_uimm_bits!(N, 3);', 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
+      - [uint16x8_t, ' static_assert_uimm_bits!(N, 3);', 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
+      - [poly8x8_t, ' static_assert_uimm_bits!(N, 3);', 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
+      - [poly16x8_t, ' static_assert_uimm_bits!(N, 3);', 'match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), }']
     compose:
       - Identifier: ["{type[1]}", Symbol]
+      - Identifier: ["{type[2]}", Symbol]
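+      # The assert and the shuffle match are kept as separate symbols,
+      # presumably so the big-endian generator can rewrite the simd_shuffle!
+      # index lists without touching the assertion.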
 
   - name: "vext{neon_type[0].no}"
     doc: "Extract vector from pair of vectors"
@@ -1305,11 +1307,12 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [int8x16_t, ' static_assert_uimm_bits!(N, 4); match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), }']
-      - [uint8x16_t, ' static_assert_uimm_bits!(N, 4); match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), }']
-      - [poly8x16_t, ' static_assert_uimm_bits!(N, 4); match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), }']
+      - [int8x16_t, ' static_assert_uimm_bits!(N, 4);', 'match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), }']
+      - [uint8x16_t, ' static_assert_uimm_bits!(N, 4);', 'match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), }']
+      - [poly8x16_t, ' static_assert_uimm_bits!(N, 4);', 'match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), }']
     compose:
       - Identifier: ["{type[1]}", Symbol]
+      - Identifier: ["{type[2]}", Symbol]
 
   - name: "vext{neon_type[0].no}"
     doc: "Extract vector from pair of vectors"
@@ -1326,14 +1329,15 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [int16x4_t, 'static_assert_uimm_bits!(N, 2); match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
-      - [int32x4_t, ' static_assert_uimm_bits!(N, 2); match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
-      - [uint16x4_t, ' static_assert_uimm_bits!(N, 2); match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
-      - [uint32x4_t, ' static_assert_uimm_bits!(N, 2); match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
-      - [poly16x4_t, ' static_assert_uimm_bits!(N, 2); match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
-      - [float32x4_t, ' static_assert_uimm_bits!(N, 2); match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
+      - [int16x4_t, 'static_assert_uimm_bits!(N, 2);', 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
+      - [int32x4_t, ' static_assert_uimm_bits!(N, 2);', 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
+      - [uint16x4_t, ' static_assert_uimm_bits!(N, 2);', 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
+      - [uint32x4_t, ' static_assert_uimm_bits!(N, 2);', 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
+      - [poly16x4_t, ' static_assert_uimm_bits!(N, 2);', 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
+      - [float32x4_t, ' static_assert_uimm_bits!(N, 2);', 'match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), }']
     compose:
       - Identifier: ["{type[1]}", Symbol]
+      - Identifier: ["{type[2]}", Symbol]
 
   - name: "vext{neon_type[0].no}"
     doc: "Extract vector from pair of vectors"
@@ -1350,11 +1354,12 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [int32x2_t, ' static_assert_uimm_bits!(N, 1); match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
-      - [uint32x2_t, ' static_assert_uimm_bits!(N, 1); match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
-      - [float32x2_t, ' static_assert_uimm_bits!(N, 1); match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
+      - [int32x2_t, ' static_assert_uimm_bits!(N, 1);', 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
+      - [uint32x2_t, ' static_assert_uimm_bits!(N, 1);', 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
+      - [float32x2_t, ' static_assert_uimm_bits!(N, 1);', 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
     compose:
       - Identifier: ["{type[1]}", Symbol]
+      - Identifier: ["{type[2]}", Symbol]
 
   - name: "vext{neon_type[0].no}"
     doc: "Extract vector from pair of vectors"
@@ -1371,10 +1376,11 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [int64x2_t, 'static_assert_uimm_bits!(N, 1); match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
-      - [uint64x2_t, 'static_assert_uimm_bits!(N, 1); match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
+      - [int64x2_t, 'static_assert_uimm_bits!(N, 1);', 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
+      - [uint64x2_t, 'static_assert_uimm_bits!(N, 1);', 'match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), }']
     compose:
       - Identifier: ["{type[1]}", Symbol]
+      - Identifier: ["{type[2]}", Symbol]
 
   - name: "vmla{neon_type[0].no}"
     doc: "Multiply-add to accumulator"
@@ -1838,7 +1844,7 @@ intrinsics:
           links:
             - link: "llvm.aarch64.neon.uqsub.v{neon_type[0].lane}{type[2]}"
               arch: aarch64,arm64ec
-            - link: "llvm.usub.sat.{neon_type[0].lane}{type[2]}"
+            - link: "llvm.usub.sat.v{neon_type[0].lane}{type[2]}"
               arch: arm
 
   - name: "vqsub{neon_type[0].no}"
@@ -1868,7 +1874,7 @@ intrinsics:
           links:
             - link: "llvm.aarch64.neon.sqsub.v{neon_type[0].lane}{type[2]}"
               arch: aarch64,arm64ec
-            - link: "llvm.ssub.sat.{neon_type[0].lane}{type[2]}"
+            - link: "llvm.ssub.sat.v{neon_type[0].lane}{type[2]}"
               arch: arm
 
   - name: "vhadd{neon_type.no}"
@@ -2166,9 +2172,9 @@ intrinsics:
       - LLVMLink:
           name: "vld1x{neon_type[1].tuple}.{neon_type[1]}"
           links:
-            - link: "llvm.aarch64.neon.ld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0f{neon_type[1].base}"
+            - link: "llvm.aarch64.neon.ld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0"
               arch: aarch64,arm64ec
-            - link: "llvm.arm.neon.vld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0f{neon_type[1].base}"
+            - link: "llvm.arm.neon.vld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0"
               arch: arm
 
   - name: "vld1{neon_type[1].no}"
@@ -2212,9 +2218,9 @@ intrinsics:
       - LLVMLink:
           name: "ld1x{neon_type[1].tuple}.{neon_type[1]}"
           links:
-            - link: "llvm.aarch64.neon.ld1x{neon_type[1].tuple}.v{neon_type[1].lane}i{neon_type[1].base}.p0i{neon_type[1].base}"
+            - link: "llvm.aarch64.neon.ld1x{neon_type[1].tuple}.v{neon_type[1].lane}i{neon_type[1].base}.p0"
               arch: aarch64,arm64ec
-            - link: "llvm.arm.neon.vld1x{neon_type[1].tuple}.v{neon_type[1].lane}i{neon_type[1].base}.p0i{neon_type[1].base}"
+            - link: "llvm.arm.neon.vld1x{neon_type[1].tuple}.v{neon_type[1].lane}i{neon_type[1].base}.p0"
               arch: arm
 
   - name: "vld1{neon_type[1].no}"
@@ -2354,7 +2360,7 @@ intrinsics:
             - "ptr: *const i8"
             - "size: i32"
           links:
-            - link: "llvm.arm.neon.vld2.v{neon_type[1].lane}{type[2]}.p0i8"
+            - link: "llvm.arm.neon.vld2.v{neon_type[1].lane}{type[2]}"
               arch: arm
       - FnCall:
           - "_vld2{neon_type[1].nox}"
@@ -2381,7 +2387,7 @@ intrinsics:
             - "ptr: *const i8"
             - "size: i32"
           links:
-            - link: "llvm.arm.neon.vld2.v{neon_type[1].lane}{type[2]}.p0i8"
+            - link: "llvm.arm.neon.vld2.v{neon_type[1].lane}{type[2]}"
               arch: arm
       - FnCall:
           - "_vld2{neon_type[1].nox}"
@@ -2413,7 +2419,7 @@ intrinsics:
           arguments:
             - "ptr: *const {neon_type[3]}"
           links:
-            - link: "llvm.aarch64.neon.ld2.v{neon_type[1].lane}{type[2]}.p0v{neon_type[1].lane}{type[2]}"
+            - link: "llvm.aarch64.neon.ld2.v{neon_type[1].lane}{type[2]}.p0"
               arch: aarch64,arm64ec
       - FnCall:
           - "_vld2{neon_type[1].nox}"
@@ -2437,7 +2443,7 @@ intrinsics:
           arguments:
             - "ptr: *const {neon_type[3]}"
           links:
-            - link: "llvm.aarch64.neon.ld2.v{neon_type[1].lane}{type[2]}.p0v{neon_type[1].lane}{type[2]}"
+            - link: "llvm.aarch64.neon.ld2.v{neon_type[1].lane}{type[2]}.p0"
               arch: aarch64,arm64ec
       - FnCall:
           - "_vld2{neon_type[1].nox}"
@@ -2556,7 +2562,7 @@ intrinsics:
             - "n: i32"
             - "size: i32"
           links:
-            - link: "llvm.arm.neon.vld2lane.v{neon_type[1].lane}{type[2]}.p0i8"
+            - link: "llvm.arm.neon.vld2lane.v{neon_type[1].lane}{type[2]}.p0"
               arch: arm
       - FnCall:
           - "_vld2_lane{neon_type[1].nox}"
@@ -2642,7 +2648,7 @@ intrinsics:
             - "n: i64"
             - "ptr: *const i8"
           links:
-            - link: "llvm.aarch64.neon.ld2lane.v{neon_type[1].lane}{type[2]}.p0i8"
+            - link: "llvm.aarch64.neon.ld2lane.v{neon_type[1].lane}{type[2]}.p0"
               arch: aarch64,arm64ec
       - FnCall:
           - "_vld2{neon_type[1].lane_nox}"
@@ -2690,7 +2696,7 @@ intrinsics:
             - "n: i32"
             - "size: i32"
           links:
-            - link: "llvm.arm.neon.vld2lane.v{neon_type[1].lane}{type[2]}.p0i8"
+            - link: "llvm.arm.neon.vld2lane.v{neon_type[1].lane}{type[2]}.p0"
               arch: arm
       - FnCall:
           - "_vld2{neon_type[1].lane_nox}"
@@ -2720,7 +2726,7 @@ intrinsics:
             - "ptr: *const i8"
             - "size: i32"
           links:
-            - link: "llvm.arm.neon.vld2dup.v{neon_type[1].lane}{type[2]}.p0i8"
+            - link: "llvm.arm.neon.vld2dup.v{neon_type[1].lane}{type[2]}.p0"
               arch: arm
       - FnCall:
           - "_vld2{neon_type[1].dup_nox}"
@@ -2745,7 +2751,7 @@ intrinsics:
           arguments:
             - "ptr: *const i64"
           links:
-            - link: "llvm.aarch64.neon.ld2r.v{neon_type[1].lane}{type[2]}.p0i64"
+            - link: "llvm.aarch64.neon.ld2r.v{neon_type[1].lane}{type[2]}.p0"
               arch: aarch64,arm64ec
       - FnCall:
           - "_vld2{neon_type[1].dup_nox}"
@@ -2778,7 +2784,7 @@ intrinsics:
             - "ptr: *const i8"
             - "size: i32"
           links:
-            - link: "llvm.arm.neon.vld2dup.v{neon_type[1].lane}{type[2]}.p0i8"
+            - link: "llvm.arm.neon.vld2dup.v{neon_type[1].lane}{type[2]}.p0"
               arch: arm
       - FnCall:
           - "_vld2{neon_type[1].dup_nox}"
@@ -2893,7 +2899,7 @@ intrinsics:
           arguments:
             - "ptr: {type[0]}"
           links:
-            - link: "llvm.aarch64.neon.ld2r.v{neon_type[1].lane}{type[2]}.p0{type[2]}"
+            - link: "llvm.aarch64.neon.ld2r.v{neon_type[1].lane}{type[2]}.p0"
               arch: aarch64,arm64ec
       - FnCall:
           - "_vld2{neon_type[1].dup_nox}"
@@ -2929,7 +2935,7 @@ intrinsics:
             - 'n: i64'
             - 'ptr: *const i8'
           links:
-            - link: 'llvm.aarch64.neon.ld3lane.v{neon_type[1].lane}{type[3]}.p0i8'
+            - link: 'llvm.aarch64.neon.ld3lane.v{neon_type[1].lane}{type[3]}.p0'
               arch: aarch64,arm64ec
       - FnCall: ['_vld3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']]
 
@@ -2958,7 +2964,7 @@ intrinsics:
           arguments:
             - 'ptr: {type[2]}'
           links:
-            - link: 'llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[3]}.p0v{neon_type[1].lane}{type[3]}'
+            - link: 'llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[3]}.p0'
               arch: aarch64,arm64ec
       - FnCall: ['_vld3{neon_type[1].nox}', ['a as _']]
 
@@ -2980,7 +2986,7 @@ intrinsics:
           arguments:
             - 'ptr: {type[2]}'
           links:
-            - link: 'llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[3]}.p0v{neon_type[1].lane}{type[3]}'
+            - link: 'llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[3]}.p0'
               arch: aarch64,arm64ec
       - FnCall: ['_vld3{neon_type[1].nox}', ['a as _']]
 
@@ -3011,7 +3017,7 @@ intrinsics:
             - 'ptr: *const i8'
             - 'size: i32'
           links:
-            - link: 'llvm.arm.neon.vld3.v{neon_type[1].lane}{type[2]}.p0i8'
+            - link: 'llvm.arm.neon.vld3.v{neon_type[1].lane}{type[2]}.p0'
               arch: arm
       - FnCall: ['_vld3{neon_type[1].nox}', ['a as *const i8', '{neon_type[1].base_byte_size}']]
 
@@ -3035,7 +3041,7 @@ intrinsics:
             - 'ptr: *const i8'
             - 'size: i32'
           links:
-            - link: 'llvm.arm.neon.vld3.v{neon_type[1].lane}{type[2]}.p0i8'
+            - link: 'llvm.arm.neon.vld3.v{neon_type[1].lane}{type[2]}.p0'
               arch: arm
       - FnCall: ['_vld3{neon_type[1].nox}', ['a as *const i8', '{neon_type[1].base_byte_size}']]
 
@@ -3065,7 +3071,7 @@ intrinsics:
             - 'n: i64'
             - 'ptr: *const i8'
           links:
-            - link: 'llvm.aarch64.neon.ld3lane.v{neon_type[1].lane}{type[3]}.p0i8'
+            - link: 'llvm.aarch64.neon.ld3lane.v{neon_type[1].lane}{type[3]}.p0'
               arch: aarch64,arm64ec
       - FnCall: ['_vld3{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'LANE as i64', 'a as _']]
 
@@ -3096,7 +3102,7 @@ intrinsics:
             - 'n: i32'
             - 'size: i32'
           links:
-            - link: 'llvm.arm.neon.vld3lane.v{neon_type[1].lane}{type[3]}.p0i8'
+            - link: 'llvm.arm.neon.vld3lane.v{neon_type[1].lane}{type[3]}.p0'
               arch: arm
       - FnCall: ['_vld3{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'LANE', '{type[5]}']]
 
@@ -3131,7 +3137,7 @@ intrinsics:
             - 'n: i32'
             - 'size: i32'
           links:
-            - link: 'llvm.arm.neon.vld3lane.v{neon_type[1].lane}{type[3]}.p0i8'
+            - link: 'llvm.arm.neon.vld3lane.v{neon_type[1].lane}{type[3]}.p0'
               arch: arm
       - FnCall: ['_vld3{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'LANE', '{type[5]}']]
 
@@ -3162,7 +3168,7 @@ intrinsics:
             - 'n: i32'
             - 'size: i32'
           links:
-            - link: 'llvm.arm.neon.vld3lane.v{neon_type[1].lane}{type[3]}.p0i8'
+            - link: 'llvm.arm.neon.vld3lane.v{neon_type[1].lane}{type[3]}.p0'
               arch: arm
       - FnCall: ['_vld3{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'LANE', '{type[5]}']]
 
@@ -3297,7 +3303,7 @@ intrinsics:
           arguments:
             - 'ptr: {type[0]}'
           links:
-            - link: 'llvm.aarch64.neon.ld3r.v{neon_type[1].lane}{type[2]}.p0{type[2]}'
+            - link: 'llvm.aarch64.neon.ld3r.v{neon_type[1].lane}{type[2]}.p0'
               arch: aarch64,arm64ec
       - FnCall: ['_vld3{neon_type[1].dup_nox}', ['a as _']]
 
@@ -3325,7 +3331,7 @@ intrinsics:
             - 'ptr: *const i8'
             - 'size: i32'
           links:
-            - link: 'llvm.arm.neon.vld3dup.v{neon_type[1].lane}{type[2]}.p0i8'
+            - link: 'llvm.arm.neon.vld3dup.v{neon_type[1].lane}{type[2]}.p0'
               arch: arm
       - FnCall: ['_vld3{neon_type[1].dup_nox}', ['a as *const i8', '{type[3]}']]
 
@@ -3378,7 +3384,7 @@ intrinsics:
             - 'ptr: *const i8'
             - 'size: i32'
           links:
-            - link: 'llvm.arm.neon.vld3dup.v{neon_type[1].lane}{type[2]}.p0i8'
+            - link: 'llvm.arm.neon.vld3dup.v{neon_type[1].lane}{type[2]}.p0'
               arch: arm
       - FnCall: ['_vld3{neon_type[1].dup_nox}', ['a as *const i8', '{type[3]}']]
 
@@ -3454,7 +3460,7 @@ intrinsics:
           arguments:
             - 'ptr: {type[3]}'
           links:
-            - link: 'llvm.aarch64.neon.ld4.v{neon_type[1].lane}{type[2]}.p0v{neon_type[1].lane}{type[2]}'
+            - link: 'llvm.aarch64.neon.ld4.v{neon_type[1].lane}{type[2]}.p0'
               arch: aarch64,arm64ec
       - FnCall: ['_vld4{neon_type[1].nox}', ['a as _']]
 
@@ -3474,7 +3480,7 @@ intrinsics:
           arguments:
             - 'ptr: {type[3]}'
           links:
-            - link: 'llvm.aarch64.neon.ld4.v{neon_type[1].lane}{type[2]}.p0v{neon_type[1].lane}{type[2]}'
+            - link: 'llvm.aarch64.neon.ld4.v{neon_type[1].lane}{type[2]}.p0'
               arch: aarch64,arm64ec
       - FnCall: ['_vld4{neon_type[1].nox}', ['a as _']]
 
@@ -3511,7 +3517,7 @@ intrinsics:
             - 'n: i64'
             - 'ptr: *const i8'
           links:
-            - link: 'llvm.aarch64.neon.ld4lane.v{neon_type[1].lane}{type[3]}.p0i8'
+            - link: 'llvm.aarch64.neon.ld4lane.v{neon_type[1].lane}{type[3]}.p0'
               arch: aarch64,arm64ec
       - FnCall: ['_vld4{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'LANE as i64', 'a as _']]
 
@@ -3542,7 +3548,7 @@ intrinsics:
             - 'ptr: *const i8'
             - 'size: i32'
           links:
-            - link: 'llvm.arm.neon.vld4.v{neon_type[1].lane}{type[2]}.p0i8'
+            - link: 'llvm.arm.neon.vld4.v{neon_type[1].lane}{type[2]}.p0'
               arch: arm
       - FnCall: ['_vld4{neon_type[1].nox}', ['a as *const i8', '{type[3]}']]
 
@@ -3566,7 +3572,7 @@ intrinsics:
             - 'ptr: *const i8'
             - 'size: i32'
           links:
-            - link: 'llvm.arm.neon.vld4.v{neon_type[1].lane}{type[2]}.p0i8'
+            - link: 'llvm.arm.neon.vld4.v{neon_type[1].lane}{type[2]}.p0'
               arch: arm
       - FnCall: ['_vld4{neon_type[1].nox}', ['a as *const i8', '{type[3]}']]
 
@@ -3685,7 +3691,7 @@ intrinsics:
             - 'n: i32'
             - 'size: i32'
           links:
-            - link: 'llvm.arm.neon.vld4lane.v{neon_type[1].lane}{type[3]}.p0i8'
+            - link: 'llvm.arm.neon.vld4lane.v{neon_type[1].lane}{type[3]}.p0'
               arch: arm
       - FnCall: ['_vld4{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', LANE, '{type[4]}']]
 
@@ -3955,7 +3961,7 @@ intrinsics:
             - 'a: {type[2]}'
             - 'b: {type[2]}'
           links:
-            - link: 'llvm.arm.neon.vst1x2.p0{type[0]}.v{neon_type[1].lane}{type[0]}'
+            - link: 'llvm.arm.neon.vst1x2.v{neon_type[1].lane}{type[0]}.p0'
               arch: arm
       - FnCall: ['_vst1{neon_type[1].no}', ['a', 'b.0', 'b.1']]
 
@@ -3987,7 +3993,7 @@ intrinsics:
             - 'b: {type[2]}'
             - 'c: {type[2]}'
           links:
-            - link: 'llvm.arm.neon.vst1x3.p0{type[0]}.v{neon_type[1].lane}{type[0]}'
+            - link: 'llvm.arm.neon.vst1x3.v{neon_type[1].lane}{type[0]}.p0'
               arch: arm
       - FnCall: ['_vst1{neon_type[1].no}', ['a', 'b.0', 'b.1', 'b.2']]
 
@@ -4020,7 +4026,7 @@ intrinsics:
             - 'c: {type[2]}'
             - 'd: {type[2]}'
           links:
-            - link: 'llvm.arm.neon.vst1x4.p0{type[0]}.v{neon_type[1].lane}{type[0]}'
+            - link: 'llvm.arm.neon.vst1x4.v{neon_type[1].lane}{type[0]}.p0'
               arch: arm
       - FnCall: ['_vst1{neon_type[1].no}', ['a', 'b.0', 'b.1', 'b.2', 'b.3']]
 
@@ -4047,7 +4053,7 @@ intrinsics:
             - 'c: {type[2]}'
             - 'd: {type[2]}'
           links:
-            - link: 'llvm.arm.neon.vst1x4.p0{type[0]}.v{neon_type[1].lane}{type[0]}'
+            - link: 'llvm.arm.neon.vst1x4.v{neon_type[1].lane}{type[0]}.p0'
               arch: arm
       - FnCall: ['_vst1{neon_type[1].no}', ['a', 'b.0', 'b.1', 'b.2', 'b.3']]
 
@@ -4092,7 +4098,7 @@ intrinsics:
             - 'b: {type[2]}'
             - 'size: i32'
           links:
-            - link: 'llvm.arm.neon.vst2.p0i8.v{neon_type[1].lane}{type[0]}'
+            - link: 'llvm.arm.neon.vst2.v{neon_type[1].lane}{type[0]}.p0'
               arch: arm
       - FnCall: ['_vst2{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', '8']]
 
@@ -4286,7 +4292,7 @@ intrinsics:
             - 'b: {type[2]}'
             - 'size: i32'
           links:
-            - link: 'llvm.arm.neon.vst2.p0i8.v{neon_type[1].lane}{type[0]}'
+            - link: 'llvm.arm.neon.vst2.v{neon_type[1].lane}{type[0]}.p0'
               arch: arm
       - FnCall: ['_vst2{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', "{type[3]}"]]
 
@@ -4321,7 +4327,7 @@ intrinsics:
             - 'n: i32'
             - 'size: i32'
           links:
-            - link: 'llvm.arm.neon.vst2lane.p0i8.v{neon_type[1].lane}{type[0]}'
+            - link: 'llvm.arm.neon.vst2lane.v{neon_type[1].lane}{type[0]}.p0'
               arch: arm
       - FnCall: ['_vst2{neon_type[1].lane_nox}', ['a as _', 'b.0', 'b.1', 'LANE', "{type[4]}"]]
 
@@ -5141,14 +5147,14 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - ["p8", poly8x8_t, poly16x8_t, int8x8_t]
+      - ["p8", poly8x8_t, poly16x8_t]
     compose:
       - LLVMLink:
           name: "pmull.{neon_type[1].no}"
           links:
-            - link: "llvm.aarch64.neon.pmull.{neon_type[3]}"
+            - link: "llvm.aarch64.neon.pmull.v8i16"
               arch: aarch64,arm64ec
-            - link: "llvm.arm.neon.vmullp.{neon_type[3]}"
+            - link: "llvm.arm.neon.vmullp.v8i16"
               arch: arm
 
   - name: "vmull_n{neon_type[0].no}"
@@ -8420,7 +8426,7 @@ intrinsics:
             - "a: {neon_type[2]}"
             - "b: {neon_type[2]}"
           links:
-            - link: "llvm.arm.neon.vst1x{neon_type[1].tuple}.p0f32.{neon_type[2]}"
+            - link: "llvm.arm.neon.vst1x{neon_type[1].tuple}.{neon_type[2]}.p0"
               arch: arm
       - FnCall: ["_vst1{neon_type[1].no}", ['a', 'b.0', 'b.1']]
 
@@ -8446,7 +8452,7 @@ intrinsics:
             - "b: {neon_type[2]}"
             - "c: {neon_type[2]}"
           links:
-            - link: "llvm.arm.neon.vst1x{neon_type[1].tuple}.p0f32.{neon_type[2]}"
+            - link: "llvm.arm.neon.vst1x{neon_type[1].tuple}.{neon_type[2]}.p0"
               arch: arm
       - FnCall: ["_vst1{neon_type[1].no}", ['a', 'b.0', 'b.1', 'b.2']]
 
@@ -9662,7 +9668,7 @@ intrinsics:
             - "ptr: *const i8"
             - "size: i32"
           links:
-            - link: "llvm.arm.neon.vld4dup.{neon_type[2]}.p0i8"
+            - link: "llvm.arm.neon.vld4dup.{neon_type[2]}.p0"
               arch: arm
       - FnCall: ["_vld4{neon_type[1].dup_nox}", ['a as *const i8', "{type[3]}"]]
 
@@ -9692,7 +9698,7 @@ intrinsics:
           arguments:
             - "ptr: {type[0]}"
           links:
-            - link: "llvm.aarch64.neon.ld4r.{neon_type[2]}.{type[3]}"
+            - link: "llvm.aarch64.neon.ld4r.{neon_type[2]}.{type[3]}.p0"
               arch: aarch64,arm64ec
       - FnCall: ["_vld4{neon_type[1].dup_nox}", ['a as _']]
 
@@ -9716,7 +9722,7 @@ intrinsics:
             - "ptr: *const i8"
             - "size: i32"
           links:
-            - link: "llvm.arm.neon.vld4dup.v1i64.p0i8"
+            - link: "llvm.arm.neon.vld4dup.v1i64.p0"
               arch: arm
       - FnCall: ["_vld4{neon_type[1].dup_nox}", ['a as *const i8', '8']]
 
@@ -9787,3 +9793,1990 @@ intrinsics:
           - "transmute"
           - - FnCall: ["vld4{neon_type[2].dup_nox}", [{FnCall: [transmute, [a]]}]]
 
+  - name: "vld1{type[0]}"
+    visibility: private
+    doc: "Load multiple single-element structures to one, two, three, or four registers"
+    arguments: ["a: {type[1]}", "b: {type[2]}"]
+    return_type: "{neon_type[3]}"
+    attr:
+      - FnCall: [cfg, ['target_arch = "arm"']]
+      - FnCall: [target_feature, ['enable = "neon,v7"']]
+      # - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]]
+      - FnCall: [unstable, ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]
+    safety:
+      unsafe: [neon]
+    types:
+      - ["_v8i8", "*const i8", "i32", "int8x8_t"]
+      - ["q_v16i8", "*const i8", "i32", "int8x16_t"]
+      - ["_v4i16", "*const i8", "i32", "int16x4_t"]
+      - ["q_v8i16", "*const i8", "i32", "int16x8_t"]
+      - ["_v2i32", "*const i8", "i32", "int32x2_t"]
+      - ["q_v4i32", "*const i8", "i32", "int32x4_t"]
+      - ["_v1i64", "*const i8", "i32", "int64x1_t"]
+      - ["q_v2i64", "*const i8", "i32", "int64x2_t"]
+      - ["_v2f32", "*const i8", "i32", "float32x2_t"]
+      - ["q_v4f32", "*const i8", "i32", "float32x4_t"]
+    compose:
+      - LLVMLink:
+          name: "vld1.{type[0]}"
+          links:
+            - link: "llvm.arm.neon.vld1.{neon_type[3]}"
+              arch: arm
+      - FnCall: ["_vld1{type[0]}", [a, b]]
+
+  - name: "vld1{neon_type[1].no}"
+    doc: "Load multiple single-element structures to one, two, three, or four registers."
+    arguments: ["ptr: {type[0]}"]
+    return_type: "{neon_type[1]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - FnCall: [cfg, ['target_arch = "arm"']]
+      - FnCall: [target_feature, ['enable = "neon,v7"']]
+      - *neon-unstable
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]]
+    types:
+      - ['*const i8',  int8x8_t,  '"vld1.8"', 'crate::mem::align_of::<i8>() as i32', '_v8i8']
+      - ['*const i8',  int8x16_t, '"vld1.8"', 'crate::mem::align_of::<i8>() as i32', 'q_v16i8']
+      - ['*const i16', int16x4_t, '"vld1.16"', 'crate::mem::align_of::<i16>() as i32', '_v4i16']
+      - ['*const i16', int16x8_t, '"vld1.16"', 'crate::mem::align_of::<i16>() as i32', 'q_v8i16']
+      - ['*const i32', int32x2_t, 'vldr', 'crate::mem::align_of::<i32>() as i32', '_v2i32']
+      - ['*const i32', int32x4_t, '"vld1.32"', 'crate::mem::align_of::<i32>() as i32', 'q_v4i32']
+      - ['*const i64', int64x1_t, 'vldr', 'crate::mem::align_of::<i64>() as i32', '_v1i64']
+      - ['*const i64', int64x2_t, '"vld1.64"', 'crate::mem::align_of::<i64>() as i32', 'q_v2i64']
+    compose:
+      - FnCall:
+          - "vld1{type[4]}"
+          - - 'ptr as *const i8'
+            - '{type[3]}'
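+      # e.g. vld1_s8(ptr) becomes
+      # vld1_v8i8(ptr as *const i8, crate::mem::align_of::<i8>() as i32)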
+
+  - name: "vld1{neon_type[1].no}"
+    doc: "Load multiple single-element structures to one, two, three, or four registers."
+    arguments: ["ptr: {type[0]}"]
+    return_type: "{neon_type[1]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - FnCall: [cfg, ['target_arch = "arm"']]
+      - FnCall: [target_feature, ['enable = "{type[3]}"']]
+      - *neon-unstable
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]]
+    types:
+      - ['*const u8',  uint8x8_t,   '"vld1.8"', 'neon,v7', 'crate::mem::align_of::<u8>() as i32', '_v8i8']
+      - ['*const u8',  uint8x16_t,  '"vld1.8"', 'neon,v7', 'crate::mem::align_of::<u8>() as i32', 'q_v16i8']
+      - ['*const u16', uint16x4_t,  '"vld1.16"', 'neon,v7', 'crate::mem::align_of::<u16>() as i32', '_v4i16']
+      - ['*const u16', uint16x8_t,  '"vld1.16"', 'neon,v7', 'crate::mem::align_of::<u16>() as i32', 'q_v8i16']
+      - ['*const u32', uint32x2_t,  'vldr', 'neon,v7', 'crate::mem::align_of::<u32>() as i32', '_v2i32']
+      - ['*const u32', uint32x4_t,  '"vld1.32"', 'neon,v7', 'crate::mem::align_of::<u32>() as i32', 'q_v4i32']
+      - ['*const u64', uint64x1_t,  'vldr', 'neon,v7', 'crate::mem::align_of::<u64>() as i32', '_v1i64']
+      - ['*const u64', uint64x2_t,  '"vld1.64"', 'neon,v7', 'crate::mem::align_of::<u64>() as i32', 'q_v2i64']
+      - ['*const p8',  poly8x8_t,   '"vld1.8"', 'neon,v7', 'crate::mem::align_of::<p8>() as i32', '_v8i8']
+      - ['*const p8',  poly8x16_t,  '"vld1.8"', 'neon,v7', 'crate::mem::align_of::<p8>() as i32', 'q_v16i8']
+      - ['*const p16', poly16x4_t,  '"vld1.16"', 'neon,v7', 'crate::mem::align_of::<p16>() as i32', '_v4i16']
+      - ['*const p16', poly16x8_t,  '"vld1.16"', 'neon,v7', 'crate::mem::align_of::<p16>() as i32', 'q_v8i16']
+      - ['*const p64', poly64x2_t,  '"vld1.64"', 'neon,aes', 'crate::mem::align_of::<p64>() as i32', 'q_v2i64']
+      - ['*const f32', float32x2_t, 'vldr', 'neon,v7', 'crate::mem::align_of::<f32>() as i32', '_v2f32']
+      - ['*const f32', float32x4_t, '"vld1.32"', 'neon,v7', 'crate::mem::align_of::<f32>() as i32', 'q_v4f32']
+    compose:
+      - FnCall:
+        - transmute
+        - - FnCall:
+              - "vld1{type[5]}"
+              - - 'ptr as *const i8'
+                - '{type[4]}'
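+      # These reuse the signed vld1_v* loaders and transmute the result; vld1
+      # is a raw bit-level load, so the reinterpretation is purely at the type
+      # level.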
+
+  - name: "vld1{neon_type[1].no}"
+    doc: "Load multiple single-element structures to one, two, three, or four registers."
+    arguments: ["ptr: {type[0]}"]
+    return_type: "{neon_type[1]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - FnCall: [cfg, ['target_arch = "arm"']]
+      - FnCall: [target_feature, ['enable = "neon,aes"']]
+      - *neon-unstable
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['vldr']]}]]
+    types:
+      - ['*const p64', poly64x1_t]
+    compose:
+      # Inlining seems broken for 'fn vld1_v1i64'; this "fixes" it
+      - Let: [a, '*const i8', 'ptr as *const i8']
+      - Let: [b, i32, 'crate::mem::align_of::<p64>() as i32']
+      - 'unsafe extern "unadjusted" {{ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")] fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t; }} transmute(_vld1_v1i64(a, b))'
+
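+  # VTBX (extended table look-up): lane i of the result is b[c[i]] when the
+  # index is in range; an out-of-range index leaves lane i of a unchanged
+  # (VTBL would zero it instead).
+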
+  - name: "vtbx1"
+    visibility: private
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *enable-v7
+      - *target-is-arm
+      - *neon-unstable
+    assert_instr: [vtbx]
+    safety:
+      unsafe: [neon]
+    types:
+      - "int8x8_t"
+    compose:
+      - LLVMLink:
+          name: "vtbx1"
+          links:
+            - link: "llvm.arm.neon.vtbx1"
+              arch: arm
+
+  - name: "vtbx1_s8"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *enable-v7
+      - *target-is-arm
+      - *neon-unstable
+    assert_instr: [vtbx]
+    safety:
+      unsafe: [neon]
+    types:
+      - int8x8_t
+    compose:
+      - FnCall: [vtbx1, [a, b, c]]
+
+  - name: "vtbx1{neon_type.no}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type}", "b: {neon_type}", "c: uint8x8_t"]
+    return_type: "{neon_type}"
+    attr:
+      - *enable-v7
+      - *target-is-arm
+      - *neon-unstable
+    assert_instr: [vtbx]
+    safety:
+      unsafe: [neon]
+    types:
+      - uint8x8_t
+      - poly8x8_t
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - vtbx1
+                - - FnCall: [transmute, [a]]
+                  - FnCall: [transmute, [b]]
+                  - FnCall: [transmute, [c]]
+
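+  # Semantics reminder (a NEON fact, not spec content): VTBX is the *extended*
+  # table look-up, so lanes of the index vector that point past the table
+  # leave the corresponding lane of `a` unchanged, e.g. roughly:
+  #
+  #   // r[i] = table[idx[i]] if idx[i] < 8, else a[i]
+  #   let r = vtbx1_u8(a, table, idx);
+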
+  - name: "vtbx2"
+    visibility: private
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}", "d: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *enable-v7
+      - *target-is-arm
+      - *neon-unstable
+    assert_instr: [vtbx]
+    safety:
+      unsafe: [neon]
+    types:
+      - "int8x8_t"
+    compose:
+      - LLVMLink:
+          name: "vtbx2"
+          links:
+            - link: "llvm.arm.neon.vtbx2"
+              arch: arm
+
+  - name: "vtbx2_s8"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *enable-v7
+      - *target-is-arm
+      - *neon-unstable
+    assert_instr: [vtbx]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, int8x8x2_t]
+    compose:
+      - FnCall: [vtbx2, [a, 'b.0', 'b.1', c]]
+
+  - name: "vtbx2{neon_type[0].no}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *enable-v7
+      - *target-is-arm
+      - *neon-unstable
+    assert_instr: [vtbx]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t, uint8x8x2_t, uint8x8_t]
+      - [poly8x8_t, poly8x8x2_t, uint8x8_t]
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - vtbx2
+                - - FnCall: [transmute, [a]]
+                  - FnCall: [transmute, ['b.0']]
+                  - FnCall: [transmute, ['b.1']]
+                  - FnCall: [transmute, [c]]
+
+  - name: "vtbx3"
+    visibility: private
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}", "d: {neon_type}", "e: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *enable-v7
+      - *target-is-arm
+      - *neon-unstable
+    assert_instr: [vtbx]
+    safety:
+      unsafe: [neon]
+    types:
+      - "int8x8_t"
+    compose:
+      - LLVMLink:
+          name: "vtbx3"
+          links:
+            - link: "llvm.arm.neon.vtbx3"
+              arch: arm
+
+  - name: "vtbx3_s8"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *enable-v7
+      - *target-is-arm
+      - *neon-unstable
+    assert_instr: [vtbx]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t, int8x8x3_t]
+    compose:
+      - FnCall: [vtbx3, [a, 'b.0', 'b.1', 'b.2', c]]
+
+  - name: "vtbx3{neon_type[0].no}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *enable-v7
+      - *target-is-arm
+      - *neon-unstable
+    assert_instr: [vtbx]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t, uint8x8x3_t, uint8x8_t]
+      - [poly8x8_t, poly8x8x3_t, uint8x8_t]
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - vtbx3
+                - - FnCall: [transmute, [a]]
+                  - FnCall: [transmute, ['b.0']]
+                  - FnCall: [transmute, ['b.1']]
+                  - FnCall: [transmute, ['b.2']]
+                  - FnCall: [transmute, [c]]
+
+  - name: "vtbx4"
+    visibility: private
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}", "d: {neon_type}", "e: {neon_type}", "f: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *enable-v7
+      - *target-is-arm
+      - *neon-unstable
+    assert_instr: [vtbx]
+    safety:
+      unsafe: [neon]
+    types:
+      - "int8x8_t"
+    compose:
+      - LLVMLink:
+          name: "vtbx4"
+          links:
+            - link: "llvm.arm.neon.vtbx4"
+              arch: arm
+
+  - name: "vtbx4{neon_type[0].noq}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *enable-v7
+      - *target-is-arm
+      - *neon-unstable
+    assert_instr: [vtbx]
+    safety:
+      unsafe: [neon]
+    types:
+      - ["uint8x8_t", "uint8x8x4_t", "uint8x8_t"]
+      - ["poly8x8_t", "poly8x8x4_t", "uint8x8_t"]
+    compose:
+      - FnCall:
+          - "transmute"
+          - - FnCall:
+               - vtbx4
+               - - FnCall: [transmute, [a]]
+                 - FnCall: [transmute, ["b.0"]]
+                 - FnCall: [transmute, ["b.1"]]
+                 - FnCall: [transmute, ["b.2"]]
+                 - FnCall: [transmute, ["b.3"]]
+                 - FnCall: [transmute, [c]]
+
+  - name: "vtbx4{neon_type[0].noq}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *enable-v7
+      - *target-is-arm
+      - *neon-unstable
+    assert_instr: [vtbx]
+    safety:
+      unsafe: [neon]
+    types:
+      - ["int8x8_t", "int8x8x4_t"]
+    big_endian_inverse: true
+    compose:
+       - FnCall:
+           - vtbx4
+           - - a
+             - FnCall: [transmute, ["b.0"]]
+             - FnCall: [transmute, ["b.1"]]
+             - FnCall: [transmute, ["b.2"]]
+             - FnCall: [transmute, ["b.3"]]
+             - c
+
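+  # The second vtbx4 entry differs from the first: in the s8 case `a` and `c`
+  # pass through untouched and only the table halves are transmuted, with
+  # big_endian_inverse presumably handling lane order on BE targets. A hedged
+  # sketch of the expansion:
+  #
+  #   pub unsafe fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
+  #       vtbx4(a, transmute(b.0), transmute(b.1), transmute(b.2), transmute(b.3), c)
+  #   }
+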
+  - name: "vcombine{neon_type[0].noq}"
+    doc: "Vector combine"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [float32x2_t, float32x4_t, '[0, 1, 2, 3]']
+      - [poly8x8_t, poly8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
+      - [poly16x4_t, poly16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
+      - [int8x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
+      - [int16x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
+      - [int32x2_t, int32x4_t, '[0, 1, 2, 3]']
+      - [int64x1_t, int64x2_t, '[0, 1]']
+      - [uint8x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
+      - [uint16x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
+      - [uint32x2_t, uint32x4_t, '[0, 1, 2, 3]']
+      - [uint64x1_t, uint64x2_t, '[0, 1]']
+      - [poly64x1_t, poly64x2_t, '[0, 1]']
+    compose:
+      - FnCall: [simd_shuffle!, [a, b, '{type[2]}']]
+
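+  # The shuffle index in type[2] simply concatenates the two halves; e.g. for
+  # the float32x2_t case the generated function should behave like (a sketch):
+  #
+  #   // vcombine_f32(a, b) == [a[0], a[1], b[0], b[1]]
+  #   let q: float32x4_t = vcombine_f32(a, b);
+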
+  - name: "vaeseq_u8"
+    doc: "AES single round encryption."
+    arguments: ["data: {neon_type}", "key: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall: [target_feature, ['enable = "aes"']]
+      - *neon-v8
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, [aese]] }]]
+      - *neon-unstable-is-arm
+      - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
+    safety:
+      unsafe: [neon]
+    types:
+      - uint8x16_t
+    compose:
+      - LLVMLink:
+          name: "vaeseq_u8"
+          links:
+            - link: "llvm.aarch64.crypto.aese"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.aese"
+              arch: arm
+
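+  # Usage note (a general AES fact, not spec content): AESE performs
+  # AddRoundKey, SubBytes and ShiftRows; MixColumns is the separate
+  # vaesmcq_u8 below, so one full encryption round composes as:
+  #
+  #   state = vaesmcq_u8(vaeseq_u8(state, round_key));
+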
+  - name: "vaesdq_u8"
+    doc: "AES single round encryption."
+    arguments: ["data: {neon_type}", "key: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - FnCall: [target_feature, ['enable = "aes"']]
+      - *neon-v8
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, [aesd]] }]]
+      - *neon-unstable-is-arm
+      - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
+    safety:
+      unsafe: [neon]
+    types:
+      - uint8x16_t
+    compose:
+      - LLVMLink:
+          name: "vaesdq_u8"
+          links:
+            - link: "llvm.aarch64.crypto.aesd"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.aesd"
+              arch: arm
+
+  - name: "vaesmcq_u8"
+    doc: "AES mix columns."
+    arguments: ["data: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "aes"']]
+      - *neon-v8
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]]
+      - *neon-unstable-is-arm
+      - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x16_t, "aesmc"]
+    compose:
+      - LLVMLink:
+          name: "vaesmcq_u8"
+          links:
+            - link: "llvm.aarch64.crypto.{type[1]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.{type[1]}"
+              arch: arm
+
+  - name: "vaesimcq_u8"
+    doc: "AES inverse mix columns."
+    arguments: ["data: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "aes"']]
+      - *neon-v8
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]]
+      - *neon-unstable-is-arm
+      - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x16_t, "aesimc"]
+    compose:
+      - LLVMLink:
+          name: "vaesimcq_u8"
+          links:
+            - link: "llvm.aarch64.crypto.{type[1]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.{type[1]}"
+              arch: arm
+
+  - name: "vsha1h_u32"
+    doc: "SHA1 fixed rotate."
+    arguments: ["hash_e: {type[0]}"]
+    return_type: "{type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "sha2"']]
+      - *neon-v8
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]]
+      - *neon-unstable-is-arm
+      - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
+    safety:
+      unsafe: [neon]
+    types:
+      - [u32, "sha1h"]
+    compose:
+      - LLVMLink:
+          name: "vsha1h_u32"
+          links:
+            - link: "llvm.aarch64.crypto.{type[1]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.{type[1]}"
+              arch: arm
+
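+  # Functional note (a SHA-1 fact, not spec content): SHA1H is the fixed
+  # rotate used between SHA-1 rounds; the intrinsic behaves like:
+  #
+  #   // vsha1h_u32(hash_e) == hash_e.rotate_left(30)
+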
+  - name: "vsha1cq_u32"
+    doc: "SHA1 hash update accelerator, choose."
+    arguments: ["hash_abcd: {neon_type[2]}", "hash_e: {type[0]}", "wk: {neon_type[2]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "sha2"']]
+      - *neon-v8
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]]
+      - *neon-unstable-is-arm
+      - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
+    safety:
+      unsafe: [neon]
+    types:
+      - [u32, "sha1c", "uint32x4_t"]
+    compose:
+      - LLVMLink:
+          name: "vsha1cq_u32"
+          links:
+            - link: "llvm.aarch64.crypto.{type[1]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.{type[1]}"
+              arch: arm
+
+  - name: "vsha1mq_u32"
+    doc: "SHA1 hash update accelerator, majority"
+    arguments: ["hash_abcd: {neon_type[2]}", "hash_e: {type[0]}", "wk: {neon_type[2]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "sha2"']]
+      - *neon-v8
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]]
+      - *neon-unstable-is-arm
+      - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
+    safety:
+      unsafe: [neon]
+    types:
+      - [u32, "sha1m", "uint32x4_t"]
+    compose:
+      - LLVMLink:
+          name: "vsha1mq_u32"
+          links:
+            - link: "llvm.aarch64.crypto.{type[1]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.{type[1]}"
+              arch: arm
+
+  - name: "vsha1pq_u32"
+    doc: "SHA1 hash update accelerator, parity"
+    arguments: ["hash_abcd: {neon_type[2]}", "hash_e: {type[0]}", "wk: {neon_type[2]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "sha2"']]
+      - *neon-v8
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]]
+      - *neon-unstable-is-arm
+      - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
+    safety:
+      unsafe: [neon]
+    types:
+      - [u32, "sha1p", "uint32x4_t"]
+    compose:
+      - LLVMLink:
+          name: "vsha1pq_u32"
+          links:
+            - link: "llvm.aarch64.crypto.{type[1]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.{type[1]}"
+              arch: arm
+
+  - name: "vsha1su0q_u32"
+    doc: "SHA1 schedule update accelerator, first part."
+    arguments: ["w0_3: {neon_type[0]}", "w4_7: {neon_type[0]}", "w8_11: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "sha2"']]
+      - *neon-v8
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]]
+      - *neon-unstable-is-arm
+      - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint32x4_t, "sha1su0"]
+    compose:
+      - LLVMLink:
+          name: "vsha1su0q_u32"
+          links:
+            - link: "llvm.aarch64.crypto.{type[1]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.{type[1]}"
+              arch: arm
+
+  - name: "vsha1su1q_u32"
+    doc: "SHA1 schedule update accelerator, second part."
+    arguments: ["tw0_3: {neon_type[0]}", "w12_15: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "sha2"']]
+      - *neon-v8
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]]
+      - *neon-unstable-is-arm
+      - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint32x4_t, "sha1su1"]
+    compose:
+      - LLVMLink:
+          name: "vsha1su0q_u32"
+          links:
+            - link: "llvm.aarch64.crypto.{type[1]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.{type[1]}"
+              arch: arm
+
+  - name: "vsha256hq_u32"
+    doc: "SHA1 schedule update accelerator, first part."
+    arguments: ["hash_abcd: {neon_type[0]}", "hash_efgh: {neon_type[0]}", "wk: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "sha2"']]
+      - *neon-v8
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]]
+      - *neon-unstable-is-arm
+      - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint32x4_t, "sha256h"]
+    compose:
+      - LLVMLink:
+          name: "vsha256hq_u32"
+          links:
+            - link: "llvm.aarch64.crypto.{type[1]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.{type[1]}"
+              arch: arm
+
+  - name: "vsha256h2q_u32"
+    doc: "SHA1 schedule update accelerator, upper part."
+    arguments: ["hash_abcd: {neon_type[0]}", "hash_efgh: {neon_type[0]}", "wk: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "sha2"']]
+      - *neon-v8
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]]
+      - *neon-unstable-is-arm
+      - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint32x4_t, "sha256h2"]
+    compose:
+      - LLVMLink:
+          name: "vsha256h2q_u32"
+          links:
+            - link: "llvm.aarch64.crypto.{type[1]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.{type[1]}"
+              arch: arm
+
+  - name: "vsha256su0q_u32"
+    doc: "SHA256 schedule update accelerator, first part."
+    arguments: ["w0_3: {neon_type[0]}", "w4_7: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "sha2"']]
+      - *neon-v8
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]]
+      - *neon-unstable-is-arm
+      - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint32x4_t, "sha256su0"]
+    compose:
+      - LLVMLink:
+          name: "vsha256su0q_u32"
+          links:
+            - link: "llvm.aarch64.crypto.{type[1]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.{type[1]}"
+              arch: arm
+
+  - name: "vsha256su1q_u32"
+    doc: "SHA256 schedule update accelerator, second part."
+    arguments: ["tw0_3: {neon_type[0]}", "w8_11: {neon_type[0]}", "w12_15: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "sha2"']]
+      - *neon-v8
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["{type[1]}"]] }]]
+      - *neon-unstable-is-arm
+      - FnCall: [cfg_attr, [*not-arm, { FnCall: [stable, ['feature = "aarch64_neon_crypto_intrinsics"', 'since = "1.72.0"']] }]]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint32x4_t, "sha256su1"]
+    compose:
+      - LLVMLink:
+          name: "vsha256su1q_u32"
+          links:
+            - link: "llvm.aarch64.crypto.{type[1]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.{type[1]}"
+              arch: arm
+
+  - name: "__crc32b"
+    doc: "CRC32 single round checksum for bytes (8 bits)."
+    arguments: ["crc: {type[0]}", "data: {type[1]}"]
+    return_type: "{type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "crc"']]
+      - *neon-v8
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32b"]] }]]
+      - *arm-crc-unstable
+      - *aarch64-crc-stable
+    safety:
+      unsafe: [neon]
+    types:
+      - [u32, u8]
+    compose:
+      - LLVMLink:
+          name: "crc32b"
+          arguments:
+            - "crc: u32"
+            - "data: u32"
+          links:
+            - link: "llvm.aarch64.crc32b"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.crc32b"
+              arch: arm
+      - FnCall: ["___crc32b", ["crc.as_signed()", "data.as_signed() as i32"]]
+
+  - name: "__crc32h"
+    doc: "CRC32 single round checksum for bytes (16 bits)."
+    arguments: ["crc: {type[0]}", "data: {type[1]}"]
+    return_type: "{type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "crc"']]
+      - *neon-v8
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32h"]] }]]
+      - *arm-crc-unstable
+      - *aarch64-crc-stable
+    safety:
+      unsafe: [neon]
+    types:
+      - [u32, u16]
+    compose:
+      - LLVMLink:
+          name: "crc32h"
+          arguments:
+            - "crc: u32"
+            - "data: u32"
+          links:
+            - link: "llvm.aarch64.crc32h"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.crc32h"
+              arch: arm
+      - FnCall: ["___crc32h", ["crc.as_signed()", "data.as_signed() as i32"]]
+
+  - name: "__crc32w"
+    doc: "CRC32 single round checksum for bytes (32 bits)."
+    arguments: ["crc: {type}", "data: {type}"]
+    return_type: "{type}"
+    attr:
+      - FnCall: [target_feature, ['enable = "crc"']]
+      - *neon-v8
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32w"]] }]]
+      - *arm-crc-unstable
+      - *aarch64-crc-stable
+    safety:
+      unsafe: [neon]
+    types:
+      - u32
+    compose:
+      - LLVMLink:
+          name: "crc32w"
+          links:
+            - link: "llvm.aarch64.crc32w"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.crc32w"
+              arch: arm
+
+  - name: "__crc32cb"
+    doc: "CRC32-C single round checksum for bytes (8 bits)."
+    arguments: ["crc: {type[0]}", "data: {type[1]}"]
+    return_type: "{type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "crc"']]
+      - *neon-v8
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32cb"]] }]]
+      - *arm-crc-unstable
+      - *aarch64-crc-stable
+    safety:
+      unsafe: [neon]
+    types:
+      - [u32, u8]
+    compose:
+      - LLVMLink:
+          name: "crc32cb"
+          arguments:
+            - "crc: u32"
+            - "data: u32"
+          links:
+            - link: "llvm.aarch64.crc32cb"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.crc32cb"
+              arch: arm
+      - FnCall: ["___crc32cb", ["crc.as_signed()", "data.as_signed() as i32"]]
+
+  - name: "__crc32ch"
+    doc: "CRC32-C single round checksum for bytes (16 bits)."
+    arguments: ["crc: {type[0]}", "data: {type[1]}"]
+    return_type: "{type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "crc"']]
+      - *neon-v8
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32ch"]] }]]
+      - *arm-crc-unstable
+      - *aarch64-crc-stable
+    safety:
+      unsafe: [neon]
+    types:
+      - [u32, u16]
+    compose:
+      - LLVMLink:
+          name: "crc32ch"
+          arguments:
+            - "crc: u32"
+            - "data: u32"
+          links:
+            - link: "llvm.aarch64.crc32ch"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.crc32ch"
+              arch: arm
+      - FnCall: ["___crc32ch", ["crc.as_signed()", "data.as_signed() as i32"]]
+
+  - name: "__crc32cw"
+    doc: "CRC32-C single round checksum for bytes (32 bits)."
+    arguments: ["crc: {type}", "data: {type}"]
+    return_type: "{type}"
+    attr:
+      - FnCall: [target_feature, ['enable = "crc"']]
+      - *neon-v8
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32cw"]] }]]
+      - *arm-crc-unstable
+      - *aarch64-crc-stable
+    safety:
+      unsafe: [neon]
+    types:
+      - u32
+    compose:
+      - LLVMLink:
+          name: "crc32cw"
+          links:
+            - link: "llvm.aarch64.crc32cw"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.crc32cw"
+              arch: arm
+
+  - name: "__crc32d"
+    doc: "CRC32 single round checksum for quad words (64 bits)."
+    arguments: ["crc: {type[0]}", "data: {type[1]}"]
+    return_type: "{type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "crc"']]
+      - FnCall: [cfg, ['target_arch = "arm"']]
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32w"]] }]]
+      - *arm-crc-unstable
+    safety:
+      unsafe: [neon]
+    types:
+      - [u32, u64]
+    compose:
+      # The call to `__crc32w` does not get inlined, so we declare the same
+      # LLVM binding here (as above) and call it directly, which ensures the
+      # correct instructions are generated.
+      - Let: [a, i32, 'crc as i32']
+      - Let: [b, i32, '(data & 0xFFFFFFFF).as_signed() as i32']
+      - Let: [c, i32, '(data >> 32).as_signed() as i32']
+      - 'unsafe extern "unadjusted" {{ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32w")] fn ___crc32w(crc: i32, data: i32) -> i32;}} ___crc32w(___crc32w(a, b), c).as_unsigned()'
+
+  - name: "__crc32cd"
+    doc: "CRC32-C single round checksum for quad words (64 bits)."
+    arguments: ["crc: {type[0]}", "data: {type[1]}"]
+    return_type: "{type[0]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "crc"']]
+      - FnCall: [cfg, ['target_arch = "arm"']]
+      - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["crc32cw"]] }]]
+      - *arm-crc-unstable
+    safety:
+      unsafe: [neon]
+    types:
+      - [u32, u64]
+    compose:
+      - Let: [a, i32, 'crc as i32']
+      - Let: [b, i32, '(data & 0xFFFFFFFF).as_signed() as i32']
+      - Let: [c, i32, '(data >> 32).as_signed() as i32']
+      - 'unsafe extern "unadjusted" {{ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cw")] fn ___crc32cw(crc: i32, data: i32) -> i32;}} ___crc32cw(___crc32cw(a, b), c).as_unsigned() as u32'
+
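+  # Both 64-bit variants split `data` and feed the two words through the
+  # 32-bit instruction; functionally (a sketch) __crc32d is equivalent to:
+  #
+  #   let lo = __crc32w(crc, data as u32);
+  #   __crc32w(lo, (data >> 32) as u32)
+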
+  - name: "vabs{neon_type.no}"
+    doc: "Absolute value (wrapping)."
+    arguments: ["a: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vabs]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [abs]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - int8x8_t
+      - int16x4_t
+      - int32x2_t
+      - int8x16_t
+      - int16x8_t
+      - int32x4_t
+    compose:
+      - LLVMLink:
+          name: "vabs{neon_type.no}"
+          links:
+            - link: "llvm.aarch64.neon.abs.{neon_type}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.vabs.{neon_type}"
+              arch: arm
+
+  - name: "vpmin{neon_type.no}"
+    doc: "Folding minimum of adjacent pairs"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpmin]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sminp]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - int8x8_t
+      - int16x4_t
+      - int32x2_t
+    compose:
+      - LLVMLink:
+          name: "vabs{neon_type.no}"
+          links:
+            - link: "llvm.aarch64.neon.sminp.{neon_type}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.vpmins.{neon_type}"
+              arch: arm
+
+  - name: "vpmin{neon_type.no}"
+    doc: "Folding minimum of adjacent pairs"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpmin]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uminp]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - uint8x8_t
+      - uint16x4_t
+      - uint32x2_t
+    compose:
+      - LLVMLink:
+          name: "vabs{neon_type.no}"
+          links:
+            - link: "llvm.aarch64.neon.uminp.{neon_type}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.vpminu.{neon_type}"
+              arch: arm
+
+  - name: "vpmin{neon_type.no}"
+    doc: "Folding minimum of adjacent pairs"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpmin]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fminp]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - float32x2_t
+    compose:
+      - LLVMLink:
+          name: "vabs{neon_type.no}"
+          links:
+            - link: "llvm.aarch64.neon.fminp.{neon_type}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.vpmins.{neon_type}"
+              arch: arm
+
+  - name: "vpmax{neon_type.no}"
+    doc: "Folding maximum of adjacent pairs"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpmax]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [smaxp]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - int8x8_t
+      - int16x4_t
+      - int32x2_t
+    compose:
+      - LLVMLink:
+          name: "vabs{neon_type.no}"
+          links:
+            - link: "llvm.aarch64.neon.smaxp.{neon_type}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.vpmaxs.{neon_type}"
+              arch: arm
+
+  - name: "vpmax{neon_type.no}"
+    doc: "Folding maximum of adjacent pairs"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpmax]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [umaxp]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - uint8x8_t
+      - uint16x4_t
+      - uint32x2_t
+    compose:
+      - LLVMLink:
+          name: "vabs{neon_type.no}"
+          links:
+            - link: "llvm.aarch64.neon.umaxp.{neon_type}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.vpmaxu.{neon_type}"
+              arch: arm
+
+  - name: "vpmax{neon_type.no}"
+    doc: "Folding maximum of adjacent pairs"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpmax]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmaxp]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - float32x2_t
+    compose:
+      - LLVMLink:
+          name: "vabs{neon_type.no}"
+          links:
+            - link: "llvm.aarch64.neon.fmaxp.{neon_type}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.vpmaxs.{neon_type}"
+              arch: arm
+
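+  # Semantics reminder (a NEON fact, not spec content): the pairwise fold
+  # takes adjacent pairs from `a` for the low half of the result and from `b`
+  # for the high half, e.g. for vpmin_s8:
+  #
+  #   // r[i]     = min(a[2*i], a[2*i + 1])   for i in 0..4
+  #   // r[i + 4] = min(b[2*i], b[2*i + 1])   for i in 0..4
+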
+  - name: "vraddhn{neon_type[0].noq}"
+    doc: "Rounding Add returning High Narrow."
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"{type[2]}"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [raddhn]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [int16x8_t, int8x8_t, 'vraddhn.i16']
+      - [int32x4_t, int16x4_t, 'vraddhn.i32']
+      - [int64x2_t, int32x2_t, 'vraddhn.i64']
+    compose:
+      - LLVMLink:
+          name: "vraddhn{neon_type[0].noq}"
+          links:
+            - link: "llvm.aarch64.neon.raddhn.{neon_type[1]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.vraddhn.{neon_type[1]}"
+              arch: arm
+
+  - name: "vraddhn{neon_type[0].noq}"
+    doc: "Rounding Add returning High Narrow."
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"{type[2]}"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [raddhn]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint16x8_t, uint8x8_t, 'vraddhn.i16', int16x8_t]
+      - [uint32x4_t, uint16x4_t, 'vraddhn.i32', int32x4_t]
+      - [uint64x2_t, uint32x2_t, 'vraddhn.i64', int64x2_t]
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - "vraddhn{neon_type[3].noq}"
+                - - FnCall: [transmute, [a]]
+                  - FnCall: [transmute, [b]]
+
+  - name: "vraddhn_high{neon_type[1].noq}"
+    doc: "Rounding Add returning High Narrow (high half)."
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"{type[3]}"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [raddhn2]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t , uint16x8_t, uint8x16_t, 'vraddhn.i16', int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
+      - [uint16x4_t, uint32x4_t, uint16x8_t, 'vraddhn.i32', int32x4_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
+      - [uint32x2_t, uint64x2_t, uint32x4_t, 'vraddhn.i64', int64x2_t, '[0, 1, 2, 3]']
+    compose:
+      - Let:
+          - x
+          - "{neon_type[0]}"
+          - FnCall:
+              - transmute
+              - - FnCall:
+                    - "vraddhn{neon_type[4].noq}"
+                    - - FnCall: [transmute, [b]]
+                      - FnCall: [transmute, [c]]
+      - FnCall: ["simd_shuffle!", [a, x, '{type[5]}']]
+
+  - name: "vraddhn_high{neon_type[1].noq}"
+    doc: "Rounding Add returning High Narrow (high half)."
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"{type[3]}"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [raddhn2]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t , int16x8_t, int8x16_t, 'vraddhn.i16', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
+      - [int16x4_t, int32x4_t, int16x8_t, 'vraddhn.i32', '[0, 1, 2, 3, 4, 5, 6, 7]']
+      - [int32x2_t, int64x2_t, int32x4_t, 'vraddhn.i64', '[0, 1, 2, 3]']
+    compose:
+      - Let:
+          - x
+          - FnCall:
+              - "vraddhn{neon_type[1].noq}"
+              - - b
+                - c
+      - FnCall: ["simd_shuffle!", [a, x, '{type[4]}']]
+
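+  # Rounding note (a NEON fact, not spec content): RADDHN adds the rounding
+  # constant 1 << (n/2 - 1) before taking the high half, so per i16 lane:
+  #
+  #   // vraddhn_s16: r[i] = ((a[i] as i32 + b[i] as i32 + (1 << 7)) >> 8) as i8
+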
+  - name: "vpadd{neon_type.no}"
+    doc: "Add pairwise."
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpadd]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [addp]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - int8x8_t
+      - int16x4_t
+      - int32x2_t
+    compose:
+      - LLVMLink:
+          name: "vpadd{neon_type.no}"
+          links:
+            - link: "llvm.aarch64.neon.addp.{neon_type}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.vpadd.{neon_type}"
+              arch: arm
+
+  - name: "vpadd{neon_type[0].no}"
+    doc: "Add pairwise."
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpadd]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [addp]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t, int8x8_t]
+      - [uint16x4_t, int16x4_t]
+      - [uint32x2_t, int32x2_t]
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - "vpadd{neon_type[1].no}"
+                - - FnCall: [transmute, [a]]
+                  - FnCall: [transmute, [b]]
+
+  # This was not publicly exposed
+  - name: "priv_vpadal{neon_type[1].no}"
+    visibility: private
+    doc: "Signed Add and Accumulate Long Pairwise."
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]]
+      - *neon-unstable-is-arm
+    types:
+      - [int16x4_t, int8x8_t,  '"vpadal.s8"']
+      - [int32x2_t, int16x4_t, '"vpadal.s16"']
+      - [int64x1_t, int32x2_t, '"vpadal.s32"']
+      - [int16x8_t, int8x16_t, '"vpadal.s8"']
+      - [int32x4_t, int16x8_t, '"vpadal.s16"']
+      - [int64x2_t, int32x4_t, '"vpadal.s32"']
+    compose:
+      - LLVMLink:
+          name: "vpadal{neon_type[1].no}"
+          links:
+            - link: "llvm.arm.neon.vpadals.{neon_type[0]}.{neon_type[1]}"
+              arch: arm
+
+  # This was not publicly exposed
+  - name: "priv_vpadal{neon_type[1].no}"
+    visibility: private
+    doc: "Unsigned Add and Accumulate Long Pairwise."
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]]
+      - *neon-unstable-is-arm
+    types:
+      - [uint16x4_t, uint8x8_t , '"vpadal.u8"']
+      - [uint32x2_t, uint16x4_t, '"vpadal.u16"']
+      - [uint64x1_t, uint32x2_t, '"vpadal.u32"']
+      - [uint16x8_t, uint8x16_t, '"vpadal.u8"']
+      - [uint32x4_t, uint16x8_t, '"vpadal.u16"']
+      - [uint64x2_t, uint32x4_t, '"vpadal.u32"']
+    compose:
+      - LLVMLink:
+          name: "vpadal{neon_type[1].no}"
+          links:
+            - link: "llvm.arm.neon.vpadalu.{neon_type[0]}.{neon_type[1]}"
+              arch: arm
+
+  - name: "vpaddl{neon_type[0].no}"
+    doc: "Signed Add and Accumulate Long Pairwise."
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{neon_type[1]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [saddlp]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    types:
+      - [int8x8_t, int16x4_t , '"vpaddl.s8"']
+      - [int16x4_t, int32x2_t, '"vpaddl.s16"']
+      - [int32x2_t, int64x1_t, '"vpaddl.s32"']
+      - [int8x16_t, int16x8_t, '"vpaddl.s8"']
+      - [int16x8_t, int32x4_t, '"vpaddl.s16"']
+      - [int32x4_t, int64x2_t, '"vpaddl.s32"']
+    compose:
+      - LLVMLink:
+          name: "vpaddl{neon_type[1].no}"
+          links:
+            - link: "llvm.aarch64.neon.saddlp.{neon_type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.vpaddls.{neon_type[1]}.{neon_type[0]}"
+              arch: arm
+
+  - name: "vpaddl{neon_type[0].no}"
+    doc: "Unsigned Add and Accumulate Long Pairwise."
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{neon_type[1]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uaddlp]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    types:
+      - [uint8x8_t, uint16x4_t , '"vpaddl.u8"']
+      - [uint16x4_t, uint32x2_t, '"vpaddl.u16"']
+      - [uint32x2_t, uint64x1_t, '"vpaddl.u32"']
+      - [uint8x16_t, uint16x8_t, '"vpaddl.u8"']
+      - [uint16x8_t, uint32x4_t, '"vpaddl.u16"']
+      - [uint32x4_t, uint64x2_t, '"vpaddl.u32"']
+    compose:
+      - LLVMLink:
+          name: "vpaddl{neon_type[1].no}"
+          links:
+            - link: "llvm.aarch64.neon.uaddlp.{neon_type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.vpaddlu.{neon_type[1]}.{neon_type[0]}"
+              arch: arm
+
+  - name: "vpadal{neon_type[1].no}"
+    doc: "Signed Add and Accumulate Long Pairwise."
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"{type[2]}"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sadalp]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    types:
+      - [int16x4_t, int8x8_t, 'vpadal.s8', 'let x: int16x4_t; #[cfg(target_arch = "arm")]   { x = priv_vpadal_s8(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]   { x = simd_add(vpaddl_s8(b), a);}']
+      - [int32x2_t, int16x4_t, 'vpadal.s16', 'let x: int32x2_t; #[cfg(target_arch = "arm")] { x = priv_vpadal_s16(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] { x = simd_add(vpaddl_s16(b), a);}']
+      - [int64x1_t, int32x2_t, 'vpadal.s32', 'let x: int64x1_t; #[cfg(target_arch = "arm")] { x = priv_vpadal_s32(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] { x = simd_add(vpaddl_s32(b), a);}']
+      - [int16x8_t, int8x16_t, 'vpadal.s8', 'let x: int16x8_t; #[cfg(target_arch = "arm")]  { x = priv_vpadalq_s8(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]   { x = simd_add(vpaddlq_s8(b), a);}']
+      - [int32x4_t, int16x8_t, 'vpadal.s16', 'let x: int32x4_t; #[cfg(target_arch = "arm")] { x = priv_vpadalq_s16(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] { x = simd_add(vpaddlq_s16(b), a);}']
+      - [int64x2_t, int32x4_t, 'vpadal.s32', 'let x: int64x2_t; #[cfg(target_arch = "arm")] { x = priv_vpadalq_s32(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] { x = simd_add(vpaddlq_s32(b), a);}']
+    compose:
+      - Identifier: ['{type[3]}', Symbol]
+      - Identifier: [x, Symbol]
+
+  - name: "vpadal{neon_type[1].no}"
+    doc: "Unsigned Add and Accumulate Long Pairwise."
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"{type[2]}"']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uadalp]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    types:
+      - [uint16x4_t, uint8x8_t, 'vpadal.u8', 'let x: uint16x4_t; #[cfg(target_arch = "arm")]   { x = priv_vpadal_u8(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]  { x = simd_add(vpaddl_u8(b), a);}']
+      - [uint32x2_t, uint16x4_t, 'vpadal.u16', 'let x: uint32x2_t; #[cfg(target_arch = "arm")] { x = priv_vpadal_u16(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] { x = simd_add(vpaddl_u16(b), a);}']
+      - [uint64x1_t, uint32x2_t, 'vpadal.u32', 'let x: uint64x1_t; #[cfg(target_arch = "arm")] { x = priv_vpadal_u32(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] { x = simd_add(vpaddl_u32(b), a);}']
+      - [uint16x8_t, uint8x16_t, 'vpadal.u8', 'let x: uint16x8_t; #[cfg(target_arch = "arm")]  { x = priv_vpadalq_u8(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]  { x = simd_add(vpaddlq_u8(b), a);}']
+      - [uint32x4_t, uint16x8_t, 'vpadal.u16', 'let x: uint32x4_t; #[cfg(target_arch = "arm")] { x = priv_vpadalq_u16(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] { x = simd_add(vpaddlq_u16(b), a);}']
+      - [uint64x2_t, uint32x4_t, 'vpadal.u32', 'let x: uint64x2_t; #[cfg(target_arch = "arm")] { x = priv_vpadalq_u32(a, b); } #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] { x = simd_add(vpaddlq_u32(b), a);}']
+    compose:
+      - Identifier: ['{type[3]}', Symbol]
+      - Identifier: [x, Symbol]
+
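+  # The Identifier entries emit type[3] verbatim and then return `x`, so the
+  # body branches per architecture; a hedged sketch for vpadal_s8:
+  #
+  #   let x: int16x4_t;
+  #   #[cfg(target_arch = "arm")]
+  #   { x = priv_vpadal_s8(a, b); }
+  #   #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+  #   { x = simd_add(vpaddl_s8(b), a); }
+  #   x
+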
+  - name: "vcnt{neon_type.no}"
+    doc: "Population count per byte."
+    arguments: ["a: {neon_type}"]
+    return_type: "{neon_type}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcnt]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [cnt]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    types:
+      - int8x8_t
+      - int8x16_t
+    compose:
+      - LLVMLink:
+          name: "vcnt{neon_type.no}"
+          links:
+            - link: "llvm.ctpop.{neon_type}"
+              arch: aarch64,arm64ec
+            - link: "llvm.ctpop.{neon_type}"
+              arch: arm
+
+  - name: "vcnt{neon_type[0].no}"
+    doc: "Population count per byte."
+    arguments: ["a: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcnt]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [cnt]]}]]
+      - *neon-stable-not-arm
+      - *neon-unstable-is-arm
+    types:
+      - [uint8x8_t,  int8x8_t]
+      - [uint8x16_t, int8x16_t]
+      - [poly8x8_t,  int8x8_t]
+      - [poly8x16_t, int8x16_t]
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - "vcnt{neon_type[1].no}"
+                - - FnCall:
+                      - transmute
+                      - - a
+
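+  # llvm.ctpop operates per lane, so each byte gets an independent population
+  # count; functionally (a sketch):
+  #
+  #   // vcnt_u8(a)[i] == a[i].count_ones() as u8
+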
+  - name: "vmmla{neon_type[0].no}"
+    doc: "8-bit integer matrix multiply-accumulate"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *neon-i8mm
+      - *neon-v8
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [smmla]]}]]
+      - *neon-unstable-i8mm
+      - *neon-unstable-is-arm
+    types:
+      - [int32x4_t, int8x16_t]
+    compose:
+      - LLVMLink:
+          name: "vmmla{neon_type[0].no}"
+          links:
+            - link: "llvm.aarch64.neon.smmla.{neon_type[0]}.{neon_type[1]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.smmla.{neon_type[0]}.{neon_type[1]}"
+              arch: arm
+
+  - name: "vmmla{neon_type[0].no}"
+    doc: "8-bit integer matrix multiply-accumulate"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *neon-i8mm
+      - *neon-v8
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ummla]]}]]
+      - *neon-unstable-i8mm
+      - *neon-unstable-is-arm
+    types:
+      - [uint32x4_t, uint8x16_t]
+    compose:
+      - LLVMLink:
+          name: "vmmla{neon_type[0].no}"
+          links:
+            - link: "llvm.aarch64.neon.ummla.{neon_type[0]}.{neon_type[1]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.ummla.{neon_type[0]}.{neon_type[1]}"
+              arch: arm
+
+  - name: "vusmmla{neon_type[0].no}"
+    doc: "Unsigned and signed 8-bit integer matrix multiply-accumulate"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *neon-i8mm
+      - *neon-v8
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [usmmla]]}]]
+      - *neon-unstable-i8mm
+      - *neon-unstable-is-arm
+    types:
+      - [int32x4_t, uint8x16_t, int8x16_t]
+    compose:
+      - LLVMLink:
+          name: "vmmla{neon_type[0].no}"
+          links:
+            - link: "llvm.aarch64.neon.usmmla.{neon_type[0]}.{neon_type[1]}"
+              arch: aarch64,arm64ec
+            - link: "llvm.arm.neon.usmmla.{neon_type[0]}.{neon_type[1]}"
+              arch: arm
+
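+  # Shape reminder (an ISA fact, not spec content): the *mmla instructions
+  # read `b` as a 2x8 matrix and `c` as an 8x2 matrix of 8-bit values,
+  # multiply them, and accumulate the 2x2 product into the four i32 lanes
+  # of `a`:
+  #
+  #   // a[2*i + j] += sum over k in 0..8 of b[8*i + k] * c[8*j + k]
+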
+  - name: "vtbl1"
+    visibility: private
+    doc: "Table look-up"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *target-is-arm
+      - *neon-v7
+      - *neon-unstable
+    assert_instr: [vtbl]
+    types:
+      - int8x8_t
+    compose:
+      - LLVMLink:
+          name: "vtbl1"
+          links:
+            - link: "llvm.arm.neon.vtbl1"
+              arch: arm
+
+  - name: "vtbl1_s8"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *target-is-arm
+      - *neon-v7
+      - *neon-unstable
+    assert_instr: [vtbl]
+    types:
+      - int8x8_t
+    compose:
+      - FnCall: [vtbl1, [a, b]]
+
+  - name: "vtbl1{neon_type[0].no}"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[0]}", "b: uint8x8_t"]
+    return_type: "{neon_type[1]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *target-is-arm
+      - *neon-v7
+      - *neon-unstable
+    assert_instr: [vtbl]
+    types:
+      - [uint8x8_t, uint8x8_t]
+      - [poly8x8_t, poly8x8_t]
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - vtbl1
+                - - FnCall: [transmute, [a]]
+                  - FnCall: [transmute, [b]]
+
+  - name: "vtbl2"
+    visibility: private
+    doc: "Table look-up"
+    arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}"]
+    return_type: "{neon_type}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *target-is-arm
+      - *neon-v7
+      - *neon-unstable
+    assert_instr: [vtbl]
+    types:
+      - int8x8_t
+    compose:
+      - LLVMLink:
+          name: "vtbl2"
+          links:
+            - link: "llvm.arm.neon.vtbl2"
+              arch: arm
+
+  - name: "vtbl2_s8"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[1]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *target-is-arm
+      - *neon-v7
+      - *neon-unstable
+    assert_instr: [vtbl]
+    types:
+      - [int8x8x2_t, int8x8_t]
+    compose:
+      - FnCall: [vtbl2, ['a.0', 'a.1', b]]
+
+  - name: "vtbl2{neon_type[1].no}"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[0]}", "b: uint8x8_t"]
+    return_type: "{neon_type[1]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *target-is-arm
+      - *neon-v7
+      - *neon-unstable
+    assert_instr: [vtbl]
+    types:
+      - [uint8x8x2_t, uint8x8_t]
+      - [poly8x8x2_t, poly8x8_t]
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - vtbl2
+                - - FnCall: [transmute, ['a.0']]
+                  - FnCall: [transmute, ['a.1']]
+                  - FnCall: [transmute, [b]]
+
+  - name: "vtbl3"
+    visibility: private
+    doc: "Table look-up"
+    arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}", "d: {neon_type}"]
+    return_type: "{neon_type}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *target-is-arm
+      - *neon-v7
+      - *neon-unstable
+    assert_instr: [vtbl]
+    types:
+      - int8x8_t
+    compose:
+      - LLVMLink:
+          name: "vtbl3"
+          links:
+            - link: "llvm.arm.neon.vtbl3"
+              arch: arm
+
+  - name: "vtbl3_s8"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[1]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *target-is-arm
+      - *neon-v7
+      - *neon-unstable
+    assert_instr: [vtbl]
+    types:
+      - [int8x8x3_t, int8x8_t]
+    compose:
+      - FnCall: [vtbl3, ['a.0', 'a.1', 'a.2', b]]
+
+  - name: "vtbl3{neon_type[1].no}"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[0]}", "b: uint8x8_t"]
+    return_type: "{neon_type[1]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *target-is-arm
+      - *neon-v7
+      - *neon-unstable
+    assert_instr: [vtbl]
+    types:
+      - [uint8x8x3_t, uint8x8_t]
+      - [poly8x8x3_t, poly8x8_t]
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - vtbl3
+                - - FnCall: [transmute, ['a.0']]
+                  - FnCall: [transmute, ['a.1']]
+                  - FnCall: [transmute, ['a.2']]
+                  - FnCall: [transmute, [b]]
+
+  - name: "vtbl4"
+    visibility: private
+    doc: "Table look-up"
+    arguments: ["a: {neon_type}", "b: {neon_type}", "c: {neon_type}", "d: {neon_type}", "e: {neon_type}"]
+    return_type: "{neon_type}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *target-is-arm
+      - *neon-v7
+      - *neon-unstable
+    assert_instr: [vtbl]
+    types:
+      - int8x8_t
+    compose:
+      - LLVMLink:
+          name: "vtbl4"
+          links:
+            - link: "llvm.arm.neon.vtbl4"
+              arch: arm
+
+  - name: "vtbl4_s8"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[1]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *target-is-arm
+      - *neon-v7
+      - *neon-unstable
+    assert_instr: [vtbl]
+    types:
+      - [int8x8x4_t, int8x8_t]
+    compose:
+      - FnCall: [vtbl4, ['a.0', 'a.1', 'a.2', 'a.3', b]]
+
+  - name: "vtbl4{neon_type[1].no}"
+    doc: "Table look-up"
+    arguments: ["a: {neon_type[0]}", "b: uint8x8_t"]
+    return_type: "{neon_type[1]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *target-is-arm
+      - *neon-v7
+      - *neon-unstable
+    assert_instr: [vtbl]
+    types:
+      - [uint8x8x4_t, uint8x8_t]
+      - [poly8x8x4_t, poly8x8_t]
+    compose:
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - vtbl4
+                - - FnCall: [transmute, ['a.0']]
+                  - FnCall: [transmute, ['a.1']]
+                  - FnCall: [transmute, ['a.2']]
+                  - FnCall: [transmute, ['a.3']]
+                  - FnCall: [transmute, [b]]
+
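+  # Contrast with VTBX above (a NEON fact, not spec content): VTBL writes
+  # zero to result lanes whose index is out of range instead of keeping `a`:
+  #
+  #   // r[i] = table[idx[i]] if idx[i] < 8, else 0
+  #   let r = vtbl1_u8(table, idx);
+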
+  - name: "vst1{type[0]}"
+    visibility: private
+    doc: "Store multiple single-element structures from one, two, three, or four registers."
+    arguments: ["addr: {type[1]}", "val: {neon_type[2]}", "align: {type[3]}"]
+    safety:
+      unsafe: [neon]
+    attr:
+      - *target-is-arm
+      - *neon-v7
+      - *neon-unstable
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[4]}"']]}]]
+    types:
+      - ['_v8i8', '* const i8', int8x8_t, i32, '8']
+      - ['q_v16i8', '* const i8', int8x16_t, i32, '8']
+      - ['_v4i16', '* const i8', int16x4_t, i32, '16']
+      - ['q_v8i16', '* const i8', int16x8_t, i32, '16']
+      - ['_v2i32', '* const i8', int32x2_t, i32, '32']
+      - ['q_v4i32', '* const i8', int32x4_t, i32, '32']
+      - ['_v1i64', '* const i8', int64x1_t, i32, '64']
+      - ['q_v2i64', '* const i8', int64x2_t, i32, '64']
+      - ['_v2f32', '* const i8', float32x2_t, i32, '32']
+      - ['q_v4f32', '* const i8', float32x4_t, i32, '32']
+    compose:
+      - LLVMLink:
+          name: "_vst1{type[0]}"
+          links:
+            - link: "llvm.arm.neon.vst1.{neon_type[2]}.p0"
+              arch: arm
+
+  - name: "vst1{neon_type[1].no}"
+    doc: "Store multiple single-element structures from one, two, three, or four registers."
+    arguments: ["ptr: {type[0]}", "a: {neon_type[1]}"]
+    safety:
+      unsafe: [neon]
+    attr:
+      - *target-is-arm
+      - *neon-v7
+      - *neon-unstable
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[2]}"']]}]]
+    types:
+      - ['*mut i8',  int8x8_t,   '8', 'a', 'crate::mem::align_of::<i8>() as i32', '_v8i8']
+      - ['*mut i8',  int8x16_t,  '8', 'a', 'crate::mem::align_of::<i8>() as i32', 'q_v16i8']
+      - ['*mut i16', int16x4_t, '16', 'a', 'crate::mem::align_of::<i16>() as i32', '_v4i16']
+      - ['*mut i16', int16x8_t, '16', 'a', 'crate::mem::align_of::<i16>() as i32', 'q_v8i16']
+      - ['*mut i32', int32x2_t, '32', 'a', 'crate::mem::align_of::<i32>() as i32', '_v2i32']
+      - ['*mut i32', int32x4_t, '32', 'a', 'crate::mem::align_of::<i32>() as i32', 'q_v4i32']
+      - ['*mut i64', int64x1_t, '64', 'a', 'crate::mem::align_of::<i64>() as i32', '_v1i64']
+      - ['*mut i64', int64x2_t, '64', 'a', 'crate::mem::align_of::<i64>() as i32', 'q_v2i64']
+      - ['*mut u8',  uint8x8_t,   '8', 'transmute(a)', 'crate::mem::align_of::<u8>() as i32', '_v8i8']
+      - ['*mut u8',  uint8x16_t,  '8', 'transmute(a)', 'crate::mem::align_of::<u8>() as i32', 'q_v16i8']
+      - ['*mut u16', uint16x4_t, '16', 'transmute(a)', 'crate::mem::align_of::<u16>() as i32', '_v4i16']
+      - ['*mut u16', uint16x8_t, '16', 'transmute(a)', 'crate::mem::align_of::<u16>() as i32', 'q_v8i16']
+      - ['*mut u32', uint32x2_t, '32', 'transmute(a)', 'crate::mem::align_of::<u32>() as i32', '_v2i32']
+      - ['*mut u32', uint32x4_t, '32', 'transmute(a)', 'crate::mem::align_of::<u32>() as i32', 'q_v4i32']
+      - ['*mut u64', uint64x1_t, '64', 'transmute(a)', 'crate::mem::align_of::<u64>() as i32', '_v1i64']
+      - ['*mut u64', uint64x2_t, '64', 'transmute(a)', 'crate::mem::align_of::<u64>() as i32', 'q_v2i64']
+      - ['*mut p8',  poly8x8_t,    '8', 'transmute(a)', 'crate::mem::align_of::<p8>() as i32', '_v8i8']
+      - ['*mut p8',  poly8x16_t,   '8', 'transmute(a)', 'crate::mem::align_of::<p8>() as i32', 'q_v16i8']
+      - ['*mut p16', poly16x4_t,  '16', 'transmute(a)', 'crate::mem::align_of::<p16>() as i32', '_v4i16']
+      - ['*mut p16', poly16x8_t,  '16', 'transmute(a)', 'crate::mem::align_of::<p16>() as i32', 'q_v8i16']
+      - ['*mut p64', poly64x1_t,  '64', 'transmute(a)', 'crate::mem::align_of::<p64>() as i32', '_v1i64']
+      - ['*mut p64', poly64x2_t,  '64', 'transmute(a)', 'crate::mem::align_of::<p64>() as i32', 'q_v2i64']
+      - ['*mut f32', float32x2_t, '32', 'transmute(a)', 'crate::mem::align_of::<f32>() as i32', '_v2f32']
+      - ['*mut f32', float32x4_t, '32', 'transmute(a)', 'crate::mem::align_of::<f32>() as i32', 'q_v4f32']
+    compose:
+      - FnCall:
+          - "vst1{type[5]}"
+          - - 'ptr as *const i8'
+            - '{type[3]}'
+            - '{type[4]}'
+
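+  # Private binding for the `llvm.arm.neon.vshiftins` intrinsic. The sign of
+  # the third operand selects the direction: `vsri` passes `splat(-N)` (shift
+  # right and insert) while `vsli` passes `splat(N)` (shift left and insert).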
+  - name: "vshiftins{type[0]}"
+    visibility: private
+    doc: "Shift Right and Insert (immediate)"
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
+    return_type: "{neon_type[1]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      #- *target-is-arm
+      #- *neon-v7
+      - *neon-unstable
+    types:
+      - ['_v8i8',  "int8x8_t", '8']
+      - ['_v16i8', 'int8x16_t', '8']
+      - ['_v4i16', 'int16x4_t', '16']
+      - ['_v8i16', 'int16x8_t', '16']
+      - ['_v2i32', 'int32x2_t', '32']
+      - ['_v4i32', 'int32x4_t', '32']
+      - ['_v1i64', 'int64x1_t', '64']
+      - ['_v2i64', 'int64x2_t', '64']
+    compose:
+      - LLVMLink:
+          name: "_vshiftins{type[0]}"
+          links:
+            - link: "llvm.arm.neon.vshiftins.{neon_type[1]}"
+              arch: arm
+
+  - name: "vsri{neon_type[0].N}"
+    doc: "Shift Right and Insert (immediate)"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *target-is-arm
+      - FnCall: [target_feature, ['enable = "{type[1]}"']]
+      - *neon-unstable
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vsri.{type[2]}"', 'N = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+    static_defs: ['const N: i32']
+    types:
+      - [uint8x8_t,  "neon,v7",  '8', '1 <= N && N <= 8',   'v8i8', 'int8x8_t::splat',  '-N as i8']
+      - [uint8x16_t, "neon,v7",  '8', '1 <= N && N <= 8',  'v16i8', 'int8x16_t::splat', '-N as i8']
+      - [uint16x4_t, "neon,v7", '16', '1 <= N && N <= 16', 'v4i16', 'int16x4_t::splat', '-N as i16']
+      - [uint16x8_t, "neon,v7", '16', '1 <= N && N <= 16', 'v8i16', 'int16x8_t::splat', '-N as i16']
+      - [uint32x2_t, "neon,v7", '32', '1 <= N && N <= 32', 'v2i32', 'int32x2_t::splat', '-N']
+      - [uint32x4_t, "neon,v7", '32', '1 <= N && N <= 32', 'v4i32', 'int32x4_t::splat', '-N']
+      - [uint64x1_t, "neon,v7", '64', '1 <= N && N <= 64', 'v1i64', 'int64x1_t::splat', '-N as i64']
+      - [uint64x2_t, "neon,v7", '64', '1 <= N && N <= 64', 'v2i64', 'int64x2_t::splat', '-N as i64']
+      - [poly8x8_t,  "neon,v7",  '8', '1 <= N && N <= 8',   'v8i8', 'int8x8_t::splat',  '-N as i8']
+      - [poly8x16_t, "neon,v7",  '8', '1 <= N && N <= 8',  'v16i8', 'int8x16_t::splat', '-N as i8']
+      - [poly16x4_t, "neon,v7", '16', '1 <= N && N <= 16', 'v4i16', 'int16x4_t::splat', '-N as i16']
+      - [poly16x8_t, "neon,v7", '16', '1 <= N && N <= 16', 'v8i16', 'int16x8_t::splat', '-N as i16']
+      ## These live in ./crates/core_arch/src/arm/neon.rs
+      #- [poly64x1_t, "neon,v7,aes", '64', '1 <= N && N <= 64', 'v1i64', 'int64x1_t::splat', '-N as i64']
+      #- [poly64x2_t, "neon,v7,aes", '64', '1 <= N && N <= 64', 'v2i64', 'int64x2_t::splat', '-N as i64']
+    compose:
+      - FnCall: ["static_assert!", ['{type[3]}']]
+      - FnCall:
+          - 'transmute'
+          - - FnCall:
+                - "vshiftins_{type[4]}"
+                - - FnCall: [transmute, [a]]
+                  - FnCall: [transmute, [b]]
+                  - FnCall: ["{type[5]}", ["{type[6]}"]]
+
+  - name: "vsri{neon_type[0].N}"
+    doc: "Shift Right and Insert (immediate)"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    static_defs: ['const N: i32']
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,v7"']]
+      - *target-is-arm
+      - *neon-unstable
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vsri.{type[1]}"', 'N = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t,  '8',  '1 <= N && N <= 8',  'v8i8',  'int8x8_t::splat',  '-N as i8']
+      - [int8x16_t, '8',  '1 <= N && N <= 8',  'v16i8', 'int8x16_t::splat', '-N as i8']
+      - [int16x4_t, '16', '1 <= N && N <= 16', 'v4i16', 'int16x4_t::splat', '-N as i16']
+      - [int16x8_t, '16', '1 <= N && N <= 16', 'v8i16', 'int16x8_t::splat', '-N as i16']
+      - [int32x2_t, '32', '1 <= N && N <= 32', 'v2i32', 'int32x2_t::splat', '-N as i32']
+      - [int32x4_t, '32', '1 <= N && N <= 32', 'v4i32', 'int32x4_t::splat', '-N as i32']
+      - [int64x1_t, '64', '1 <= N && N <= 64', 'v1i64', 'int64x1_t::splat', '-N as i64']
+      - [int64x2_t, '64', '1 <= N && N <= 64', 'v2i64', 'int64x2_t::splat', '-N as i64']
+    compose:
+      - FnCall: ["static_assert!", ['{type[2]}']]
+      - FnCall:
+          - "vshiftins_{type[3]}"
+          - - a
+            - b
+            - FnCall: ["{type[4]}", ["{type[5]}"]]
+
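+  # Note the asymmetric immediate ranges: `vsri` above requires
+  # `1 <= N <= lane_bits`, whereas `vsli` below accepts
+  # `0 <= N <= lane_bits - 1`, which the static asserts encode.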
+  - name: "vsli{neon_type[0].N}"
+    doc: "Shift Left and Insert (immediate)"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *target-is-arm
+      - FnCall: [target_feature, ['enable = "{type[1]}"']]
+      - *neon-unstable
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vsli.{type[2]}"', 'N = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+    static_defs: ['const N: i32']
+    types:
+      - [uint8x8_t,  "neon,v7", '8',  'static_assert_uimm_bits!', 'N, 3',    'v8i8',  'int8x8_t::splat',  'N as i8']
+      - [uint8x16_t, "neon,v7", '8',  'static_assert_uimm_bits!', 'N, 3',    'v16i8', 'int8x16_t::splat', 'N as i8']
+      - [uint16x4_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4',    'v4i16', 'int16x4_t::splat', 'N as i16']
+      - [uint16x8_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4',    'v8i16', 'int16x8_t::splat', 'N as i16']
+      - [uint32x2_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v2i32', 'int32x2_t::splat', 'N as i32']
+      - [uint32x4_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v4i32', 'int32x4_t::splat', 'N as i32']
+      - [uint64x1_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v1i64', 'int64x1_t::splat', 'N as i64']
+      - [uint64x2_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v2i64', 'int64x2_t::splat', 'N as i64']
+      - [poly8x8_t,  "neon,v7", '8',  'static_assert_uimm_bits!', 'N, 3',     'v8i8', 'int8x8_t::splat',  'N as i8']
+      - [poly8x16_t, "neon,v7", '8',  'static_assert_uimm_bits!', 'N, 3',    'v16i8', 'int8x16_t::splat', 'N as i8']
+      - [poly16x4_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4',    'v4i16', 'int16x4_t::splat', 'N as i16']
+      - [poly16x8_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4',    'v8i16', 'int16x8_t::splat', 'N as i16']
+      ## These live in ./crates/core_arch/src/arm/neon.rs
+      #- [poly64x1_t, "neon,v7,aes", '"vsli.64"', 'static_assert!', '0 <= N && N <= 63', 'v1i64', 'int64x1_t::splat', 'N as i64']
+      #- [poly64x2_t, "neon,v7,aes", '"vsli.64"', 'static_assert!', '0 <= N && N <= 63', 'v2i64', 'int64x2_t::splat', 'N as i64']
+    compose:
+      - FnCall: ["{type[3]}", ['{type[4]}']]
+      - FnCall:
+          - 'transmute'
+          - - FnCall:
+                - "vshiftins_{type[5]}"
+                - - FnCall: [transmute, [a]]
+                  - FnCall: [transmute, [b]]
+                  - FnCall: ["{type[6]}", ["{type[7]}"]]
+
+  - name: "vsli{neon_type[0].N}"
+    doc: "Shift Left and Insert (immediate)"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - *target-is-arm
+      - FnCall: [target_feature, ['enable = "neon,v7"']]
+      - *neon-unstable
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vsli.{type[1]}"', 'N = 1']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+    static_defs: ['const N: i32']
+    types:
+      - [int8x8_t,  '8',  'static_assert_uimm_bits!', 'N, 3',     'v8i8', 'int8x8_t::splat',  'N as i8']
+      - [int8x16_t, '8',  'static_assert_uimm_bits!', 'N, 3',    'v16i8', 'int8x16_t::splat', 'N as i8']
+      - [int16x4_t, '16', 'static_assert_uimm_bits!', 'N, 4',    'v4i16', 'int16x4_t::splat', 'N as i16']
+      - [int16x8_t, '16', 'static_assert_uimm_bits!', 'N, 4',    'v8i16', 'int16x8_t::splat', 'N as i16']
+      - [int32x2_t, '32', 'static_assert!', 'N >= 0 && N <= 31', 'v2i32', 'int32x2_t::splat', 'N']
+      - [int32x4_t, '32', 'static_assert!', 'N >= 0 && N <= 31', 'v4i32', 'int32x4_t::splat', 'N']
+      - [int64x1_t, '64', 'static_assert!', 'N >= 0 && N <= 63', 'v1i64', 'int64x1_t::splat', 'N as i64']
+      - [int64x2_t, '64', 'static_assert!', 'N >= 0 && N <= 63', 'v2i64', 'int64x2_t::splat', 'N as i64']
+    compose:
+      - FnCall: ["{type[2]}", ['{type[3]}']]
+      - FnCall:
+          - "vshiftins_{type[4]}"
+          - - a
+            - b
+            - FnCall: ["{type[5]}", ["{type[6]}"]]
diff --git a/crates/stdarch-gen-arm/src/big_endian.rs b/crates/stdarch-gen-arm/src/big_endian.rs
new file mode 100644
index 0000000000..da438586e4
--- /dev/null
+++ b/crates/stdarch-gen-arm/src/big_endian.rs
@@ -0,0 +1,201 @@
+use crate::expression::LetVariant;
+use crate::wildstring::WildStringPart;
+use crate::{
+    expression::{Expression, IdentifierType},
+    typekinds::*,
+    wildstring::WildString,
+};
+
+/// Simplifies creating a string that can be used in an Expression, as Expression
+/// expects all strings to be `WildString`
+fn create_single_wild_string(name: &str) -> WildString {
+    WildString(vec![WildStringPart::String(name.to_string())])
+}
+
+/// Creates an Identifier with name `name` and no wildcards. This can, for
+/// example, be used to create variables, function names or arbitrary input.
+/// It is extremely flexible.
+pub fn create_symbol_identifier(arbitrary_string: &str) -> Expression {
+    let identifier_name = create_single_wild_string(arbitrary_string);
+    Expression::Identifier(identifier_name, IdentifierType::Symbol)
+}
+
+/// To compose the simd_shuffle! call we need:
+/// - simd_shuffle!(<arg1>, <arg2>, <array>)
+///
+/// Here we are creating a string version of the `<array>` that can be used as an
+/// Expression Identifier
+///
+/// In textual form, `a: int32x4_t`, which has 4 lanes, would generate the
+/// reversed index array:
+/// ```
+/// [3, 2, 1, 0]
+/// ```
+fn create_array(lanes: u32) -> Option<String> {
+    match lanes {
+        1 => None, /* Makes no sense to shuffle an array of size 1 */
+        2 => Some("[1, 0]".to_string()),
+        3 => Some("[2, 1, 0]".to_string()),
+        4 => Some("[3, 2, 1, 0]".to_string()),
+        8 => Some("[7, 6, 5, 4, 3, 2, 1, 0]".to_string()),
+        16 => Some("[15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]".to_string()),
+        _ => panic!("Incorrect number of vector lanes: {}", lanes),
+    }
+}
+
+/// Creates: `let <variable_name>: <type> = <expression>`
+pub fn create_let_variable(
+    variable_name: &str,
+    type_kind: &TypeKind,
+    expression: Expression,
+) -> Expression {
+    let identifier_name = create_single_wild_string(variable_name);
+    Expression::Let(LetVariant::WithType(
+        identifier_name,
+        type_kind.clone(),
+        Box::new(expression),
+    ))
+}
+
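+/// Creates: `let mut <variable_name>: <type> = <expression>`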
+pub fn create_mut_let_variable(
+    variable_name: &str,
+    type_kind: &TypeKind,
+    expression: Expression,
+) -> Expression {
+    let identifier_name = create_single_wild_string(variable_name);
+    Expression::Let(LetVariant::MutWithType(
+        identifier_name,
+        type_kind.clone(),
+        Box::new(expression),
+    ))
+}
+
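+/// Returns `true` if the type is a vector tuple type, e.g. `int32x4x2_t`.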
+pub fn type_has_tuple(type_kind: &TypeKind) -> bool {
+    if let TypeKind::Vector(vector_type) = type_kind {
+        vector_type.tuple_size().is_some()
+    } else {
+        false
+    }
+}
+
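+/// Rebinds a function parameter as mutable, emitting for example
+/// `let mut a: int32x4x2_t = a`, so that its tuple fields can be reassigned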
+pub fn make_variable_mutable(variable_name: &str, type_kind: &TypeKind) -> Expression {
+    let mut_variable = format!(
+        "let mut {}: {} = {}",
+        variable_name,
+        type_kind.to_string(),
+        variable_name
+    );
+    let identifier_name = create_single_wild_string(&mut_variable);
+    Expression::Identifier(identifier_name, IdentifierType::Symbol)
+}
+
+/// Creates shuffle calls, accepting function pointers that format the output
+/// for tuple types and for types without a tuple
+///
+/// Example:
+///
+/// `a: int32x4_t` with formatting function `create_shuffle_call_fmt` creates:
+/// ```
+/// simd_shuffle!(a, a, [3, 2, 1, 0])
+/// ```
+///
+/// `a: int32x4x2_t` creates:
+/// ```
+/// a.0 = simd_shuffle!(a.0, a.0, [3, 2, 1, 0])
+/// a.1 = simd_shuffle!(a.1, a.1, [3, 2, 1, 0])
+/// ```
+fn create_shuffle_internal(
+    variable_name: &String,
+    type_kind: &TypeKind,
+    fmt_tuple: fn(variable_name: &String, idx: u32, array_lanes: &String) -> String,
+    fmt: fn(variable_name: &String, type_kind: &TypeKind, array_lanes: &String) -> String,
+) -> Option<Expression> {
+    let TypeKind::Vector(vector_type) = type_kind else {
+        return None;
+    };
+
+    let lane_count = vector_type.lanes();
+    let Some(array_lanes) = create_array(lane_count) else {
+        return None;
+    };
+
+    let tuple_count = vector_type.tuple_size().map_or(0, |t| t.to_int());
+
+    if tuple_count > 0 {
+        let capacity_estimate: usize =
+            tuple_count as usize * (lane_count as usize + ((variable_name.len() + 2) * 3));
+        let mut string_builder = String::with_capacity(capacity_estimate);
+
+        /* <var_name>.idx = simd_shuffle!(<var_name>.idx, <var_name>.idx, [<indexes>]) */
+        for idx in 0..tuple_count {
+            let formatted = fmt_tuple(variable_name, idx, &array_lanes);
+            string_builder += formatted.as_str();
+        }
+        Some(create_symbol_identifier(&string_builder))
+    } else {
+        /* Generate a single shuffle for the non-tuple type */
+        let expression = fmt(variable_name, type_kind, &array_lanes);
+        Some(create_symbol_identifier(&expression))
+    }
+}
+
+fn create_assigned_tuple_shuffle_call_fmt(
+    variable_name: &String,
+    idx: u32,
+    array_lanes: &String,
+) -> String {
+    format!(
+        "{variable_name}.{idx} = simd_shuffle!({variable_name}.{idx}, {variable_name}.{idx}, {array_lanes});\n",
+        variable_name = variable_name,
+        idx = idx,
+        array_lanes = array_lanes
+    )
+}
+
+fn create_assigned_shuffle_call_fmt(
+    variable_name: &String,
+    type_kind: &TypeKind,
+    array_lanes: &String,
+) -> String {
+    format!(
+        "let {variable_name}: {type_kind} = simd_shuffle!({variable_name}, {variable_name}, {array_lanes})",
+        type_kind = type_kind.to_string(),
+        variable_name = variable_name,
+        array_lanes = array_lanes
+    )
+}
+
+fn create_shuffle_call_fmt(
+    variable_name: &String,
+    _type_kind: &TypeKind,
+    array_lanes: &String,
+) -> String {
+    format!(
+        "simd_shuffle!({variable_name}, {variable_name}, {array_lanes})",
+        variable_name = variable_name,
+        array_lanes = array_lanes
+    )
+}
+
+/// Create a `simd_shuffle!(<...>, [...])` call, where the output is stored
+/// in a variable named `variable_name`
+pub fn create_assigned_shuffle_call(
+    variable_name: &String,
+    type_kind: &TypeKind,
+) -> Option<Expression> {
+    create_shuffle_internal(
+        variable_name,
+        type_kind,
+        create_assigned_tuple_shuffle_call_fmt,
+        create_assigned_shuffle_call_fmt,
+    )
+}
+
+/// Create a `simd_shuffle!(<...>, [...])` call
+pub fn create_shuffle_call(variable_name: &String, type_kind: &TypeKind) -> Option<Expression> {
+    create_shuffle_internal(
+        variable_name,
+        type_kind,
+        create_assigned_tuple_shuffle_call_fmt,
+        create_shuffle_call_fmt,
+    )
+}
diff --git a/crates/stdarch-gen-arm/src/context.rs b/crates/stdarch-gen-arm/src/context.rs
index aa29eda820..44b5208f39 100644
--- a/crates/stdarch-gen-arm/src/context.rs
+++ b/crates/stdarch-gen-arm/src/context.rs
@@ -35,6 +35,10 @@ pub struct GlobalContext {
     pub arch_cfgs: Vec<ArchitectureSettings>,
     #[serde(default)]
     pub uses_neon_types: bool,
+
+    /// Should the yaml file automagically generate big endian shuffling
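+    /// (set `auto_big_endian: true` at the top level of the yaml file)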
+    #[serde(default)]
+    pub auto_big_endian: Option<bool>,
 }
 
 /// Context of an intrinsic group
diff --git a/crates/stdarch-gen-arm/src/expression.rs b/crates/stdarch-gen-arm/src/expression.rs
index 4a572db3e8..b796bf675c 100644
--- a/crates/stdarch-gen-arm/src/expression.rs
+++ b/crates/stdarch-gen-arm/src/expression.rs
@@ -9,6 +9,7 @@ use std::fmt;
 use std::str::FromStr;
 
 use crate::intrinsic::Intrinsic;
+use crate::wildstring::WildStringPart;
 use crate::{
     context::{self, Context, VariableType},
     intrinsic::{Argument, LLVMLink, StaticDefinition},
@@ -29,6 +30,7 @@ pub enum IdentifierType {
 pub enum LetVariant {
     Basic(WildString, Box<Expression>),
     WithType(WildString, TypeKind, Box<Expression>),
+    MutWithType(WildString, TypeKind, Box<Expression>),
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -155,9 +157,11 @@ impl Expression {
                 cl_ptr_ex.pre_build(ctx)?;
                 arg_exs.iter_mut().try_for_each(|ex| ex.pre_build(ctx))
             }
-            Self::Let(LetVariant::Basic(_, ex) | LetVariant::WithType(_, _, ex)) => {
-                ex.pre_build(ctx)
-            }
+            Self::Let(
+                LetVariant::Basic(_, ex)
+                | LetVariant::WithType(_, _, ex)
+                | LetVariant::MutWithType(_, _, ex),
+            ) => ex.pre_build(ctx),
             Self::CastAs(ex, _) => ex.pre_build(ctx),
             Self::Multiply(lhs, rhs) | Self::Xor(lhs, rhs) => {
                 lhs.pre_build(ctx)?;
@@ -214,7 +218,8 @@ impl Expression {
             Self::Let(variant) => {
                 let (var_name, ex, ty) = match variant {
                     LetVariant::Basic(var_name, ex) => (var_name, ex, None),
-                    LetVariant::WithType(var_name, ty, ex) => {
+                    LetVariant::WithType(var_name, ty, ex)
+                    | LetVariant::MutWithType(var_name, ty, ex) => {
                         if let Some(w) = ty.wildcard() {
                             ty.populate_wildcard(ctx.local.provide_type_wildcard(w)?)?;
                         }
@@ -285,9 +290,11 @@ impl Expression {
             // Nested structures that aren't inherently unsafe, but could contain other expressions
             // that might be.
             Self::Assign(_var, exp) => exp.requires_unsafe_wrapper(ctx_fn),
-            Self::Let(LetVariant::Basic(_, exp) | LetVariant::WithType(_, _, exp)) => {
-                exp.requires_unsafe_wrapper(ctx_fn)
-            }
+            Self::Let(
+                LetVariant::Basic(_, exp)
+                | LetVariant::WithType(_, _, exp)
+                | LetVariant::MutWithType(_, _, exp),
+            ) => exp.requires_unsafe_wrapper(ctx_fn),
             Self::Array(exps) => exps.iter().any(|exp| exp.requires_unsafe_wrapper(ctx_fn)),
             Self::Multiply(lhs, rhs) | Self::Xor(lhs, rhs) => {
                 lhs.requires_unsafe_wrapper(ctx_fn) || rhs.requires_unsafe_wrapper(ctx_fn)
@@ -330,6 +337,32 @@ impl Expression {
             }
         }
     }
+
+    /// Determine if an expression is a `static_assert!`-style assertion call.
+    pub fn is_static_assert(&self) -> bool {
+        match self {
+            Expression::FnCall(fn_call) => match fn_call.0.as_ref() {
+                Expression::Identifier(wild_string, _) => {
+                    if let WildStringPart::String(function_name) = &wild_string.0[0] {
+                        function_name.starts_with("static_assert")
+                    } else {
+                        false
+                    }
+                }
+                _ => panic!("Badly defined function call: {:?}", fn_call),
+            },
+            _ => false,
+        }
+    }
+
+    /// Determine if an expression is an LLVM binding
+    pub fn is_llvm_link(&self) -> bool {
+        matches!(self, Expression::LLVMLink(_))
+    }
 }
 
 impl FromStr for Expression {
@@ -422,6 +455,10 @@ impl ToTokens for Expression {
                 let var_ident = format_ident!("{}", var_name.to_string());
                 tokens.append_all(quote! { let #var_ident: #ty = #exp })
             }
+            Self::Let(LetVariant::MutWithType(var_name, ty, exp)) => {
+                let var_ident = format_ident!("{}", var_name.to_string());
+                tokens.append_all(quote! { let mut #var_ident: #ty = #exp })
+            }
             Self::Assign(var_name, exp) => {
                 /* If we are dereferencing a variable to assign a value \
                  * the 'format_ident!' macro does not like the asterix */
diff --git a/crates/stdarch-gen-arm/src/intrinsic.rs b/crates/stdarch-gen-arm/src/intrinsic.rs
index cabe58f9d6..ad833f52ad 100644
--- a/crates/stdarch-gen-arm/src/intrinsic.rs
+++ b/crates/stdarch-gen-arm/src/intrinsic.rs
@@ -10,6 +10,10 @@ use std::ops::RangeInclusive;
 use std::str::FromStr;
 
 use crate::assert_instr::InstructionAssertionsForBaseType;
+use crate::big_endian::{
+    create_assigned_shuffle_call, create_let_variable, create_mut_let_variable,
+    create_shuffle_call, create_symbol_identifier, make_variable_mutable, type_has_tuple,
+};
 use crate::context::{GlobalContext, GroupContext};
 use crate::input::{InputSet, InputSetEntry};
 use crate::predicate_forms::{DontCareMethod, PredicateForm, PredicationMask, ZeroingMethod};
@@ -284,6 +288,7 @@ pub struct Signature {
     pub name: WildString,
     /// List of function arguments, leave unset or empty for no arguments
     pub arguments: Vec<Argument>,
+
     /// Function return type, leave unset for void
     pub return_type: Option<TypeKind>,
 
@@ -493,12 +498,14 @@ impl LLVMLink {
         let mut sig_name = ctx.local.signature.name.clone();
         sig_name.prepend_str("_");
 
+        let argv = self
+            .arguments
+            .clone()
+            .unwrap_or_else(|| ctx.local.signature.arguments.clone());
+
         let mut sig = Signature {
             name: sig_name,
-            arguments: self
-                .arguments
-                .clone()
-                .unwrap_or_else(|| ctx.local.signature.arguments.clone()),
+            arguments: argv,
             return_type: self
                 .return_type
                 .clone()
@@ -905,6 +912,13 @@ pub struct Intrinsic {
     pub base_type: Option<BaseType>,
     /// Attributes for the function
     pub attr: Option<Vec<Expression>>,
+    /// Big endian variant for composing; this gets populated internally
+    #[serde(skip)]
+    pub big_endian_compose: Vec<Expression>,
+    /// Big endian sometimes needs the bits inverted in a way that cannot be
+    /// automatically detected
+    #[serde(default)]
+    pub big_endian_inverse: Option<bool>,
 }
 
 impl Intrinsic {
@@ -1014,6 +1028,12 @@ impl Intrinsic {
 
         variant.post_build(&mut ctx)?;
 
+        /* Generate the big endian variant if requested; some intrinsics
+         * may not want one */
+        if ctx.global.auto_big_endian.unwrap_or(false) {
+            self.generate_big_endian(&mut variant);
+        }
+
         if let Some(n_variant_op) = ctx.local.n_variant_op().cloned() {
             variant.generate_n_variant(n_variant_op, &mut ctx)
         } else {
@@ -1021,6 +1041,147 @@ impl Intrinsic {
         }
     }
 
+    /// Add a big endian implementation
+    fn generate_big_endian(&self, variant: &mut Intrinsic) {
+        /* We can't always blindly reverse the bits; only in certain conditions
+         * do we need a different order. This flag gives us that control
+         * without having to play code golf with the yaml AST */
+        let should_reverse = {
+            if let Some(should_reverse) = variant.big_endian_inverse {
+                should_reverse
+            } else if variant.compose.len() == 1 {
+                match &variant.compose[0] {
+                    Expression::FnCall(fn_call) => fn_call.0.to_string() == "transmute",
+                    _ => false,
+                }
+            } else {
+                false
+            }
+        };
+
+        if !should_reverse {
+            return;
+        }
+
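+        /* Sketch of the composed output, assuming a body that is a single
+         * `transmute(a)` with a multi-lane argument and, say, an int32x4_t
+         * return type:
+         *     let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+         *     let ret_val: int32x4_t = transmute(a);
+         *     simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+         */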
+        let mut big_endian_expressions: Vec<Expression> = Vec::new();
+
+        /* We cannot assign `a.0 = ...` to an immutable function parameter,
+         * so we rebind such parameters as mutable */
+        for function_parameter in &variant.signature.arguments {
+            if type_has_tuple(&function_parameter.kind) {
+                /* Skip the `mut` rebinding for types with a single lane;
+                 * one-lane types never need shuffling */
+                if let TypeKind::Vector(vector_type) = &function_parameter.kind {
+                    if vector_type.lanes() == 1 {
+                        continue;
+                    }
+                }
+
+                let mutable_variable = make_variable_mutable(
+                    &function_parameter.name.to_string(),
+                    &function_parameter.kind,
+                );
+                big_endian_expressions.push(mutable_variable);
+            }
+        }
+
+        /* Possibly shuffle the vectors */
+        for function_parameter in &variant.signature.arguments {
+            if let Some(shuffle_call) = create_assigned_shuffle_call(
+                &function_parameter.name.to_string(),
+                &function_parameter.kind,
+            ) {
+                big_endian_expressions.push(shuffle_call);
+            }
+        }
+
+        if !big_endian_expressions.is_empty() {
+            variant
+                .big_endian_compose
+                .reserve(big_endian_expressions.len() + variant.compose.len());
+            let mut expression = &variant.compose[0];
+            let needs_reordering = expression.is_static_assert() || expression.is_llvm_link();
+
+            /* We want to keep the asserts and llvm links at the start of
+             * the new big_endian_compose vector that we are creating */
+            if needs_reordering {
+                let mut expression_idx = 0;
+                while expression.is_static_assert() || expression.is_llvm_link() {
+                    /* Add static asserts and llvm links to the start of the
+                     * vector */
+                    variant.big_endian_compose.push(expression.clone());
+                    expression_idx += 1;
+                    expression = &variant.compose[expression_idx];
+                }
+
+                /* Add the big endian specific expressions */
+                variant.big_endian_compose.extend(big_endian_expressions);
+
+                /* Add the rest of the expressions */
+                for i in expression_idx..variant.compose.len() {
+                    variant.big_endian_compose.push(variant.compose[i].clone());
+                }
+            } else {
+                /* If we do not need to reorder anything then immediately add
+                 * the expressions from big_endian_expressions and
+                 * concatenate the compose vector */
+                variant.big_endian_compose.extend(big_endian_expressions);
+                variant
+                    .big_endian_compose
+                    .extend(variant.compose.iter().cloned());
+            }
+        }
+
+        /* If we have a return type, there is a possibility we want to generate
+         * a shuffle call */
+        if let Some(return_type) = &variant.signature.return_type {
+            let return_value = variant
+                .compose
+                .last()
+                .expect("Cannot define a return type with an empty function body");
+
+            /* If we do not create a shuffle call we do not need to modify the
+             * return value or append to the big endian AST array. A bit
+             * confusing: here we build the final shuffle call before building
+             * the binding that captures the return value of the intrinsic. */
+            let ret_val_name = "ret_val".to_string();
+            if let Some(simd_shuffle_call) = create_shuffle_call(&ret_val_name, return_type) {
+                /* There is a possibility that the function arguments did not
+                 * require big endian treatment, thus we need to now add the
+                 * original function body before appending the return value. */
+                if variant.big_endian_compose.is_empty() {
+                    variant
+                        .big_endian_compose
+                        .extend(variant.compose.iter().cloned());
+                }
+
+                /* Now we shuffle the return value - we are creating a new
+                 * return value for the intrinsic. */
+                let return_value_variable = if type_has_tuple(return_type) {
+                    create_mut_let_variable(&ret_val_name, return_type, return_value.clone())
+                } else {
+                    create_let_variable(&ret_val_name, return_type, return_value.clone())
+                };
+
+                /* Remove the last item which will be the return value */
+                variant.big_endian_compose.pop();
+                variant.big_endian_compose.push(return_value_variable);
+                variant.big_endian_compose.push(simd_shuffle_call);
+                if type_has_tuple(return_type) {
+                    /* We generated one shuffle call per tuple field,
+                     * re-assigning each field in place; those statements do
+                     * not make the parent function return, so we add the
+                     * return value here */
+                    variant
+                        .big_endian_compose
+                        .push(create_symbol_identifier(&ret_val_name));
+                }
+            }
+        }
+    }
+
     /// Implement a "zeroing" (_z) method by calling an existing "merging" (_m) method, as required.
     fn generate_zeroing_pass_through(
         &mut self,
@@ -1505,120 +1666,155 @@ impl Intrinsic {
     }
 }
 
-impl ToTokens for Intrinsic {
-    fn to_tokens(&self, tokens: &mut TokenStream) {
-        let signature = &self.signature;
-        let fn_name = signature.fn_name().to_string();
-        let target_feature = self.target_features.join(",");
-        let safety = self
-            .safety
-            .as_ref()
-            .expect("safety should be determined during `pre_build`");
-
-        if let Some(doc) = &self.doc {
-            let mut doc = vec![doc.to_string()];
-
-            doc.push(format!("[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/{})", &signature.doc_name()));
+/// Some intrinsics require both a little endian and a big endian
+/// implementation; others do not
+enum Endianness {
+    Little,
+    Big,
+    NA,
+}
 
-            if safety.has_doc_comments() {
-                doc.push("## Safety".to_string());
-                for comment in safety.doc_comments() {
-                    doc.push(format!("  * {comment}"));
-                }
-            } else {
-                assert!(
-                    safety.is_safe(),
-                    "{fn_name} is both public and unsafe, and so needs safety documentation"
-                );
+/// Based on the endianness, creates the appropriate intrinsic, or simply
+/// creates the desired intrinsic without any endianness handling
+fn create_tokens(intrinsic: &Intrinsic, endianness: Endianness, tokens: &mut TokenStream) {
+    let signature = &intrinsic.signature;
+    let fn_name = signature.fn_name().to_string();
+    let target_feature = intrinsic.target_features.join(",");
+    let safety = intrinsic
+        .safety
+        .as_ref()
+        .expect("safety should be determined during `pre_build`");
+
+    if let Some(doc) = &intrinsic.doc {
+        let mut doc = vec![doc.to_string()];
+
+        doc.push(format!("[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/{})", &signature.doc_name()));
+
+        if safety.has_doc_comments() {
+            doc.push("## Safety".to_string());
+            for comment in safety.doc_comments() {
+                doc.push(format!("  * {comment}"));
             }
-
-            tokens.append_all(quote! { #(#[doc = #doc])* });
         } else {
             assert!(
-                matches!(self.visibility, FunctionVisibility::Private),
-                "{fn_name} needs to be private, or to have documentation."
-            );
-            assert!(
-                !safety.has_doc_comments(),
-                "{fn_name} needs a documentation section for its safety comments."
+                safety.is_safe(),
+                "{fn_name} is both public and unsafe, and so needs safety documentation"
             );
         }
 
-        tokens.append_all(quote! { #[inline] });
-
-        /* If we have manually defined attributes on the block of yaml with
-         * 'attr:' we want to add them */
-        if let Some(attr) = &self.attr {
-            /* Scan to see if we have defined `FnCall: [target_feature, ['<bespoke>']]`*/
-            if !has_target_feature_attr(attr) {
-                /* If not add the default one that is defined at the top of
-                 * the yaml file. This does mean we scan the attributes vector
-                 * twice, once to see if the `target_feature` exists and again
-                 * to actually append the tokens. We could impose that the
-                 * `target_feature` call has to be the first argument of the
-                 * `attr` block */
-                tokens.append_all(quote! {
-                    #[target_feature(enable = #target_feature)]
-                });
-            }
+        tokens.append_all(quote! { #(#[doc = #doc])* });
+    } else {
+        assert!(
+            matches!(intrinsic.visibility, FunctionVisibility::Private),
+            "{fn_name} needs to be private, or to have documentation."
+        );
+        assert!(
+            !safety.has_doc_comments(),
+            "{fn_name} needs a documentation section for its safety comments."
+        );
+    }
 
-            /* Target feature will get added here */
-            let attr_expressions = &mut attr.iter().peekable();
-            while let Some(ex) = attr_expressions.next() {
-                let mut inner = TokenStream::new();
-                ex.to_tokens(&mut inner);
-                tokens.append(Punct::new('#', Spacing::Alone));
-                tokens.append(Group::new(Delimiter::Bracket, inner));
-            }
-        } else {
+    tokens.append_all(quote! { #[inline] });
+
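+    /* An intrinsic with a big endian variant is emitted twice; the two copies
+     * are kept mutually exclusive via these `target_endian` cfg gates */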
+    match endianness {
+        Endianness::Little => tokens.append_all(quote! { #[cfg(target_endian = "little")] }),
+        Endianness::Big => tokens.append_all(quote! { #[cfg(target_endian = "big")] }),
+        Endianness::NA => {}
+    };
+
+    let expressions = match endianness {
+        Endianness::Little | Endianness::NA => &intrinsic.compose,
+        Endianness::Big => &intrinsic.big_endian_compose,
+    };
+
+    /* If we have manually defined attributes on the block of yaml with
+     * 'attr:' we want to add them */
+    if let Some(attr) = &intrinsic.attr {
+        /* Scan to see if we have defined `FnCall: [target_feature, ['<bespoke>']]`*/
+        if !has_target_feature_attr(attr) {
+            /* If not add the default one that is defined at the top of
+             * the yaml file. This does mean we scan the attributes vector
+             * twice, once to see if the `target_feature` exists and again
+             * to actually append the tokens. We could impose that the
+             * `target_feature` call has to be the first argument of the
+             * `attr` block */
             tokens.append_all(quote! {
                 #[target_feature(enable = #target_feature)]
             });
         }
 
-        if let Some(assert_instr) = &self.assert_instr {
-            if !assert_instr.is_empty() {
-                InstructionAssertionsForBaseType(&assert_instr, &self.base_type.as_ref())
-                    .to_tokens(tokens)
-            }
+        /* Target feature will get added here */
+        let attr_expressions = &mut attr.iter().peekable();
+        while let Some(ex) = attr_expressions.next() {
+            let mut inner = TokenStream::new();
+            ex.to_tokens(&mut inner);
+            tokens.append(Punct::new('#', Spacing::Alone));
+            tokens.append(Group::new(Delimiter::Bracket, inner));
         }
+    } else {
+        tokens.append_all(quote! {
+            #[target_feature(enable = #target_feature)]
+        });
+    }
 
-        match &self.visibility {
-            FunctionVisibility::Public => tokens.append_all(quote! { pub }),
-            FunctionVisibility::Private => {}
-        }
-        if safety.is_unsafe() {
-            tokens.append_all(quote! { unsafe });
+    if let Some(assert_instr) = &intrinsic.assert_instr {
+        if !assert_instr.is_empty() {
+            InstructionAssertionsForBaseType(&assert_instr, &intrinsic.base_type.as_ref())
+                .to_tokens(tokens)
         }
-        tokens.append_all(quote! { #signature });
+    }
 
-        // If the intrinsic function is explicitly unsafe, we populate `body_default_safety` with
-        // the implementation. No explicit unsafe blocks are required.
-        //
-        // If the intrinsic is safe, we fill `body_default_safety` until we encounter an expression
-        // that requires an unsafe wrapper, then switch to `body_unsafe`. Since the unsafe
-        // operation (e.g. memory access) is typically the last step, this tends to minimises the
-        // amount of unsafe code required.
-        let mut body_default_safety = TokenStream::new();
-        let mut body_unsafe = TokenStream::new();
-        let mut body_current = &mut body_default_safety;
-        for (pos, ex) in self.compose.iter().with_position() {
-            if safety.is_safe() && ex.requires_unsafe_wrapper(&fn_name) {
-                body_current = &mut body_unsafe;
-            }
-            ex.to_tokens(body_current);
-            let is_last = matches!(pos, itertools::Position::Last | itertools::Position::Only);
-            let is_llvm_link = matches!(ex, Expression::LLVMLink(_));
-            if !is_last && !is_llvm_link {
-                body_current.append(Punct::new(';', Spacing::Alone));
-            }
+    match &intrinsic.visibility {
+        FunctionVisibility::Public => tokens.append_all(quote! { pub }),
+        FunctionVisibility::Private => {}
+    }
+    if safety.is_unsafe() {
+        tokens.append_all(quote! { unsafe });
+    }
+    tokens.append_all(quote! { #signature });
+
+    // If the intrinsic function is explicitly unsafe, we populate `body_default_safety` with
+    // the implementation. No explicit unsafe blocks are required.
+    //
+    // If the intrinsic is safe, we fill `body_default_safety` until we encounter an expression
+    // that requires an unsafe wrapper, then switch to `body_unsafe`. Since the unsafe
+    // operation (e.g. memory access) is typically the last step, this tends to minimises the
+    // amount of unsafe code required.
+    let mut body_default_safety = TokenStream::new();
+    let mut body_unsafe = TokenStream::new();
+    let mut body_current = &mut body_default_safety;
+    for (pos, ex) in expressions.iter().with_position() {
+        if safety.is_safe() && ex.requires_unsafe_wrapper(&fn_name) {
+            body_current = &mut body_unsafe;
         }
-        let mut body = body_default_safety;
-        if !body_unsafe.is_empty() {
-            body.append_all(quote! { unsafe { #body_unsafe } });
+        ex.to_tokens(body_current);
+        let is_last = matches!(pos, itertools::Position::Last | itertools::Position::Only);
+        let is_llvm_link = matches!(ex, Expression::LLVMLink(_));
+        if !is_last && !is_llvm_link {
+            body_current.append(Punct::new(';', Spacing::Alone));
         }
+    }
+    let mut body = body_default_safety;
+    if !body_unsafe.is_empty() {
+        body.append_all(quote! { unsafe { #body_unsafe } });
+    }
 
-        tokens.append(Group::new(Delimiter::Brace, body));
+    tokens.append(Group::new(Delimiter::Brace, body));
+}
+
+impl ToTokens for Intrinsic {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        if !self.big_endian_compose.is_empty() {
+            create_tokens(self, Endianness::Little, tokens);
+            create_tokens(self, Endianness::Big, tokens);
+        } else {
+            create_tokens(self, Endianness::NA, tokens);
+        }
     }
 }
 
diff --git a/crates/stdarch-gen-arm/src/main.rs b/crates/stdarch-gen-arm/src/main.rs
index c78e5dc4e4..9ea1917c14 100644
--- a/crates/stdarch-gen-arm/src/main.rs
+++ b/crates/stdarch-gen-arm/src/main.rs
@@ -1,6 +1,7 @@
 #![feature(pattern)]
 
 mod assert_instr;
+mod big_endian;
 mod context;
 mod expression;
 mod fn_suffix;