diff --git a/crates/core_arch/missing-x86.md b/crates/core_arch/missing-x86.md
index 8da6074cac..11ad3f04c1 100644
--- a/crates/core_arch/missing-x86.md
+++ b/crates/core_arch/missing-x86.md
@@ -149,21 +149,10 @@
["AVX512F"]
- * [ ] [`_cvtmask16_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_cvtmask16_u32)
- * [ ] [`_cvtu32_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_cvtu32_mask16)
- * [ ] [`_kortest_mask16_u8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kortest_mask16_u8)
- * [ ] [`_kortestc_mask16_u8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kortestc_mask16_u8)
- * [ ] [`_kortestz_mask16_u8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kortestz_mask16_u8)
- * [ ] [`_kshiftli_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kshiftli_mask16)
- * [ ] [`_kshiftri_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kshiftri_mask16)
- * [ ] [`_load_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_load_mask16)
- * [ ] [`_mm512_cvtsd_f64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsd_f64)
- * [ ] [`_mm512_cvtss_f32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtss_f32)
* [ ] [`_mm512_i32logather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64)
* [ ] [`_mm512_i32logather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd)
* [ ] [`_mm512_i32loscatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64)
* [ ] [`_mm512_i32loscatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd)
- * [ ] [`_mm512_kortestz`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kortestz)
* [ ] [`_mm512_mask_i32logather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64)
* [ ] [`_mm512_mask_i32logather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
* [ ] [`_mm512_mask_i32loscatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
@@ -175,7 +164,6 @@
* [ ] [`_mm_mask_store_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_ss)
* [ ] [`_mm_maskz_load_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd)
* [ ] [`_mm_maskz_load_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss)
- * [ ] [`_store_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_store_mask16)
@@ -204,9 +192,6 @@
* [ ] [`_mm256_mmask_i64gather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64)
* [ ] [`_mm256_mmask_i64gather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd)
* [ ] [`_mm256_mmask_i64gather_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps)
- * [ ] [`_mm256_rsqrt14_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rsqrt14_pd)
- * [ ] [`_mm256_rsqrt14_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rsqrt14_ps)
- * [ ] [`_mm_abs_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi64)
* [ ] [`_mm_i32scatter_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32)
* [ ] [`_mm_i32scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64)
* [ ] [`_mm_i32scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd)
@@ -215,7 +200,6 @@
* [ ] [`_mm_i64scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64)
* [ ] [`_mm_i64scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd)
* [ ] [`_mm_i64scatter_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps)
- * [ ] [`_mm_mask_abs_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_abs_epi64)
* [ ] [`_mm_mask_i32scatter_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32)
* [ ] [`_mm_mask_i32scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64)
* [ ] [`_mm_mask_i32scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd)
@@ -224,10 +208,6 @@
* [ ] [`_mm_mask_i64scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64)
* [ ] [`_mm_mask_i64scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd)
* [ ] [`_mm_mask_i64scatter_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps)
- * [ ] [`_mm_mask_min_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_epi64)
- * [ ] [`_mm_maskz_abs_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_abs_epi64)
- * [ ] [`_mm_maskz_min_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_epi64)
- * [ ] [`_mm_min_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi64)
* [ ] [`_mm_mmask_i32gather_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32)
* [ ] [`_mm_mmask_i32gather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64)
* [ ] [`_mm_mmask_i32gather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd)
@@ -236,8 +216,6 @@
* [ ] [`_mm_mmask_i64gather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi64)
* [ ] [`_mm_mmask_i64gather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_pd)
* [ ] [`_mm_mmask_i64gather_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_ps)
- * [ ] [`_mm_rsqrt14_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt14_pd)
- * [ ] [`_mm_rsqrt14_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt14_ps)
diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index e5f78a2ffb..8c88d3aa2f 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -2,6 +2,7 @@ use crate::{
arch::asm,
core_arch::{simd::*, x86::*},
intrinsics::simd::*,
+ intrinsics::{fmaf32, fmaf64},
mem, ptr,
};
@@ -48,11 +49,9 @@ use stdarch_test::assert_instr;
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i {
let a = a.as_i32x16();
- // all-0 is a properly initialized i32x16
- let zero: i32x16 = mem::zeroed();
- let sub = simd_sub(zero, a);
- let cmp: i32x16 = simd_gt(a, zero);
- transmute(simd_select(cmp, a, sub))
+ let zero = i32x16::splat(0);
+ let r = simd_select::<i32x16, _>(simd_lt(a, zero), simd_neg(a), a);
+ transmute(r)
}
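// Sketch (not part of this patch): a scalar model of one lane of the branchless
// abs above, assuming wrapping negation, which is what `simd_neg` does on integer lanes.
fn abs_lane_i32(x: i32) -> i32 {
    // mirrors simd_select(simd_lt(a, zero), simd_neg(a), a); i32::MIN wraps to
    // itself, matching the VPABSD instruction.
    if x < 0 { x.wrapping_neg() } else { x }
}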
/// Computes the absolute value of packed 32-bit integers in `a`, and store the
@@ -143,11 +142,9 @@ pub unsafe fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_abs_epi64(a: __m512i) -> __m512i {
let a = a.as_i64x8();
- // all-0 is a properly initialized i64x8
- let zero: i64x8 = mem::zeroed();
- let sub = simd_sub(zero, a);
- let cmp: i64x8 = simd_gt(a, zero);
- transmute(simd_select(cmp, a, sub))
+ let zero = i64x8::splat(0);
+ let r = simd_select::<i64x8, _>(simd_lt(a, zero), simd_neg(a), a);
+ transmute(r)
}
/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -184,11 +181,9 @@ pub unsafe fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm256_abs_epi64(a: __m256i) -> __m256i {
let a = a.as_i64x4();
- // all-0 is a properly initialized i64x4
- let zero: i64x4 = mem::zeroed();
- let sub = simd_sub(zero, a);
- let cmp: i64x4 = simd_gt(a, zero);
- transmute(simd_select(cmp, a, sub))
+ let zero = i64x4::splat(0);
+ let r = simd_select::<i64x4, _>(simd_lt(a, zero), simd_neg(a), a);
+ transmute(r)
}
/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -205,7 +200,7 @@ pub unsafe fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __
/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi64&expand=45)
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
@@ -216,18 +211,54 @@ pub unsafe fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
transmute(simd_select_bitmask(k, abs, zero))
}
+/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi64)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vpabsq))]
+pub unsafe fn _mm_abs_epi64(a: __m128i) -> __m128i {
+ let a = a.as_i64x2();
+ let zero = i64x2::splat(0);
+ let r = simd_select::<i64x2, _>(simd_lt(a, zero), simd_neg(a), a);
+ transmute(r)
+}
+
+/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi64)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vpabsq))]
+pub unsafe fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ let abs = _mm_abs_epi64(a).as_i64x2();
+ transmute(simd_select_bitmask(k, abs, src.as_i64x2()))
+}
+
+/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi64)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vpabsq))]
+pub unsafe fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i {
+ let abs = _mm_abs_epi64(a).as_i64x2();
+ let zero = i64x2::splat(0);
+ transmute(simd_select_bitmask(k, abs, zero))
+}
+
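// Sketch (not part of this patch): per-lane behaviour of the mask/maskz variants
// added above, assuming bit i of `k` selects lane i as `simd_select_bitmask` does.
fn masked_abs_lane(src: i64, k: u8, lane: u32, a: i64) -> i64 {
    let abs = if a < 0 { a.wrapping_neg() } else { a };
    if (k >> lane) & 1 != 0 { abs } else { src } // the maskz variant passes src = 0
}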
/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_ps&expand=65)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vpandq))]
+#[cfg_attr(test, assert_instr(vpandd))]
pub unsafe fn _mm512_abs_ps(v2: __m512) -> __m512 {
- let a = _mm512_set1_epi32(0x7FFFFFFF); // from LLVM code
- let b = transmute::<f32x16, __m512i>(v2.as_f32x16());
- let abs = _mm512_and_epi32(a, b);
- transmute(abs)
+ simd_fabs(v2)
}
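// Sketch (not part of this patch): the removed bit-masking body and `simd_fabs`
// agree per lane - clearing the sign bit is exactly fabs, NaN payloads included.
fn fabs_via_mask(x: f32) -> f32 {
    f32::from_bits(x.to_bits() & 0x7FFF_FFFF)
}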
/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -238,8 +269,7 @@ pub unsafe fn _mm512_abs_ps(v2: __m512) -> __m512 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandd))]
pub unsafe fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
- let abs = _mm512_abs_ps(v2).as_f32x16();
- transmute(simd_select_bitmask(k, abs, src.as_f32x16()))
+ simd_select_bitmask(k, simd_fabs(v2), src)
}
/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
@@ -250,10 +280,7 @@ pub unsafe fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m51
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_abs_pd(v2: __m512d) -> __m512d {
- let a = _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF); // from LLVM code
- let b = transmute::<f64x8, __m512i>(v2.as_f64x8());
- let abs = _mm512_and_epi64(a, b);
- transmute(abs)
+ simd_fabs(v2)
}
/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -264,8 +291,7 @@ pub unsafe fn _mm512_abs_pd(v2: __m512d) -> __m512d {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
- let abs = _mm512_abs_pd(v2).as_f64x8();
- transmute(simd_select_bitmask(k, abs, src.as_f64x8()))
+ simd_select_bitmask(k, simd_fabs(v2), src)
}
/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1264,7 +1290,9 @@ pub unsafe fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub unsafe fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpmuldq(a.as_i32x16(), b.as_i32x16()))
+ let a = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8()));
+ let b = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8()));
+ transmute(simd_mul(a, b))
}
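// Sketch (not part of this patch): one 64-bit lane of the rewritten _mm512_mul_epi32.
// Truncating the lane to i32 keeps its low half; sign-extending back to i64 before
// multiplying reproduces VPMULDQ's signed 32 x 32 -> 64 multiply.
fn mul_epi32_lane(a: i64, b: i64) -> i64 {
    (a as i32 as i64).wrapping_mul(b as i32 as i64)
}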
/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1476,7 +1504,10 @@ pub unsafe fn _mm512_mask_mullox_epi64(
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub unsafe fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpmuludq(a.as_u32x16(), b.as_u32x16()))
+ let a = a.as_u64x8();
+ let b = b.as_u64x8();
+ let mask = u64x8::splat(u32::MAX.into());
+ transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
}
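// Sketch (not part of this patch): the unsigned counterpart - masking each lane with
// u32::MAX zeroes its high half, so the full product matches VPMULUDQ.
fn mul_epu32_lane(a: u64, b: u64) -> u64 {
    (a & u64::from(u32::MAX)).wrapping_mul(b & u64::from(u32::MAX))
}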
/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1906,7 +1937,9 @@ pub unsafe fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpmaxsd(a.as_i32x16(), b.as_i32x16()))
+ let a = a.as_i32x16();
+ let b = b.as_i32x16();
+ transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
}
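// Sketch (not part of this patch): the compare-and-select pattern used for all of the
// min/max rewrites in this file, shown for one signed 32-bit lane.
fn max_lane_i32(a: i32, b: i32) -> i32 {
    if a > b { a } else { b } // simd_select(simd_gt(a, b), a, b)
}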
/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1992,7 +2025,9 @@ pub unsafe fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpmaxsq(a.as_i64x8(), b.as_i64x8()))
+ let a = a.as_i64x8();
+ let b = b.as_i64x8();
+ transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
}
/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2028,7 +2063,9 @@ pub unsafe fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
- transmute(vpmaxsq256(a.as_i64x4(), b.as_i64x4()))
+ let a = a.as_i64x4();
+ let b = b.as_i64x4();
+ transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
}
/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2064,7 +2101,9 @@ pub unsafe fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
- transmute(vpmaxsq128(a.as_i64x2(), b.as_i64x2()))
+ let a = a.as_i64x2();
+ let b = b.as_i64x2();
+ transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
}
/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2276,7 +2315,9 @@ pub unsafe fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpmaxud(a.as_u32x16(), b.as_u32x16()))
+ let a = a.as_u32x16();
+ let b = b.as_u32x16();
+ transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
}
/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2362,7 +2403,9 @@ pub unsafe fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpmaxuq(a.as_u64x8(), b.as_u64x8()))
+ let a = a.as_u64x8();
+ let b = b.as_u64x8();
+ transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
}
/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2398,7 +2441,9 @@ pub unsafe fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
- transmute(vpmaxuq256(a.as_u64x4(), b.as_u64x4()))
+ let a = a.as_u64x4();
+ let b = b.as_u64x4();
+ transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
}
/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2434,7 +2479,9 @@ pub unsafe fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
- transmute(vpmaxuq128(a.as_u64x2(), b.as_u64x2()))
+ let a = a.as_u64x2();
+ let b = b.as_u64x2();
+ transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
}
/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2470,7 +2517,9 @@ pub unsafe fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub unsafe fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpminsd(a.as_i32x16(), b.as_i32x16()))
+ let a = a.as_i32x16();
+ let b = b.as_i32x16();
+ transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
}
/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2556,7 +2605,9 @@ pub unsafe fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub unsafe fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpminsq(a.as_i64x8(), b.as_i64x8()))
+ let a = a.as_i64x8();
+ let b = b.as_i64x8();
+ transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
}
/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2592,7 +2643,9 @@ pub unsafe fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub unsafe fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
- transmute(vpminsq256(a.as_i64x4(), b.as_i64x4()))
+ let a = a.as_i64x4();
+ let b = b.as_i64x4();
+ transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
}
/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2620,6 +2673,44 @@ pub unsafe fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m
transmute(simd_select_bitmask(k, min, zero))
}
+/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi64)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vpminsq))]
+pub unsafe fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
+ let a = a.as_i64x2();
+ let b = b.as_i64x2();
+ transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
+}
+
+/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi64)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vpminsq))]
+pub unsafe fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+ let min = _mm_min_epi64(a, b).as_i64x2();
+ transmute(simd_select_bitmask(k, min, src.as_i64x2()))
+}
+
+/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi64)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vpminsq))]
+pub unsafe fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+ let min = _mm_min_epi64(a, b).as_i64x2();
+ let zero = _mm_setzero_si128().as_i64x2();
+ transmute(simd_select_bitmask(k, min, zero))
+}
+
/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_ps&expand=3769)
@@ -2806,7 +2897,9 @@ pub unsafe fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpminud(a.as_u32x16(), b.as_u32x16()))
+ let a = a.as_u32x16();
+ let b = b.as_u32x16();
+ transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
}
/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2892,7 +2985,9 @@ pub unsafe fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
- transmute(vpminuq(a.as_u64x8(), b.as_u64x8()))
+ let a = a.as_u64x8();
+ let b = b.as_u64x8();
+ transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
}
/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2928,7 +3023,9 @@ pub unsafe fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
- transmute(vpminuq256(a.as_u64x4(), b.as_u64x4()))
+ let a = a.as_u64x4();
+ let b = b.as_u64x4();
+ transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
}
/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2964,7 +3061,9 @@ pub unsafe fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
- transmute(vpminuq128(a.as_u64x2(), b.as_u64x2()))
+ let a = a.as_u64x2();
+ let b = b.as_u64x2();
+ transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
}
/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -3000,7 +3099,7 @@ pub unsafe fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm512_sqrt_ps(a: __m512) -> __m512 {
- transmute(vsqrtps(a.as_f32x16(), _MM_FROUND_CUR_DIRECTION))
+ simd_fsqrt(a)
}
/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -3011,8 +3110,7 @@ pub unsafe fn _mm512_sqrt_ps(a: __m512) -> __m512 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
- let sqrt = _mm512_sqrt_ps(a).as_f32x16();
- transmute(simd_select_bitmask(k, sqrt, src.as_f32x16()))
+ simd_select_bitmask(k, simd_fsqrt(a), src)
}
/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3023,9 +3121,7 @@ pub unsafe fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m51
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
- let sqrt = _mm512_sqrt_ps(a).as_f32x16();
- let zero = _mm512_setzero_ps().as_f32x16();
- transmute(simd_select_bitmask(k, sqrt, zero))
+ simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_ps())
}
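// Sketch (not part of this patch): hypothetical use of the zeromask variant above;
// lanes whose mask bit is clear come back as 0.0 instead of being copied from a source.
#[target_feature(enable = "avx512f")]
unsafe fn sqrt_low_half(a: __m512) -> __m512 {
    _mm512_maskz_sqrt_ps(0x00FF, a) // square-root only the low 8 lanes
}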
/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -3036,8 +3132,7 @@ pub unsafe fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
- let sqrt = _mm256_sqrt_ps(a).as_f32x8();
- transmute(simd_select_bitmask(k, sqrt, src.as_f32x8()))
+ simd_select_bitmask(k, simd_fsqrt(a), src)
}
/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3048,9 +3143,7 @@ pub unsafe fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
- let sqrt = _mm256_sqrt_ps(a).as_f32x8();
- let zero = _mm256_setzero_ps().as_f32x8();
- transmute(simd_select_bitmask(k, sqrt, zero))
+ simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_ps())
}
/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -3061,8 +3154,7 @@ pub unsafe fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
- let sqrt = _mm_sqrt_ps(a).as_f32x4();
- transmute(simd_select_bitmask(k, sqrt, src.as_f32x4()))
+ simd_select_bitmask(k, simd_fsqrt(a), src)
}
/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3073,9 +3165,7 @@ pub unsafe fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
- let sqrt = _mm_sqrt_ps(a).as_f32x4();
- let zero = _mm_setzero_ps().as_f32x4();
- transmute(simd_select_bitmask(k, sqrt, zero))
+ simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_ps())
}
/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
@@ -3086,7 +3176,7 @@ pub unsafe fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
- transmute(vsqrtpd(a.as_f64x8(), _MM_FROUND_CUR_DIRECTION))
+ simd_fsqrt(a)
}
/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -3097,8 +3187,7 @@ pub unsafe fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
- let sqrt = _mm512_sqrt_pd(a).as_f64x8();
- transmute(simd_select_bitmask(k, sqrt, src.as_f64x8()))
+ simd_select_bitmask(k, simd_fsqrt(a), src)
}
/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3109,9 +3198,7 @@ pub unsafe fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m5
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
- let sqrt = _mm512_sqrt_pd(a).as_f64x8();
- let zero = _mm512_setzero_pd().as_f64x8();
- transmute(simd_select_bitmask(k, sqrt, zero))
+ simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_pd())
}
/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -3122,8 +3209,7 @@ pub unsafe fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
- let sqrt = _mm256_sqrt_pd(a).as_f64x4();
- transmute(simd_select_bitmask(k, sqrt, src.as_f64x4()))
+ simd_select_bitmask(k, simd_fsqrt(a), src)
}
/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3134,9 +3220,7 @@ pub unsafe fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m2
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
- let sqrt = _mm256_sqrt_pd(a).as_f64x4();
- let zero = _mm256_setzero_pd().as_f64x4();
- transmute(simd_select_bitmask(k, sqrt, zero))
+ simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_pd())
}
/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -3147,8 +3231,7 @@ pub unsafe fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
- let sqrt = _mm_sqrt_pd(a).as_f64x2();
- transmute(simd_select_bitmask(k, sqrt, src.as_f64x2()))
+ simd_select_bitmask(k, simd_fsqrt(a), src)
}
/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3159,9 +3242,7 @@ pub unsafe fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
- let sqrt = _mm_sqrt_pd(a).as_f64x2();
- let zero = _mm_setzero_pd().as_f64x2();
- transmute(simd_select_bitmask(k, sqrt, zero))
+ simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_pd())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
@@ -3172,7 +3253,7 @@ pub unsafe fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
- transmute(vfmadd132ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16()))
+ simd_fma(a, b, c)
}
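// Sketch (not part of this patch): per-lane model of `simd_fma` - a fused multiply-add
// with a single rounding step, i.e. f32::mul_add.
fn fmadd_lane(a: f32, b: f32, c: f32) -> f32 {
    a.mul_add(b, c)
}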
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -3183,8 +3264,7 @@ pub unsafe fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
- let fmadd = _mm512_fmadd_ps(a, b, c).as_f32x16();
- transmute(simd_select_bitmask(k, fmadd, a.as_f32x16()))
+ simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3195,9 +3275,7 @@ pub unsafe fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
- let fmadd = _mm512_fmadd_ps(a, b, c).as_f32x16();
- let zero = _mm512_setzero_ps().as_f32x16();
- transmute(simd_select_bitmask(k, fmadd, zero))
+ simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), _mm512_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -3208,8 +3286,7 @@ pub unsafe fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m51
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
- let fmadd = _mm512_fmadd_ps(a, b, c).as_f32x16();
- transmute(simd_select_bitmask(k, fmadd, c.as_f32x16()))
+ simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -3220,8 +3297,7 @@ pub unsafe fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask1
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
- let fmadd = _mm256_fmadd_ps(a, b, c).as_f32x8();
- transmute(simd_select_bitmask(k, fmadd, a.as_f32x8()))
+ simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3232,9 +3308,7 @@ pub unsafe fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256)
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
- let fmadd = _mm256_fmadd_ps(a, b, c).as_f32x8();
- let zero = _mm256_setzero_ps().as_f32x8();
- transmute(simd_select_bitmask(k, fmadd, zero))
+ simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), _mm256_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -3245,8 +3319,7 @@ pub unsafe fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
- let fmadd = _mm256_fmadd_ps(a, b, c).as_f32x8();
- transmute(simd_select_bitmask(k, fmadd, c.as_f32x8()))
+ simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -3257,8 +3330,7 @@ pub unsafe fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
- let fmadd = _mm_fmadd_ps(a, b, c).as_f32x4();
- transmute(simd_select_bitmask(k, fmadd, a.as_f32x4()))
+ simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3269,9 +3341,7 @@ pub unsafe fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) ->
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
- let fmadd = _mm_fmadd_ps(a, b, c).as_f32x4();
- let zero = _mm_setzero_ps().as_f32x4();
- transmute(simd_select_bitmask(k, fmadd, zero))
+ simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), _mm_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -3282,8 +3352,7 @@ pub unsafe fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
- let fmadd = _mm_fmadd_ps(a, b, c).as_f32x4();
- transmute(simd_select_bitmask(k, fmadd, c.as_f32x4()))
+ simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
@@ -3294,7 +3363,7 @@ pub unsafe fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
- transmute(vfmadd132pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8()))
+ simd_fma(a, b, c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -3305,8 +3374,7 @@ pub unsafe fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
- let fmadd = _mm512_fmadd_pd(a, b, c).as_f64x8();
- transmute(simd_select_bitmask(k, fmadd, a.as_f64x8()))
+ simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3317,9 +3385,7 @@ pub unsafe fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m51
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
- let fmadd = _mm512_fmadd_pd(a, b, c).as_f64x8();
- let zero = _mm512_setzero_pd().as_f64x8();
- transmute(simd_select_bitmask(k, fmadd, zero))
+ simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), _mm512_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -3330,8 +3396,7 @@ pub unsafe fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m5
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
- let fmadd = _mm512_fmadd_pd(a, b, c).as_f64x8();
- transmute(simd_select_bitmask(k, fmadd, c.as_f64x8()))
+ simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -3342,8 +3407,7 @@ pub unsafe fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mma
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
- let fmadd = _mm256_fmadd_pd(a, b, c).as_f64x4();
- transmute(simd_select_bitmask(k, fmadd, a.as_f64x4()))
+ simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3354,9 +3418,7 @@ pub unsafe fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m25
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
- let fmadd = _mm256_fmadd_pd(a, b, c).as_f64x4();
- let zero = _mm256_setzero_pd().as_f64x4();
- transmute(simd_select_bitmask(k, fmadd, zero))
+ simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), _mm256_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -3367,8 +3429,7 @@ pub unsafe fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m2
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
- let fmadd = _mm256_fmadd_pd(a, b, c).as_f64x4();
- transmute(simd_select_bitmask(k, fmadd, c.as_f64x4()))
+ simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -3379,8 +3440,7 @@ pub unsafe fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mma
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
- let fmadd = _mm_fmadd_pd(a, b, c).as_f64x2();
- transmute(simd_select_bitmask(k, fmadd, a.as_f64x2()))
+ simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3391,9 +3451,7 @@ pub unsafe fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d)
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
- let fmadd = _mm_fmadd_pd(a, b, c).as_f64x2();
- let zero = _mm_setzero_pd().as_f64x2();
- transmute(simd_select_bitmask(k, fmadd, zero))
+ simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), _mm_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -3404,8 +3462,7 @@ pub unsafe fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
- let fmadd = _mm_fmadd_pd(a, b, c).as_f64x2();
- transmute(simd_select_bitmask(k, fmadd, c.as_f64x2()))
+ simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
@@ -3414,11 +3471,9 @@ pub unsafe fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
+#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
pub unsafe fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f32x16());
- transmute(vfmadd132ps(a.as_f32x16(), b.as_f32x16(), sub))
+ simd_fma(a, b, simd_neg(c))
}
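// Sketch (not part of this patch): fmsub is the same fused operation with the addend
// negated, which is presumably why the expected instruction flips from vfmadd to
// vfmsub once the negation is explicit in the generated IR.
fn fmsub_lane(a: f32, b: f32, c: f32) -> f32 {
    a.mul_add(b, -c)
}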
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -3427,10 +3482,9 @@ pub unsafe fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
+#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
pub unsafe fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
- let fmsub = _mm512_fmsub_ps(a, b, c).as_f32x16();
- transmute(simd_select_bitmask(k, fmsub, a.as_f32x16()))
+ simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3439,11 +3493,9 @@ pub unsafe fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
+#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
pub unsafe fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
- let fmsub = _mm512_fmsub_ps(a, b, c).as_f32x16();
- let zero = _mm512_setzero_ps().as_f32x16();
- transmute(simd_select_bitmask(k, fmsub, zero))
+ simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), _mm512_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -3452,10 +3504,9 @@ pub unsafe fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m51
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
+#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
pub unsafe fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
- let fmsub = _mm512_fmsub_ps(a, b, c).as_f32x16();
- transmute(simd_select_bitmask(k, fmsub, c.as_f32x16()))
+ simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -3466,8 +3517,7 @@ pub unsafe fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask1
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
pub unsafe fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
- let fmsub = _mm256_fmsub_ps(a, b, c).as_f32x8();
- transmute(simd_select_bitmask(k, fmsub, a.as_f32x8()))
+ simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3478,9 +3528,7 @@ pub unsafe fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256)
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
pub unsafe fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
- let fmsub = _mm256_fmsub_ps(a, b, c).as_f32x8();
- let zero = _mm256_setzero_ps().as_f32x8();
- transmute(simd_select_bitmask(k, fmsub, zero))
+ simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), _mm256_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -3491,8 +3539,7 @@ pub unsafe fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
pub unsafe fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
- let fmsub = _mm256_fmsub_ps(a, b, c).as_f32x8();
- transmute(simd_select_bitmask(k, fmsub, c.as_f32x8()))
+ simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -3503,8 +3550,7 @@ pub unsafe fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
pub unsafe fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
- let fmsub = _mm_fmsub_ps(a, b, c).as_f32x4();
- transmute(simd_select_bitmask(k, fmsub, a.as_f32x4()))
+ simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3515,9 +3561,7 @@ pub unsafe fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) ->
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
pub unsafe fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
- let fmsub = _mm_fmsub_ps(a, b, c).as_f32x4();
- let zero = _mm_setzero_ps().as_f32x4();
- transmute(simd_select_bitmask(k, fmsub, zero))
+ simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), _mm_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -3528,8 +3572,7 @@ pub unsafe fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
pub unsafe fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
- let fmsub = _mm_fmsub_ps(a, b, c).as_f32x4();
- transmute(simd_select_bitmask(k, fmsub, c.as_f32x4()))
+ simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
@@ -3538,11 +3581,9 @@ pub unsafe fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
+#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
pub unsafe fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
- let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f64x8());
- transmute(vfmadd132pd(a.as_f64x8(), b.as_f64x8(), sub))
+ simd_fma(a, b, simd_neg(c))
}
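// A minimal scalar sketch (illustrative, not one of the patch hunks): the
// rewrite relies on the identity a*b - c == fma(a, b, -c); negating c is exact,
// so the fused, single-rounding result is unchanged.
fn fmsub_scalar(a: f64, b: f64, c: f64) -> f64 {
    a.mul_add(b, -c) // one rounding step, like the vector simd_fma(a, b, -c)
}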
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -3551,10 +3592,9 @@ pub unsafe fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
+#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
pub unsafe fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
- let fmsub = _mm512_fmsub_pd(a, b, c).as_f64x8();
- transmute(simd_select_bitmask(k, fmsub, a.as_f64x8()))
+ simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3563,11 +3603,9 @@ pub unsafe fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m51
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
+#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
pub unsafe fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
- let fmsub = _mm512_fmsub_pd(a, b, c).as_f64x8();
- let zero = _mm512_setzero_pd().as_f64x8();
- transmute(simd_select_bitmask(k, fmsub, zero))
+ simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), _mm512_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -3576,10 +3614,9 @@ pub unsafe fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m5
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
+#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
pub unsafe fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
- let fmsub = _mm512_fmsub_pd(a, b, c).as_f64x8();
- transmute(simd_select_bitmask(k, fmsub, c.as_f64x8()))
+ simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -3590,8 +3627,7 @@ pub unsafe fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mma
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
pub unsafe fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
- let fmsub = _mm256_fmsub_pd(a, b, c).as_f64x4();
- transmute(simd_select_bitmask(k, fmsub, a.as_f64x4()))
+ simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), a)
}
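// A minimal scalar sketch (illustrative, not one of the patch hunks): the
// 256-bit double variants still take a __mmask8, but only the low four bits
// participate; upper bits are ignored. The writemask form keeps lanes of a
// where the bit is clear.
fn mask_fmsub_pd_256_ref(a: [f64; 4], k: u8, b: [f64; 4], c: [f64; 4]) -> [f64; 4] {
    let mut dst = a; // unselected lanes keep their value from a
    for i in 0..4 {
        if (k >> i) & 1 == 1 {
            dst[i] = a[i].mul_add(b[i], -c[i]);
        }
    }
    dst
}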
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3602,9 +3638,7 @@ pub unsafe fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m25
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
pub unsafe fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
- let fmsub = _mm256_fmsub_pd(a, b, c).as_f64x4();
- let zero = _mm256_setzero_pd().as_f64x4();
- transmute(simd_select_bitmask(k, fmsub, zero))
+ simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), _mm256_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -3615,8 +3649,7 @@ pub unsafe fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m2
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
pub unsafe fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
- let fmsub = _mm256_fmsub_pd(a, b, c).as_f64x4();
- transmute(simd_select_bitmask(k, fmsub, c.as_f64x4()))
+ simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -3627,8 +3660,7 @@ pub unsafe fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mma
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
pub unsafe fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
- let fmsub = _mm_fmsub_pd(a, b, c).as_f64x2();
- transmute(simd_select_bitmask(k, fmsub, a.as_f64x2()))
+ simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3639,9 +3671,7 @@ pub unsafe fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d)
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
pub unsafe fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
- let fmsub = _mm_fmsub_pd(a, b, c).as_f64x2();
- let zero = _mm_setzero_pd().as_f64x2();
- transmute(simd_select_bitmask(k, fmsub, zero))
+ simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), _mm_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -3652,8 +3682,7 @@ pub unsafe fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
pub unsafe fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
- let fmsub = _mm_fmsub_pd(a, b, c).as_f64x2();
- transmute(simd_select_bitmask(k, fmsub, c.as_f64x2()))
+ simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
@@ -3664,12 +3693,13 @@ pub unsafe fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
- transmute(vfmaddsub213ps(
- a.as_f32x16(),
- b.as_f32x16(),
- c.as_f32x16(),
- _MM_FROUND_CUR_DIRECTION,
- ))
+ let add = simd_fma(a, b, c);
+ let sub = simd_fma(a, b, simd_neg(c));
+ simd_shuffle!(
+ add,
+ sub,
+ [16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11, 28, 13, 30, 15]
+ )
}
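// A minimal scalar sketch (illustrative, not one of the patch hunks): in the
// shuffle above, indices 0..15 pick lanes of `add` and 16..31 pick lanes of
// `sub`, so even result lanes compute a*b - c and odd lanes a*b + c, which is
// the fmaddsub lane pattern.
fn fmaddsub_ps_ref(a: [f32; 16], b: [f32; 16], c: [f32; 16]) -> [f32; 16] {
    let mut dst = [0.0f32; 16];
    for i in 0..16 {
        let addend = if i % 2 == 0 { -c[i] } else { c[i] };
        dst[i] = a[i].mul_add(b[i], addend);
    }
    dst
}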
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -3680,8 +3710,7 @@ pub unsafe fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
- let fmaddsub = _mm512_fmaddsub_ps(a, b, c).as_f32x16();
- transmute(simd_select_bitmask(k, fmaddsub, a.as_f32x16()))
+ simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3692,9 +3721,7 @@ pub unsafe fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
- let fmaddsub = _mm512_fmaddsub_ps(a, b, c).as_f32x16();
- let zero = _mm512_setzero_ps().as_f32x16();
- transmute(simd_select_bitmask(k, fmaddsub, zero))
+ simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), _mm512_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -3705,8 +3732,7 @@ pub unsafe fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
- let fmaddsub = _mm512_fmaddsub_ps(a, b, c).as_f32x16();
- transmute(simd_select_bitmask(k, fmaddsub, c.as_f32x16()))
+ simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -3717,8 +3743,7 @@ pub unsafe fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mma
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
- let fmaddsub = _mm256_fmaddsub_ps(a, b, c).as_f32x8();
- transmute(simd_select_bitmask(k, fmaddsub, a.as_f32x8()))
+ simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3729,9 +3754,7 @@ pub unsafe fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m2
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
- let fmaddsub = _mm256_fmaddsub_ps(a, b, c).as_f32x8();
- let zero = _mm256_setzero_ps().as_f32x8();
- transmute(simd_select_bitmask(k, fmaddsub, zero))
+ simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), _mm256_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -3742,8 +3765,7 @@ pub unsafe fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
- let fmaddsub = _mm256_fmaddsub_ps(a, b, c).as_f32x8();
- transmute(simd_select_bitmask(k, fmaddsub, c.as_f32x8()))
+ simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -3754,8 +3776,7 @@ pub unsafe fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mma
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
- let fmaddsub = _mm_fmaddsub_ps(a, b, c).as_f32x4();
- transmute(simd_select_bitmask(k, fmaddsub, a.as_f32x4()))
+ simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3766,9 +3787,7 @@ pub unsafe fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128)
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
- let fmaddsub = _mm_fmaddsub_ps(a, b, c).as_f32x4();
- let zero = _mm_setzero_ps().as_f32x4();
- transmute(simd_select_bitmask(k, fmaddsub, zero))
+ simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), _mm_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -3779,8 +3798,7 @@ pub unsafe fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
- let fmaddsub = _mm_fmaddsub_ps(a, b, c).as_f32x4();
- transmute(simd_select_bitmask(k, fmaddsub, c.as_f32x4()))
+ simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
@@ -3791,12 +3809,9 @@ pub unsafe fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
- transmute(vfmaddsub213pd(
- a.as_f64x8(),
- b.as_f64x8(),
- c.as_f64x8(),
- _MM_FROUND_CUR_DIRECTION,
- ))
+ let add = simd_fma(a, b, c);
+ let sub = simd_fma(a, b, simd_neg(c));
+ simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
}
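// A minimal sketch on plain arrays (illustrative, not one of the patch hunks),
// making the index map of the 8-lane shuffle explicit: indices 0..7 select from
// `add`, 8..15 from `sub`, so [8, 1, 10, 3, 12, 5, 14, 7] interleaves the two
// starting with a subtract in lane 0.
fn fmaddsub_pd_ref(a: [f64; 8], b: [f64; 8], c: [f64; 8]) -> [f64; 8] {
    let add: [f64; 8] = std::array::from_fn(|i| a[i].mul_add(b[i], c[i]));
    let sub: [f64; 8] = std::array::from_fn(|i| a[i].mul_add(b[i], -c[i]));
    let idx = [8usize, 1, 10, 3, 12, 5, 14, 7]; // same list as the shuffle above
    std::array::from_fn(|lane| {
        let j = idx[lane];
        if j < 8 { add[j] } else { sub[j - 8] } // 0..7 -> add, 8..15 -> sub
    })
}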
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -3807,8 +3822,7 @@ pub unsafe fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
- let fmaddsub = _mm512_fmaddsub_pd(a, b, c).as_f64x8();
- transmute(simd_select_bitmask(k, fmaddsub, a.as_f64x8()))
+ simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3819,9 +3833,7 @@ pub unsafe fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
- let fmaddsub = _mm512_fmaddsub_pd(a, b, c).as_f64x8();
- let zero = _mm512_setzero_pd().as_f64x8();
- transmute(simd_select_bitmask(k, fmaddsub, zero))
+ simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), _mm512_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -3832,8 +3844,7 @@ pub unsafe fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: _
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
- let fmaddsub = _mm512_fmaddsub_pd(a, b, c).as_f64x8();
- transmute(simd_select_bitmask(k, fmaddsub, c.as_f64x8()))
+ simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -3844,8 +3855,7 @@ pub unsafe fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
- let fmaddsub = _mm256_fmaddsub_pd(a, b, c).as_f64x4();
- transmute(simd_select_bitmask(k, fmaddsub, a.as_f64x4()))
+ simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3856,9 +3866,7 @@ pub unsafe fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
- let fmaddsub = _mm256_fmaddsub_pd(a, b, c).as_f64x4();
- let zero = _mm256_setzero_pd().as_f64x4();
- transmute(simd_select_bitmask(k, fmaddsub, zero))
+ simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), _mm256_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -3869,8 +3877,7 @@ pub unsafe fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: _
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
- let fmaddsub = _mm256_fmaddsub_pd(a, b, c).as_f64x4();
- transmute(simd_select_bitmask(k, fmaddsub, c.as_f64x4()))
+ simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -3881,8 +3888,7 @@ pub unsafe fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
- let fmaddsub = _mm_fmaddsub_pd(a, b, c).as_f64x2();
- transmute(simd_select_bitmask(k, fmaddsub, a.as_f64x2()))
+ simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3893,9 +3899,7 @@ pub unsafe fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m12
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
- let fmaddsub = _mm_fmaddsub_pd(a, b, c).as_f64x2();
- let zero = _mm_setzero_pd().as_f64x2();
- transmute(simd_select_bitmask(k, fmaddsub, zero))
+ simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), _mm_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -3906,8 +3910,7 @@ pub unsafe fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m1
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
- let fmaddsub = _mm_fmaddsub_pd(a, b, c).as_f64x2();
- transmute(simd_select_bitmask(k, fmaddsub, c.as_f64x2()))
+ simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
@@ -3916,16 +3919,15 @@ pub unsafe fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mma
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
+#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f32x16());
- transmute(vfmaddsub213ps(
- a.as_f32x16(),
- b.as_f32x16(),
+ let add = simd_fma(a, b, c);
+ let sub = simd_fma(a, b, simd_neg(c));
+ simd_shuffle!(
+ add,
sub,
- _MM_FROUND_CUR_DIRECTION,
- ))
+ [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31]
+ )
}
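// A minimal scalar sketch (illustrative, not one of the patch hunks): fmsubadd
// flips the parity relative to fmaddsub. The index list above starts with 0 (a
// lane of `add`) rather than 16, so even lanes compute a*b + c and odd lanes
// a*b - c.
fn fmsubadd_ps_ref(a: [f32; 16], b: [f32; 16], c: [f32; 16]) -> [f32; 16] {
    let mut dst = [0.0f32; 16];
    for i in 0..16 {
        let addend = if i % 2 == 0 { c[i] } else { -c[i] };
        dst[i] = a[i].mul_add(b[i], addend);
    }
    dst
}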
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -3934,10 +3936,9 @@ pub unsafe fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
+#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
- let fmsubadd = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
- transmute(simd_select_bitmask(k, fmsubadd, a.as_f32x16()))
+ simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3946,11 +3947,9 @@ pub unsafe fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
+#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
- let fmsubadd = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
- let zero = _mm512_setzero_ps().as_f32x16();
- transmute(simd_select_bitmask(k, fmsubadd, zero))
+ simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), _mm512_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -3959,10 +3958,9 @@ pub unsafe fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
+#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
- let fmsubadd = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
- transmute(simd_select_bitmask(k, fmsubadd, c.as_f32x16()))
+ simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -3973,8 +3971,7 @@ pub unsafe fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mma
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
- let fmsubadd = _mm256_fmsubadd_ps(a, b, c).as_f32x8();
- transmute(simd_select_bitmask(k, fmsubadd, a.as_f32x8()))
+ simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3985,9 +3982,7 @@ pub unsafe fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m2
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
- let fmsubadd = _mm256_fmsubadd_ps(a, b, c).as_f32x8();
- let zero = _mm256_setzero_ps().as_f32x8();
- transmute(simd_select_bitmask(k, fmsubadd, zero))
+ simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), _mm256_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -3998,8 +3993,7 @@ pub unsafe fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
- let fmsubadd = _mm256_fmsubadd_ps(a, b, c).as_f32x8();
- transmute(simd_select_bitmask(k, fmsubadd, c.as_f32x8()))
+ simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -4010,8 +4004,7 @@ pub unsafe fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mma
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
- let fmsubadd = _mm_fmsubadd_ps(a, b, c).as_f32x4();
- transmute(simd_select_bitmask(k, fmsubadd, a.as_f32x4()))
+ simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -4022,9 +4015,7 @@ pub unsafe fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128)
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
- let fmsubadd = _mm_fmsubadd_ps(a, b, c).as_f32x4();
- let zero = _mm_setzero_ps().as_f32x4();
- transmute(simd_select_bitmask(k, fmsubadd, zero))
+ simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), _mm_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -4035,8 +4026,7 @@ pub unsafe fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
- let fmsubadd = _mm_fmsubadd_ps(a, b, c).as_f32x4();
- transmute(simd_select_bitmask(k, fmsubadd, c.as_f32x4()))
+ simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
@@ -4045,16 +4035,11 @@ pub unsafe fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
+#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
- let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f64x8());
- transmute(vfmaddsub213pd(
- a.as_f64x8(),
- b.as_f64x8(),
- sub,
- _MM_FROUND_CUR_DIRECTION,
- ))
+ let add = simd_fma(a, b, c);
+ let sub = simd_fma(a, b, simd_neg(c));
+ simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
}
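// A rough caller-side sketch (illustrative, not one of the patch hunks),
// assuming a nightly toolchain with #![feature(stdarch_x86_avx512)] and an
// AVX-512F CPU; the function name is made up for the example.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn fmsubadd_pd_demo() -> [f64; 8] {
    use core::arch::x86_64::{_mm512_fmsubadd_pd, _mm512_set1_pd, _mm512_storeu_pd};
    let a = _mm512_set1_pd(2.0);
    let b = _mm512_set1_pd(3.0);
    let c = _mm512_set1_pd(1.0);
    let r = _mm512_fmsubadd_pd(a, b, c); // even lanes 2*3 + 1 = 7.0, odd lanes 2*3 - 1 = 5.0
    let mut out = [0.0f64; 8];
    _mm512_storeu_pd(out.as_mut_ptr(), r);
    out
}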
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -4063,10 +4048,9 @@ pub unsafe fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
+#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
- let fmsubadd = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
- transmute(simd_select_bitmask(k, fmsubadd, a.as_f64x8()))
+ simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -4075,11 +4059,9 @@ pub unsafe fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
+#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
- let fmsubadd = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
- let zero = _mm512_setzero_pd().as_f64x8();
- transmute(simd_select_bitmask(k, fmsubadd, zero))
+ simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), _mm512_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -4088,10 +4070,9 @@ pub unsafe fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: _
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
+#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
- let fmsubadd = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
- transmute(simd_select_bitmask(k, fmsubadd, c.as_f64x8()))
+ simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -4102,8 +4083,7 @@ pub unsafe fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
- let fmsubadd = _mm256_fmsubadd_pd(a, b, c).as_f64x4();
- transmute(simd_select_bitmask(k, fmsubadd, a.as_f64x4()))
+ simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -4114,9 +4094,7 @@ pub unsafe fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
- let fmsubadd = _mm256_fmsubadd_pd(a, b, c).as_f64x4();
- let zero = _mm256_setzero_pd().as_f64x4();
- transmute(simd_select_bitmask(k, fmsubadd, zero))
+ simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), _mm256_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -4127,8 +4105,7 @@ pub unsafe fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: _
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
- let fmsubadd = _mm256_fmsubadd_pd(a, b, c).as_f64x4();
- transmute(simd_select_bitmask(k, fmsubadd, c.as_f64x4()))
+ simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -4139,8 +4116,7 @@ pub unsafe fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
- let fmsubadd = _mm_fmsubadd_pd(a, b, c).as_f64x2();
- transmute(simd_select_bitmask(k, fmsubadd, a.as_f64x2()))
+ simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -4151,9 +4127,7 @@ pub unsafe fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m12
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
- let fmsubadd = _mm_fmsubadd_pd(a, b, c).as_f64x2();
- let zero = _mm_setzero_pd().as_f64x2();
- transmute(simd_select_bitmask(k, fmsubadd, zero))
+ simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), _mm_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -4164,8 +4138,7 @@ pub unsafe fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m1
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
- let fmsubadd = _mm_fmsubadd_pd(a, b, c).as_f64x2();
- transmute(simd_select_bitmask(k, fmsubadd, c.as_f64x2()))
+ simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
@@ -4174,11 +4147,9 @@ pub unsafe fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mma
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
+#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, a.as_f32x16());
- transmute(vfmadd132ps(sub, b.as_f32x16(), c.as_f32x16()))
+ simd_fma(simd_neg(a), b, c)
}
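// A minimal scalar sketch (illustrative, not one of the patch hunks): fnmadd
// computes -(a*b) + c, i.e. c - a*b, and negating a before the fused operation
// is exact, so simd_fma(-a, b, c) is a faithful lowering.
fn fnmadd_scalar(a: f32, b: f32, c: f32) -> f32 {
    (-a).mul_add(b, c) // c - a*b with a single rounding step
}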
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -4187,10 +4158,9 @@ pub unsafe fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
+#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
- let fnmadd = _mm512_fnmadd_ps(a, b, c).as_f32x16();
- transmute(simd_select_bitmask(k, fnmadd, a.as_f32x16()))
+ simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -4199,11 +4169,9 @@ pub unsafe fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m51
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
+#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
- let fnmadd = _mm512_fnmadd_ps(a, b, c).as_f32x16();
- let zero = _mm512_setzero_ps().as_f32x16();
- transmute(simd_select_bitmask(k, fnmadd, zero))
+ simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), _mm512_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -4212,10 +4180,9 @@ pub unsafe fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m5
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
+#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
- let fnmadd = _mm512_fnmadd_ps(a, b, c).as_f32x16();
- transmute(simd_select_bitmask(k, fnmadd, c.as_f32x16()))
+ simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -4226,8 +4193,7 @@ pub unsafe fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
- let fnmadd = _mm256_fnmadd_ps(a, b, c).as_f32x8();
- transmute(simd_select_bitmask(k, fnmadd, a.as_f32x8()))
+ simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -4238,9 +4204,7 @@ pub unsafe fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
- let fnmadd = _mm256_fnmadd_ps(a, b, c).as_f32x8();
- let zero = _mm256_setzero_ps().as_f32x8();
- transmute(simd_select_bitmask(k, fnmadd, zero))
+ simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), _mm256_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -4251,8 +4215,7 @@ pub unsafe fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m25
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
- let fnmadd = _mm256_fnmadd_ps(a, b, c).as_f32x8();
- transmute(simd_select_bitmask(k, fnmadd, c.as_f32x8()))
+ simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -4263,8 +4226,7 @@ pub unsafe fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
- let fnmadd = _mm_fnmadd_ps(a, b, c).as_f32x4();
- transmute(simd_select_bitmask(k, fnmadd, a.as_f32x4()))
+ simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -4275,9 +4237,7 @@ pub unsafe fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
- let fnmadd = _mm_fnmadd_ps(a, b, c).as_f32x4();
- let zero = _mm_setzero_ps().as_f32x4();
- transmute(simd_select_bitmask(k, fnmadd, zero))
+ simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), _mm_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -4288,8 +4248,7 @@ pub unsafe fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128)
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
- let fnmadd = _mm_fnmadd_ps(a, b, c).as_f32x4();
- transmute(simd_select_bitmask(k, fnmadd, c.as_f32x4()))
+ simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
@@ -4298,11 +4257,9 @@ pub unsafe fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
+#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
- let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, a.as_f64x8());
- transmute(vfmadd132pd(sub, b.as_f64x8(), c.as_f64x8()))
+ simd_fma(simd_neg(a), b, c)
}
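// A brief note with a sketch (illustrative, not one of the patch hunks): the
// fused form can differ from the naive expression in the last bit, because
// c - a * b rounds the product before the subtraction while the FMA rounds
// only once.
fn fnmadd_pd_compare(a: f64, b: f64, c: f64) -> (f64, f64) {
    let fused = (-a).mul_add(b, c); // single rounding
    let unfused = c - a * b; // product rounded first, then the subtraction
    (fused, unfused) // may differ by one ulp for some inputs
}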
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -4311,10 +4268,9 @@ pub unsafe fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
+#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
- let fnmadd = _mm512_fnmadd_pd(a, b, c).as_f64x8();
- transmute(simd_select_bitmask(k, fnmadd, a.as_f64x8()))
+ simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -4323,11 +4279,9 @@ pub unsafe fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m5
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
+#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
- let fnmadd = _mm512_fnmadd_pd(a, b, c).as_f64x8();
- let zero = _mm512_setzero_pd().as_f64x8();
- transmute(simd_select_bitmask(k, fnmadd, zero))
+ simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), _mm512_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -4336,10 +4290,9 @@ pub unsafe fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
+#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
- let fnmadd = _mm512_fnmadd_pd(a, b, c).as_f64x8();
- transmute(simd_select_bitmask(k, fnmadd, c.as_f64x8()))
+ simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -4350,8 +4303,7 @@ pub unsafe fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mm
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
- let fnmadd = _mm256_fnmadd_pd(a, b, c).as_f64x4();
- transmute(simd_select_bitmask(k, fnmadd, a.as_f64x4()))
+ simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -4362,9 +4314,7 @@ pub unsafe fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m2
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
- let fnmadd = _mm256_fnmadd_pd(a, b, c).as_f64x4();
- let zero = _mm256_setzero_pd().as_f64x4();
- transmute(simd_select_bitmask(k, fnmadd, zero))
+ simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), _mm256_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -4375,8 +4325,7 @@ pub unsafe fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
- let fnmadd = _mm256_fnmadd_pd(a, b, c).as_f64x4();
- transmute(simd_select_bitmask(k, fnmadd, c.as_f64x4()))
+ simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -4387,8 +4336,7 @@ pub unsafe fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mm
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
- let fnmadd = _mm_fnmadd_pd(a, b, c).as_f64x2();
- transmute(simd_select_bitmask(k, fnmadd, a.as_f64x2()))
+ simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -4399,9 +4347,7 @@ pub unsafe fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
- let fnmadd = _mm_fnmadd_pd(a, b, c).as_f64x2();
- let zero = _mm_setzero_pd().as_f64x2();
- transmute(simd_select_bitmask(k, fnmadd, zero))
+ simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), _mm_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -4412,8 +4358,7 @@ pub unsafe fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
- let fnmadd = _mm_fnmadd_pd(a, b, c).as_f64x2();
- transmute(simd_select_bitmask(k, fnmadd, c.as_f64x2()))
+ simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
@@ -4422,12 +4367,9 @@ pub unsafe fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
+#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
- let zero: f32x16 = mem::zeroed();
- let suba = simd_sub(zero, a.as_f32x16());
- let subc = simd_sub(zero, c.as_f32x16());
- transmute(vfmadd132ps(suba, b.as_f32x16(), subc))
+ simd_fma(simd_neg(a), b, simd_neg(c))
}
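// Per-lane reference for fnmsub (hypothetical helper): negating both `a` and
// `c` before the fused multiply-add yields `-(a * b) - c`, matching
// `simd_fma(simd_neg(a), b, simd_neg(c))` above.
fn fnmsub_ref(a: f32, b: f32, c: f32) -> f32 {
    (-a).mul_add(b, -c) // fused: -(a * b) - c
}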
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -4436,10 +4378,9 @@ pub unsafe fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
+#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
- let fnmsub = _mm512_fnmsub_ps(a, b, c).as_f32x16();
- transmute(simd_select_bitmask(k, fnmsub, a.as_f32x16()))
+ simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -4448,11 +4389,9 @@ pub unsafe fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m51
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
+#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
- let fnmsub = _mm512_fnmsub_ps(a, b, c).as_f32x16();
- let zero = _mm512_setzero_ps().as_f32x16();
- transmute(simd_select_bitmask(k, fnmsub, zero))
+ simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), _mm512_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -4461,10 +4400,9 @@ pub unsafe fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m5
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
+#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
- let fnmsub = _mm512_fnmsub_ps(a, b, c).as_f32x16();
- transmute(simd_select_bitmask(k, fnmsub, c.as_f32x16()))
+ simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -4475,8 +4413,7 @@ pub unsafe fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
- let fnmsub = _mm256_fnmsub_ps(a, b, c).as_f32x8();
- transmute(simd_select_bitmask(k, fnmsub, a.as_f32x8()))
+ simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -4487,9 +4424,7 @@ pub unsafe fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
- let fnmsub = _mm256_fnmsub_ps(a, b, c).as_f32x8();
- let zero = _mm256_setzero_ps().as_f32x8();
- transmute(simd_select_bitmask(k, fnmsub, zero))
+ simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), _mm256_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -4500,8 +4435,7 @@ pub unsafe fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m25
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
- let fnmsub = _mm256_fnmsub_ps(a, b, c).as_f32x8();
- transmute(simd_select_bitmask(k, fnmsub, c.as_f32x8()))
+ simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -4512,8 +4446,7 @@ pub unsafe fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
- let fnmsub = _mm_fnmsub_ps(a, b, c).as_f32x4();
- transmute(simd_select_bitmask(k, fnmsub, a.as_f32x4()))
+ simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -4524,9 +4457,7 @@ pub unsafe fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
- let fnmsub = _mm_fnmsub_ps(a, b, c).as_f32x4();
- let zero = _mm_setzero_ps().as_f32x4();
- transmute(simd_select_bitmask(k, fnmsub, zero))
+ simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), _mm_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -4537,8 +4468,7 @@ pub unsafe fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128)
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
- let fnmsub = _mm_fnmsub_ps(a, b, c).as_f32x4();
- transmute(simd_select_bitmask(k, fnmsub, c.as_f32x4()))
+ simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
@@ -4547,12 +4477,9 @@ pub unsafe fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
+#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub unsafe fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
- let zero: f64x8 = mem::zeroed();
- let suba = simd_sub(zero, a.as_f64x8());
- let subc = simd_sub(zero, c.as_f64x8());
- transmute(vfmadd132pd(suba, b.as_f64x8(), subc))
+ simd_fma(simd_neg(a), b, simd_neg(c))
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -4561,10 +4488,9 @@ pub unsafe fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
+#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub unsafe fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
- let fnmsub = _mm512_fnmsub_pd(a, b, c).as_f64x8();
- transmute(simd_select_bitmask(k, fnmsub, a.as_f64x8()))
+ simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -4573,11 +4499,9 @@ pub unsafe fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m5
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
+#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub unsafe fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
- let fnmsub = _mm512_fnmsub_pd(a, b, c).as_f64x8();
- let zero = _mm512_setzero_pd().as_f64x8();
- transmute(simd_select_bitmask(k, fnmsub, zero))
+ simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), _mm512_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -4586,10 +4510,9 @@ pub unsafe fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
+#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub unsafe fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
- let fnmsub = _mm512_fnmsub_pd(a, b, c).as_f64x8();
- transmute(simd_select_bitmask(k, fnmsub, c.as_f64x8()))
+ simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -4600,8 +4523,7 @@ pub unsafe fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mm
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub unsafe fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
- let fnmsub = _mm256_fnmsub_pd(a, b, c).as_f64x4();
- transmute(simd_select_bitmask(k, fnmsub, a.as_f64x4()))
+ simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -4612,9 +4534,7 @@ pub unsafe fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m2
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub unsafe fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
- let fnmsub = _mm256_fnmsub_pd(a, b, c).as_f64x4();
- let zero = _mm256_setzero_pd().as_f64x4();
- transmute(simd_select_bitmask(k, fnmsub, zero))
+ simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), _mm256_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -4625,8 +4545,7 @@ pub unsafe fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub unsafe fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
- let fnmsub = _mm256_fnmsub_pd(a, b, c).as_f64x4();
- transmute(simd_select_bitmask(k, fnmsub, c.as_f64x4()))
+ simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@@ -4637,8 +4556,7 @@ pub unsafe fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mm
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub unsafe fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
- let fnmsub = _mm_fnmsub_pd(a, b, c).as_f64x2();
- transmute(simd_select_bitmask(k, fnmsub, a.as_f64x2()))
+ simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -4649,9 +4567,7 @@ pub unsafe fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub unsafe fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
- let fnmsub = _mm_fnmsub_pd(a, b, c).as_f64x2();
- let zero = _mm_setzero_pd().as_f64x2();
- transmute(simd_select_bitmask(k, fnmsub, zero))
+ simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), _mm_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
@@ -4662,8 +4578,7 @@ pub unsafe fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub unsafe fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
- let fnmsub = _mm_fnmsub_pd(a, b, c).as_f64x2();
- transmute(simd_select_bitmask(k, fnmsub, c.as_f64x2()))
+ simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), c)
}
/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
@@ -4929,6 +4844,21 @@ pub unsafe fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
))
}
+/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_ps)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vrsqrt14ps))]
+pub unsafe fn _mm256_rsqrt14_ps(a: __m256) -> __m256 {
+ transmute(vrsqrt14ps256(
+ a.as_f32x8(),
+ _mm256_setzero_ps().as_f32x8(),
+ 0b11111111,
+ ))
+}
+
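// Sketch of the same result through the existing masked form, assuming the
// module's usual imports are in scope: with an all-ones mask every lane comes
// from the computation, so the zeroed `src` operand is never observed.
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn rsqrt14_ps_via_mask(a: __m256) -> __m256 {
    _mm256_mask_rsqrt14_ps(_mm256_setzero_ps(), 0b11111111, a)
}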
/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_ps&expand=4815)
@@ -4955,6 +4885,21 @@ pub unsafe fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 {
))
}
+/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_ps)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vrsqrt14ps))]
+pub unsafe fn _mm_rsqrt14_ps(a: __m128) -> __m128 {
+ transmute(vrsqrt14ps128(
+ a.as_f32x4(),
+ _mm_setzero_ps().as_f32x4(),
+ 0b00001111,
+ ))
+}
+
/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_ps&expand=4813)
@@ -5014,6 +4959,21 @@ pub unsafe fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
transmute(vrsqrt14pd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
}
+/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_pd)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vrsqrt14pd))]
+pub unsafe fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d {
+ transmute(vrsqrt14pd256(
+ a.as_f64x4(),
+ _mm256_setzero_pd().as_f64x4(),
+ 0b00001111,
+ ))
+}
+
/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_pd&expand=4808)
@@ -5040,6 +5000,21 @@ pub unsafe fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d {
))
}
+/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_pd)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vrsqrt14pd))]
+pub unsafe fn _mm_rsqrt14_pd(a: __m128d) -> __m128d {
+ transmute(vrsqrt14pd128(
+ a.as_f64x2(),
+ _mm_setzero_pd().as_f64x2(),
+ 0b00000011,
+ ))
+}
+
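// Usage sketch for the new unmasked forms (assumes the module's imports):
// rsqrt14 approximates 1/sqrt(x) per lane with relative error below 2^-14.
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn approx_inv_sqrt_pd(x: __m128d) -> __m128d {
    _mm_rsqrt14_pd(x) // approximately 1.0 / x.sqrt() in each lane
}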
/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_pd&expand=4806)
@@ -8153,11 +8128,7 @@ pub unsafe fn _mm512_fmadd_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let a = a.as_f32x16();
- let b = b.as_f32x16();
- let c = c.as_f32x16();
- let r = vfmadd132psround(a, b, c, ROUNDING);
- transmute(r)
+ vfmadd132psround(a, b, c, ROUNDING)
}
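// The rounding behaviour is a const generic. A typical call site pins it to
// round-to-nearest with exceptions suppressed (sketch; `fmadd_rn` is a
// hypothetical wrapper, the constants are the existing stdarch ones):
#[target_feature(enable = "avx512f")]
unsafe fn fmadd_rn(a: __m512, b: __m512, c: __m512) -> __m512 {
    _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
}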
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
@@ -8182,11 +8153,7 @@ pub unsafe fn _mm512_mask_fmadd_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let a = a.as_f32x16();
- let b = b.as_f32x16();
- let c = c.as_f32x16();
- let r = vfmadd132psround(a, b, c, ROUNDING);
- transmute(simd_select_bitmask(k, r, a))
+ simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
@@ -8211,12 +8178,7 @@ pub unsafe fn _mm512_maskz_fmadd_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let a = a.as_f32x16();
- let b = b.as_f32x16();
- let c = c.as_f32x16();
- let r = vfmadd132psround(a, b, c, ROUNDING);
- let zero = _mm512_setzero_ps().as_f32x16();
- transmute(simd_select_bitmask(k, r, zero))
+ simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), _mm512_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
@@ -8241,11 +8203,7 @@ pub unsafe fn _mm512_mask3_fmadd_round_ps(
k: __mmask16,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let a = a.as_f32x16();
- let b = b.as_f32x16();
- let c = c.as_f32x16();
- let r = vfmadd132psround(a, b, c, ROUNDING);
- transmute(simd_select_bitmask(k, r, c))
+ simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
@@ -8269,11 +8227,7 @@ pub unsafe fn _mm512_fmadd_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let a = a.as_f64x8();
- let b = b.as_f64x8();
- let c = c.as_f64x8();
- let r = vfmadd132pdround(a, b, c, ROUNDING);
- transmute(r)
+ vfmadd132pdround(a, b, c, ROUNDING)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
@@ -8298,11 +8252,7 @@ pub unsafe fn _mm512_mask_fmadd_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let a = a.as_f64x8();
- let b = b.as_f64x8();
- let c = c.as_f64x8();
- let r = vfmadd132pdround(a, b, c, ROUNDING);
- transmute(simd_select_bitmask(k, r, a))
+ simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
@@ -8327,12 +8277,7 @@ pub unsafe fn _mm512_maskz_fmadd_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let a = a.as_f64x8();
- let b = b.as_f64x8();
- let c = c.as_f64x8();
- let r = vfmadd132pdround(a, b, c, ROUNDING);
- let zero = _mm512_setzero_pd().as_f64x8();
- transmute(simd_select_bitmask(k, r, zero))
+ simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), _mm512_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
@@ -8357,11 +8302,7 @@ pub unsafe fn _mm512_mask3_fmadd_round_pd(
k: __mmask8,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let a = a.as_f64x8();
- let b = b.as_f64x8();
- let c = c.as_f64x8();
- let r = vfmadd132pdround(a, b, c, ROUNDING);
- transmute(simd_select_bitmask(k, r, c))
+ simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
@@ -8377,7 +8318,7 @@ pub unsafe fn _mm512_mask3_fmadd_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_fmsub_round_ps(
a: __m512,
@@ -8385,12 +8326,7 @@ pub unsafe fn _mm512_fmsub_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f32x16());
- let a = a.as_f32x16();
- let b = b.as_f32x16();
- let r = vfmadd132psround(a, b, sub, ROUNDING);
- transmute(r)
+ vfmadd132psround(a, b, simd_neg(c), ROUNDING)
}
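// Scalar picture of the identity the new body uses (hypothetical helper):
// subtracting `c` is the same fused operation as adding `-c`, so routing
// fmsub through `vfmadd132psround(a, b, simd_neg(c), ROUNDING)` still rounds
// only once.
fn fmsub_ref(a: f32, b: f32, c: f32) -> f32 {
    a.mul_add(b, -c) // fused: (a * b) - c
}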
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
@@ -8406,7 +8342,7 @@ pub unsafe fn _mm512_fmsub_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_fmsub_round_ps(
a: __m512,
@@ -8415,12 +8351,8 @@ pub unsafe fn _mm512_mask_fmsub_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f32x16());
- let a = a.as_f32x16();
- let b = b.as_f32x16();
- let r = vfmadd132psround(a, b, sub, ROUNDING);
- transmute(simd_select_bitmask(k, r, a))
+ let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
+ simd_select_bitmask(k, r, a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
@@ -8436,7 +8368,7 @@ pub unsafe fn _mm512_mask_fmsub_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_maskz_fmsub_round_ps(
k: __mmask16,
@@ -8445,12 +8377,8 @@ pub unsafe fn _mm512_maskz_fmsub_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f32x16());
- let a = a.as_f32x16();
- let b = b.as_f32x16();
- let r = vfmadd132psround(a, b, sub, ROUNDING);
- transmute(simd_select_bitmask(k, r, zero))
+ let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
+ simd_select_bitmask(k, r, _mm512_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
@@ -8466,7 +8394,7 @@ pub unsafe fn _mm512_maskz_fmsub_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask3_fmsub_round_ps(
a: __m512,
@@ -8475,13 +8403,8 @@ pub unsafe fn _mm512_mask3_fmsub_round_ps(
k: __mmask16,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let c = c.as_f32x16();
- let sub = simd_sub(zero, c);
- let a = a.as_f32x16();
- let b = b.as_f32x16();
- let r = vfmadd132psround(a, b, sub, ROUNDING);
- transmute(simd_select_bitmask(k, r, c))
+ let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
+ simd_select_bitmask(k, r, c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
@@ -8497,7 +8420,7 @@ pub unsafe fn _mm512_mask3_fmsub_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_fmsub_round_pd(
a: __m512d,
@@ -8505,12 +8428,7 @@ pub unsafe fn _mm512_fmsub_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f64x8());
- let a = a.as_f64x8();
- let b = b.as_f64x8();
- let r = vfmadd132pdround(a, b, sub, ROUNDING);
- transmute(r)
+ vfmadd132pdround(a, b, simd_neg(c), ROUNDING)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
@@ -8526,7 +8444,7 @@ pub unsafe fn _mm512_fmsub_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_fmsub_round_pd(
a: __m512d,
@@ -8535,12 +8453,8 @@ pub unsafe fn _mm512_mask_fmsub_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f64x8());
- let a = a.as_f64x8();
- let b = b.as_f64x8();
- let r = vfmadd132pdround(a, b, sub, ROUNDING);
- transmute(simd_select_bitmask(k, r, a))
+ let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
+ simd_select_bitmask(k, r, a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
@@ -8556,7 +8470,7 @@ pub unsafe fn _mm512_mask_fmsub_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_maskz_fmsub_round_pd(
k: __mmask8,
@@ -8565,12 +8479,8 @@ pub unsafe fn _mm512_maskz_fmsub_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f64x8());
- let a = a.as_f64x8();
- let b = b.as_f64x8();
- let r = vfmadd132pdround(a, b, sub, ROUNDING);
- transmute(simd_select_bitmask(k, r, zero))
+ let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
+ simd_select_bitmask(k, r, _mm512_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
@@ -8586,7 +8496,7 @@ pub unsafe fn _mm512_maskz_fmsub_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask3_fmsub_round_pd(
a: __m512d,
@@ -8595,13 +8505,8 @@ pub unsafe fn _mm512_mask3_fmsub_round_pd(
k: __mmask8,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let c = c.as_f64x8();
- let sub = simd_sub(zero, c);
- let a = a.as_f64x8();
- let b = b.as_f64x8();
- let r = vfmadd132pdround(a, b, sub, ROUNDING);
- transmute(simd_select_bitmask(k, r, c))
+ let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
+ simd_select_bitmask(k, r, c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
@@ -8625,11 +8530,7 @@ pub unsafe fn _mm512_fmaddsub_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let a = a.as_f32x16();
- let b = b.as_f32x16();
- let c = c.as_f32x16();
- let r = vfmaddsub213ps(a, b, c, ROUNDING);
- transmute(r)
+ vfmaddsubpsround(a, b, c, ROUNDING)
}
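// Hypothetical scalar reference for the addsub pattern used by fmaddsub:
// even-indexed lanes subtract `c`, odd-indexed lanes add it.
fn fmaddsub_lane(i: usize, a: f32, b: f32, c: f32) -> f32 {
    if i % 2 == 0 {
        a.mul_add(b, -c) // even lane: (a * b) - c
    } else {
        a.mul_add(b, c) // odd lane: (a * b) + c
    }
}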
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
@@ -8654,11 +8555,7 @@ pub unsafe fn _mm512_mask_fmaddsub_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let a = a.as_f32x16();
- let b = b.as_f32x16();
- let c = c.as_f32x16();
- let r = vfmaddsub213ps(a, b, c, ROUNDING);
- transmute(simd_select_bitmask(k, r, a))
+ simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
@@ -8683,12 +8580,7 @@ pub unsafe fn _mm512_maskz_fmaddsub_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let a = a.as_f32x16();
- let b = b.as_f32x16();
- let c = c.as_f32x16();
- let r = vfmaddsub213ps(a, b, c, ROUNDING);
- let zero = _mm512_setzero_ps().as_f32x16();
- transmute(simd_select_bitmask(k, r, zero))
+ simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), _mm512_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
@@ -8713,11 +8605,7 @@ pub unsafe fn _mm512_mask3_fmaddsub_round_ps(
k: __mmask16,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let a = a.as_f32x16();
- let b = b.as_f32x16();
- let c = c.as_f32x16();
- let r = vfmaddsub213ps(a, b, c, ROUNDING);
- transmute(simd_select_bitmask(k, r, c))
+ simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
@@ -8741,11 +8629,7 @@ pub unsafe fn _mm512_fmaddsub_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let a = a.as_f64x8();
- let b = b.as_f64x8();
- let c = c.as_f64x8();
- let r = vfmaddsub213pd(a, b, c, ROUNDING);
- transmute(r)
+ vfmaddsubpdround(a, b, c, ROUNDING)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
@@ -8770,11 +8654,7 @@ pub unsafe fn _mm512_mask_fmaddsub_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let a = a.as_f64x8();
- let b = b.as_f64x8();
- let c = c.as_f64x8();
- let r = vfmaddsub213pd(a, b, c, ROUNDING);
- transmute(simd_select_bitmask(k, r, a))
+ simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
@@ -8799,12 +8679,7 @@ pub unsafe fn _mm512_maskz_fmaddsub_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let a = a.as_f64x8();
- let b = b.as_f64x8();
- let c = c.as_f64x8();
- let r = vfmaddsub213pd(a, b, c, ROUNDING);
- let zero = _mm512_setzero_pd().as_f64x8();
- transmute(simd_select_bitmask(k, r, zero))
+ simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), _mm512_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
@@ -8829,11 +8704,7 @@ pub unsafe fn _mm512_mask3_fmaddsub_round_pd(
k: __mmask8,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let a = a.as_f64x8();
- let b = b.as_f64x8();
- let c = c.as_f64x8();
- let r = vfmaddsub213pd(a, b, c, ROUNDING);
- transmute(simd_select_bitmask(k, r, c))
+ simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
@@ -8849,7 +8720,7 @@ pub unsafe fn _mm512_mask3_fmaddsub_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_fmsubadd_round_ps(
a: __m512,
@@ -8857,12 +8728,7 @@ pub unsafe fn _mm512_fmsubadd_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f32x16());
- let a = a.as_f32x16();
- let b = b.as_f32x16();
- let r = vfmaddsub213ps(a, b, sub, ROUNDING);
- transmute(r)
+ vfmaddsubpsround(a, b, simd_neg(c), ROUNDING)
}
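// Mirror of the fmaddsub sketch for fmsubadd (hypothetical helper): negating
// `c` swaps which lanes add and which subtract, which is why the body above
// can reuse `vfmaddsubpsround(a, b, simd_neg(c), ROUNDING)`.
fn fmsubadd_lane(i: usize, a: f32, b: f32, c: f32) -> f32 {
    if i % 2 == 0 {
        a.mul_add(b, c) // even lane: (a * b) + c
    } else {
        a.mul_add(b, -c) // odd lane: (a * b) - c
    }
}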
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
@@ -8878,7 +8744,7 @@ pub unsafe fn _mm512_fmsubadd_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_fmsubadd_round_ps(
a: __m512,
@@ -8887,12 +8753,8 @@ pub unsafe fn _mm512_mask_fmsubadd_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f32x16());
- let a = a.as_f32x16();
- let b = b.as_f32x16();
- let r = vfmaddsub213ps(a, b, sub, ROUNDING);
- transmute(simd_select_bitmask(k, r, a))
+ let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
+ simd_select_bitmask(k, r, a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
@@ -8908,7 +8770,7 @@ pub unsafe fn _mm512_mask_fmsubadd_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_maskz_fmsubadd_round_ps(
k: __mmask16,
@@ -8917,12 +8779,8 @@ pub unsafe fn _mm512_maskz_fmsubadd_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f32x16());
- let a = a.as_f32x16();
- let b = b.as_f32x16();
- let r = vfmaddsub213ps(a, b, sub, ROUNDING);
- transmute(simd_select_bitmask(k, r, zero))
+ let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
+ simd_select_bitmask(k, r, _mm512_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
@@ -8938,7 +8796,7 @@ pub unsafe fn _mm512_maskz_fmsubadd_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask3_fmsubadd_round_ps(
a: __m512,
@@ -8947,13 +8805,8 @@ pub unsafe fn _mm512_mask3_fmsubadd_round_ps(
k: __mmask16,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let c = c.as_f32x16();
- let sub = simd_sub(zero, c);
- let a = a.as_f32x16();
- let b = b.as_f32x16();
- let r = vfmaddsub213ps(a, b, sub, ROUNDING);
- transmute(simd_select_bitmask(k, r, c))
+ let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
+ simd_select_bitmask(k, r, c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
@@ -8969,7 +8822,7 @@ pub unsafe fn _mm512_mask3_fmsubadd_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_fmsubadd_round_pd(
a: __m512d,
@@ -8977,12 +8830,7 @@ pub unsafe fn _mm512_fmsubadd_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f64x8());
- let a = a.as_f64x8();
- let b = b.as_f64x8();
- let r = vfmaddsub213pd(a, b, sub, ROUNDING);
- transmute(r)
+ vfmaddsubpdround(a, b, simd_neg(c), ROUNDING)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
@@ -8998,7 +8846,7 @@ pub unsafe fn _mm512_fmsubadd_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_fmsubadd_round_pd(
a: __m512d,
@@ -9007,12 +8855,8 @@ pub unsafe fn _mm512_mask_fmsubadd_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f64x8());
- let a = a.as_f64x8();
- let b = b.as_f64x8();
- let r = vfmaddsub213pd(a, b, sub, ROUNDING);
- transmute(simd_select_bitmask(k, r, a))
+ let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
+ simd_select_bitmask(k, r, a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
@@ -9028,7 +8872,7 @@ pub unsafe fn _mm512_mask_fmsubadd_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_maskz_fmsubadd_round_pd(
k: __mmask8,
@@ -9037,12 +8881,8 @@ pub unsafe fn _mm512_maskz_fmsubadd_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f64x8());
- let a = a.as_f64x8();
- let b = b.as_f64x8();
- let r = vfmaddsub213pd(a, b, sub, ROUNDING);
- transmute(simd_select_bitmask(k, r, zero))
+ let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
+ simd_select_bitmask(k, r, _mm512_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
@@ -9058,7 +8898,7 @@ pub unsafe fn _mm512_maskz_fmsubadd_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask3_fmsubadd_round_pd(
a: __m512d,
@@ -9067,13 +8907,8 @@ pub unsafe fn _mm512_mask3_fmsubadd_round_pd(
k: __mmask8,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let c = c.as_f64x8();
- let sub = simd_sub(zero, c);
- let a = a.as_f64x8();
- let b = b.as_f64x8();
- let r = vfmaddsub213pd(a, b, sub, ROUNDING);
- transmute(simd_select_bitmask(k, r, c))
+ let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
+ simd_select_bitmask(k, r, c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
@@ -9089,7 +8924,7 @@ pub unsafe fn _mm512_mask3_fmsubadd_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_fnmadd_round_ps<const ROUNDING: i32>(
a: __m512,
@@ -9097,12 +8932,7 @@ pub unsafe fn _mm512_fnmadd_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, a.as_f32x16());
- let b = b.as_f32x16();
- let c = c.as_f32x16();
- let r = vfmadd132psround(sub, b, c, ROUNDING);
- transmute(r)
+ vfmadd132psround(simd_neg(a), b, c, ROUNDING)
}
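
The fnmadd rewrite feeds simd_neg(a) into the ordinary fused multiply-add path. A scalar sketch of the identity -(a*b) + c == fma(-a, b, c), with illustrative values:

```rust
// fnmadd(a, b, c) = -(a * b) + c; negating one multiplicand and reusing fma gives the same value.
fn main() {
    let (a, b, c) = (3.0_f64, 2.0_f64, 10.0_f64);
    let via_neg_a = (-a).mul_add(b, c); // what the refactor computes after simd_neg(a)
    assert_eq!(via_neg_a, -(a * b) + c); // both are 4.0 here
}
```
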
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
@@ -9118,7 +8948,7 @@ pub unsafe fn _mm512_fnmadd_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_fnmadd_round_ps<const ROUNDING: i32>(
a: __m512,
@@ -9127,12 +8957,8 @@ pub unsafe fn _mm512_mask_fnmadd_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, a.as_f32x16());
- let b = b.as_f32x16();
- let c = c.as_f32x16();
- let r = vfmadd132psround(sub, b, c, ROUNDING);
- transmute(simd_select_bitmask(k, r, a.as_f32x16()))
+ let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
+ simd_select_bitmask(k, r, a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
@@ -9148,7 +8974,7 @@ pub unsafe fn _mm512_mask_fnmadd_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_maskz_fnmadd_round_ps<const ROUNDING: i32>(
k: __mmask16,
@@ -9157,12 +8983,8 @@ pub unsafe fn _mm512_maskz_fnmadd_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, a.as_f32x16());
- let b = b.as_f32x16();
- let c = c.as_f32x16();
- let r = vfmadd132psround(sub, b, c, ROUNDING);
- transmute(simd_select_bitmask(k, r, zero))
+ let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
+ simd_select_bitmask(k, r, _mm512_setzero_ps())
}
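
Both the writemask and zeromask forms now end with a single simd_select_bitmask: a set bit keeps the computed lane, a cleared bit falls back to the pass-through operand (a, c, or zero). A small scalar model of that selection; the names here are illustrative:

```rust
// Per-lane select driven by a bitmask, mirroring simd_select_bitmask semantics.
fn select_bitmask4(k: u8, computed: [f32; 4], fallback: [f32; 4]) -> [f32; 4] {
    core::array::from_fn(|i| if (k >> i) & 1 == 1 { computed[i] } else { fallback[i] })
}

fn main() {
    let r = select_bitmask4(0b0101, [1.0, 2.0, 3.0, 4.0], [0.0; 4]);
    assert_eq!(r, [1.0, 0.0, 3.0, 0.0]); // zeromask behaviour when the fallback is all zeros
}
```
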
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
@@ -9178,7 +9000,7 @@ pub unsafe fn _mm512_maskz_fnmadd_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask3_fnmadd_round_ps<const ROUNDING: i32>(
a: __m512,
@@ -9187,12 +9009,8 @@ pub unsafe fn _mm512_mask3_fnmadd_round_ps(
k: __mmask16,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, a.as_f32x16());
- let b = b.as_f32x16();
- let c = c.as_f32x16();
- let r = vfmadd132psround(sub, b, c, ROUNDING);
- transmute(simd_select_bitmask(k, r, c))
+ let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
+ simd_select_bitmask(k, r, c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
@@ -9208,7 +9026,7 @@ pub unsafe fn _mm512_mask3_fnmadd_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_fnmadd_round_pd<const ROUNDING: i32>(
a: __m512d,
@@ -9216,12 +9034,7 @@ pub unsafe fn _mm512_fnmadd_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, a.as_f64x8());
- let b = b.as_f64x8();
- let c = c.as_f64x8();
- let r = vfmadd132pdround(sub, b, c, ROUNDING);
- transmute(r)
+ vfmadd132pdround(simd_neg(a), b, c, ROUNDING)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
@@ -9237,7 +9050,7 @@ pub unsafe fn _mm512_fnmadd_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_fnmadd_round_pd<const ROUNDING: i32>(
a: __m512d,
@@ -9246,13 +9059,8 @@ pub unsafe fn _mm512_mask_fnmadd_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let a = a.as_f64x8();
- let sub = simd_sub(zero, a);
- let b = b.as_f64x8();
- let c = c.as_f64x8();
- let r = vfmadd132pdround(sub, b, c, ROUNDING);
- transmute(simd_select_bitmask(k, r, a))
+ let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
+ simd_select_bitmask(k, r, a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
@@ -9268,7 +9076,7 @@ pub unsafe fn _mm512_mask_fnmadd_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_maskz_fnmadd_round_pd<const ROUNDING: i32>(
k: __mmask8,
@@ -9277,12 +9085,8 @@ pub unsafe fn _mm512_maskz_fnmadd_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, a.as_f64x8());
- let b = b.as_f64x8();
- let c = c.as_f64x8();
- let r = vfmadd132pdround(sub, b, c, ROUNDING);
- transmute(simd_select_bitmask(k, r, zero))
+ let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
+ simd_select_bitmask(k, r, _mm512_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
@@ -9298,7 +9102,7 @@ pub unsafe fn _mm512_maskz_fnmadd_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask3_fnmadd_round_pd<const ROUNDING: i32>(
a: __m512d,
@@ -9307,12 +9111,8 @@ pub unsafe fn _mm512_mask3_fnmadd_round_pd(
k: __mmask8,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, a.as_f64x8());
- let b = b.as_f64x8();
- let c = c.as_f64x8();
- let r = vfmadd132pdround(sub, b, c, ROUNDING);
- transmute(simd_select_bitmask(k, r, c))
+ let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
+ simd_select_bitmask(k, r, c)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
@@ -9328,7 +9128,7 @@ pub unsafe fn _mm512_mask3_fnmadd_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_fnmsub_round_ps<const ROUNDING: i32>(
a: __m512,
@@ -9336,12 +9136,7 @@ pub unsafe fn _mm512_fnmsub_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let suba = simd_sub(zero, a.as_f32x16());
- let subc = simd_sub(zero, c.as_f32x16());
- let b = b.as_f32x16();
- let r = vfmadd132psround(suba, b, subc, ROUNDING);
- transmute(r)
+ vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING)
}
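
The fnmsub rewrite negates both a and c before calling the plain fused multiply-add helper. A scalar sketch of the identity -(a*b) - c == fma(-a, b, -c), with illustrative values:

```rust
// fnmsub(a, b, c) = -(a * b) - c, i.e. an ordinary fma with both a and c negated.
fn main() {
    let (a, b, c) = (1.5_f64, 4.0_f64, 2.5_f64);
    let via_neg = (-a).mul_add(b, -c); // simd_neg(a) and simd_neg(c) in the vector code
    assert_eq!(via_neg, -(a * b) - c); // both are -8.5 here
}
```
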
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
@@ -9357,7 +9152,7 @@ pub unsafe fn _mm512_fnmsub_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_fnmsub_round_ps<const ROUNDING: i32>(
a: __m512,
@@ -9366,13 +9161,8 @@ pub unsafe fn _mm512_mask_fnmsub_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let a = a.as_f32x16();
- let suba = simd_sub(zero, a);
- let subc = simd_sub(zero, c.as_f32x16());
- let b = b.as_f32x16();
- let r = vfmadd132psround(suba, b, subc, ROUNDING);
- transmute(simd_select_bitmask(k, r, a))
+ let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
+ simd_select_bitmask(k, r, a)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
@@ -9388,7 +9178,7 @@ pub unsafe fn _mm512_mask_fnmsub_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_maskz_fnmsub_round_ps<const ROUNDING: i32>(
k: __mmask16,
@@ -9397,12 +9187,8 @@ pub unsafe fn _mm512_maskz_fnmsub_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let suba = simd_sub(zero, a.as_f32x16());
- let subc = simd_sub(zero, c.as_f32x16());
- let b = b.as_f32x16();
- let r = vfmadd132psround(suba, b, subc, ROUNDING);
- transmute(simd_select_bitmask(k, r, zero))
+ let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
+ simd_select_bitmask(k, r, _mm512_setzero_ps())
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
@@ -9418,7 +9204,7 @@ pub unsafe fn _mm512_maskz_fnmsub_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask3_fnmsub_round_ps<const ROUNDING: i32>(
a: __m512,
@@ -9427,13 +9213,8 @@ pub unsafe fn _mm512_mask3_fnmsub_round_ps(
k: __mmask16,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let suba = simd_sub(zero, a.as_f32x16());
- let c = c.as_f32x16();
- let subc = simd_sub(zero, c);
- let b = b.as_f32x16();
- let r = vfmadd132psround(suba, b, subc, ROUNDING);
- transmute(simd_select_bitmask(k, r, c))
+ let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
+ simd_select_bitmask(k, r, c)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
@@ -9449,7 +9230,7 @@ pub unsafe fn _mm512_mask3_fnmsub_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_fnmsub_round_pd<const ROUNDING: i32>(
a: __m512d,
@@ -9457,12 +9238,7 @@ pub unsafe fn _mm512_fnmsub_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let suba = simd_sub(zero, a.as_f64x8());
- let subc = simd_sub(zero, c.as_f64x8());
- let b = b.as_f64x8();
- let r = vfmadd132pdround(suba, b, subc, ROUNDING);
- transmute(r)
+ vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
@@ -9478,7 +9254,7 @@ pub unsafe fn _mm512_fnmsub_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_fnmsub_round_pd<const ROUNDING: i32>(
a: __m512d,
@@ -9487,13 +9263,8 @@ pub unsafe fn _mm512_mask_fnmsub_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let a = a.as_f64x8();
- let suba = simd_sub(zero, a);
- let subc = simd_sub(zero, c.as_f64x8());
- let b = b.as_f64x8();
- let r = vfmadd132pdround(suba, b, subc, ROUNDING);
- transmute(simd_select_bitmask(k, r, a))
+ let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
+ simd_select_bitmask(k, r, a)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
@@ -9509,7 +9280,7 @@ pub unsafe fn _mm512_mask_fnmsub_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_maskz_fnmsub_round_pd<const ROUNDING: i32>(
k: __mmask8,
@@ -9518,12 +9289,8 @@ pub unsafe fn _mm512_maskz_fnmsub_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let suba = simd_sub(zero, a.as_f64x8());
- let subc = simd_sub(zero, c.as_f64x8());
- let b = b.as_f64x8();
- let r = vfmadd132pdround(suba, b, subc, ROUNDING);
- transmute(simd_select_bitmask(k, r, zero))
+ let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
+ simd_select_bitmask(k, r, _mm512_setzero_pd())
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
@@ -9539,7 +9306,7 @@ pub unsafe fn _mm512_maskz_fnmsub_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask3_fnmsub_round_pd<const ROUNDING: i32>(
a: __m512d,
@@ -9548,13 +9315,8 @@ pub unsafe fn _mm512_mask3_fnmsub_round_pd(
k: __mmask8,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let suba = simd_sub(zero, a.as_f64x8());
- let c = c.as_f64x8();
- let subc = simd_sub(zero, c);
- let b = b.as_f64x8();
- let r = vfmadd132pdround(suba, b, subc, ROUNDING);
- transmute(simd_select_bitmask(k, r, c))
+ let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
+ simd_select_bitmask(k, r, c)
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\
@@ -15947,7 +15709,7 @@ pub unsafe fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero_pd() -> __m512d {
// All-0 is a properly initialized __m512d
- mem::zeroed()
+ const { mem::zeroed() }
}
/// Returns vector of type `__m512` with all elements set to zero.
@@ -15959,7 +15721,7 @@ pub unsafe fn _mm512_setzero_pd() -> __m512d {
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero_ps() -> __m512 {
// All-0 is a properly initialized __m512
- mem::zeroed()
+ const { mem::zeroed() }
}
/// Return vector of type `__m512` with all elements set to zero.
@@ -15971,7 +15733,7 @@ pub unsafe fn _mm512_setzero_ps() -> __m512 {
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero() -> __m512 {
// All-0 is a properly initialized __m512
- mem::zeroed()
+ const { mem::zeroed() }
}
/// Returns vector of type `__m512i` with all elements set to zero.
@@ -15983,7 +15745,7 @@ pub unsafe fn _mm512_setzero() -> __m512 {
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero_si512() -> __m512i {
// All-0 is a properly initialized __m512i
- mem::zeroed()
+ const { mem::zeroed() }
}
/// Return vector of type `__m512i` with all elements set to zero.
@@ -15995,7 +15757,7 @@ pub unsafe fn _mm512_setzero_si512() -> __m512i {
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero_epi32() -> __m512i {
// All-0 is a properly initialized __m512i
- mem::zeroed()
+ const { mem::zeroed() }
}
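
Wrapping mem::zeroed in an inline const block makes the all-zero vector a compile-time constant instead of a runtime call, while the emitted code still reduces to a single register-zeroing instruction. A stand-alone sketch with a stand-in type (not the real __m512 wrappers), assuming a toolchain with inline const blocks:

```rust
use core::mem;

// Stand-in for the SIMD wrapper types; all-zero bytes are a valid value for it.
#[derive(Clone, Copy, PartialEq, Debug)]
struct Vec4([f32; 4]);

fn setzero() -> Vec4 {
    // Evaluated at compile time thanks to the inline const block.
    const { unsafe { mem::zeroed() } }
}

fn main() {
    assert_eq!(setzero(), Vec4([0.0; 4]));
}
```
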
/// Sets packed 32-bit integers in `dst` with the supplied values in reverse
@@ -25556,8 +25318,27 @@ pub unsafe fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(vmovd))]
pub unsafe fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
- let extract: i32 = simd_extract!(a.as_i32x16(), 0);
- extract
+ simd_extract!(a.as_i32x16(), 0)
+}
+
+/// Copy the lower single-precision (32-bit) floating-point element of a to dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtss_f32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm512_cvtss_f32(a: __m512) -> f32 {
+ simd_extract!(a, 0)
+}
+
+/// Copy the lower double-precision (64-bit) floating-point element of a to dst.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsd_f64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm512_cvtsd_f64(a: __m512d) -> f64 {
+ simd_extract!(a, 0)
}
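
The two new conversions simply return lane 0 of the vector. A scalar model of that behaviour, with arrays standing in for __m512 and __m512d:

```rust
// Model of _mm512_cvtss_f32 / _mm512_cvtsd_f64: both just read the lowest element.
fn cvtss_f32_model(a: [f32; 16]) -> f32 { a[0] }
fn cvtsd_f64_model(a: [f64; 8]) -> f64 { a[0] }

fn main() {
    let mut v = [0.0f32; 16];
    v[0] = 42.5;
    assert_eq!(cvtss_f32_model(v), 42.5);
    assert_eq!(cvtsd_f64_model([7.0; 8]), 7.0);
}
```
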
/// Broadcast the low packed 32-bit integer from a to all elements of dst.
@@ -27493,6 +27274,26 @@ pub unsafe fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
_mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
}
+/// Convert 16-bit mask a into an integer value, and store the result in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask16_u32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _cvtmask16_u32(a: __mmask16) -> u32 {
+ a as u32
+}
+
+/// Convert 32-bit integer value a to a 16-bit mask and store the result in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask16)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _cvtu32_mask16(a: u32) -> __mmask16 {
+ a as __mmask16
+}
+
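
Since __mmask16 is a 16-bit integer, these two conversions are plain casts: zero-extension to u32 one way and truncation to the low 16 bits the other. A sketch with ordinary integers standing in for the mask type:

```rust
fn main() {
    let m: u16 = 0b1010_0000_1111_0001; // stand-in for a __mmask16 value
    let widened = m as u32; // what _cvtmask16_u32 returns
    let truncated = (widened | 0xFFFF_0000) as u16; // _cvtu32_mask16 keeps only the low 16 bits
    assert_eq!((widened, truncated), (0xA0F1, m));
}
```
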
/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kand_mask16&expand=3212)
@@ -27623,6 +27424,83 @@ pub unsafe fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
_mm512_knot(_mm512_kxor(a, b))
}
+/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
+/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask16_u8)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
+ let tmp = _kor_mask16(a, b);
+ *all_ones = (tmp == 0xffff) as u8;
+ (tmp == 0) as u8
+}
+
+/// Compute the bitwise OR of 16-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
+/// store 0 in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask16_u8)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
+ (_kor_mask16(a, b) == 0xffff) as u8
+}
+
+/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
+/// store 0 in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask16_u8)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
+ (_kor_mask16(a, b) == 0) as u8
+}
+
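
The three kortest helpers compute the same bitwise OR and differ only in which condition they report. With plain 16-bit values the semantics look like this (illustrative values):

```rust
fn main() {
    let (a, b): (u16, u16) = (0x00FF, 0xFF00);
    let or = a | b;
    let all_ones = (or == 0xFFFF) as u8; // what _kortestc_mask16_u8 reports
    let all_zero = (or == 0) as u8;      // what _kortestz_mask16_u8 reports
    // _kortest_mask16_u8 returns all_zero and writes all_ones through its out-pointer.
    assert_eq!((all_ones, all_zero), (1, 0));
}
```
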
+/// Shift 16-bit mask a left by count bits while shifting in zeros, and store the result in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask16)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
+ a << COUNT
+}
+
+/// Shift 16-bit mask a right by count bits while shifting in zeros, and store the result in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask16)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
+ a >> COUNT
+}
+
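
The shift count is a const generic, wired up through rustc_legacy_const_generics so the C-style immediate-argument form keeps working. A model on plain u16 masks:

```rust
// Models of _kshiftli_mask16 / _kshiftri_mask16: shift the 16-bit mask, filling with zeros.
fn kshiftli_mask16_model<const COUNT: u32>(a: u16) -> u16 { a << COUNT }
fn kshiftri_mask16_model<const COUNT: u32>(a: u16) -> u16 { a >> COUNT }

fn main() {
    assert_eq!(kshiftli_mask16_model::<4>(0x00FF), 0x0FF0);
    assert_eq!(kshiftri_mask16_model::<4>(0x00FF), 0x000F);
}
```
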
+/// Load 16-bit mask from memory.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask16)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
+ *mem_addr
+}
+
+/// Store 16-bit mask to memory.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask16)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
+ *mem_addr = a;
+}
+
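
Loading and storing a mask boil down to a plain 16-bit read and write through the given pointer. A minimal round-trip sketch:

```rust
use core::ptr;

fn main() {
    let k: u16 = 0b0000_1111_0000_1111; // stand-in for a __mmask16 value
    let mut slot: u16 = 0;
    unsafe {
        ptr::write(&mut slot, k);        // what _store_mask16(&mut slot, k) boils down to
        assert_eq!(ptr::read(&slot), k); // what _load_mask16(&slot) returns
    }
}
```
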
/// Copy 16-bit mask a to k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_kmov&expand=3228)
@@ -27674,12 +27552,20 @@ pub unsafe fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(cmp))] // generate normal and code instead of kortestw
pub unsafe fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
- let r = a | b;
- if r == 0b11111111_11111111 {
- 1
- } else {
- 0
- }
+ let r = (a | b) == 0b11111111_11111111;
+ r as i32
+}
+
+/// Performs bitwise OR between k1 and k2, storing the result in dst. ZF flag is set if dst is 0.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=512_kortestz)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kortestw
+pub unsafe fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
+ let r = (a | b) == 0;
+ r as i32
}
/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
@@ -29208,7 +29094,7 @@ pub unsafe fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16()))
}
-/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu32_mask&expand=1057)
#[inline]
@@ -29216,7 +29102,7 @@ pub unsafe fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub unsafe fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmplt_epu32_mask(a, b) & k1
+ _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
}
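
The masked compare variants used to AND the unmasked result with k1; routing them through the generic _mm512_mask_cmp_epu32_mask with the matching _MM_CMPINT_* constant yields the same mask while keeping a single code path. A scalar model showing the two formulations agree:

```rust
fn main() {
    let a: [u32; 4] = [1, 5, 7, 2];
    let b: [u32; 4] = [3, 4, 9, 2];
    let k1: u8 = 0b1010;

    let mut cmplt: u8 = 0;
    for i in 0..4 {
        cmplt |= ((a[i] < b[i]) as u8) << i; // unmasked less-than mask: 0b0101
    }

    let old_style = cmplt & k1; // previous implementation: compare, then AND with k1
    let mut new_style: u8 = 0;  // masked compare: only lanes enabled in k1 are tested
    for i in 0..4 {
        new_style |= (((k1 >> i) & 1 == 1 && a[i] < b[i]) as u8) << i;
    }
    assert_eq!(old_style, new_style);
}
```
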
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -29230,7 +29116,7 @@ pub unsafe fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<u32x8, _>(simd_lt(a.as_u32x8(), b.as_u32x8()))
}
-/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu32_mask&expand=1055)
#[inline]
@@ -29238,7 +29124,7 @@ pub unsafe fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub unsafe fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmplt_epu32_mask(a, b) & k1
+ _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -29252,7 +29138,7 @@ pub unsafe fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<u32x4, _>(simd_lt(a.as_u32x4(), b.as_u32x4()))
}
-/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu32_mask&expand=1053)
#[inline]
@@ -29260,7 +29146,7 @@ pub unsafe fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub unsafe fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmplt_epu32_mask(a, b) & k1
+ _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -29282,7 +29168,7 @@ pub unsafe fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub unsafe fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmpgt_epu32_mask(a, b) & k1
+ _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -29304,7 +29190,7 @@ pub unsafe fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub unsafe fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmpgt_epu32_mask(a, b) & k1
+ _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -29326,7 +29212,7 @@ pub unsafe fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub unsafe fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpgt_epu32_mask(a, b) & k1
+ _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -29340,7 +29226,7 @@ pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16()))
}
-/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu32_mask&expand=996)
#[inline]
@@ -29348,7 +29234,7 @@ pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub unsafe fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmple_epu32_mask(a, b) & k1
+ _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -29362,7 +29248,7 @@ pub unsafe fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<u32x8, _>(simd_le(a.as_u32x8(), b.as_u32x8()))
}
-/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu32_mask&expand=994)
#[inline]
@@ -29370,7 +29256,7 @@ pub unsafe fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub unsafe fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmple_epu32_mask(a, b) & k1
+ _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -29384,7 +29270,7 @@ pub unsafe fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<u32x4, _>(simd_le(a.as_u32x4(), b.as_u32x4()))
}
-/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu32_mask&expand=992)
#[inline]
@@ -29392,7 +29278,7 @@ pub unsafe fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub unsafe fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmple_epu32_mask(a, b) & k1
+ _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -29414,7 +29300,7 @@ pub unsafe fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub unsafe fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmpge_epu32_mask(a, b) & k1
+ _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -29436,7 +29322,7 @@ pub unsafe fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub unsafe fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmpge_epu32_mask(a, b) & k1
+ _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -29458,7 +29344,7 @@ pub unsafe fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub unsafe fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpge_epu32_mask(a, b) & k1
+ _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
@@ -29480,7 +29366,7 @@ pub unsafe fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub unsafe fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmpeq_epu32_mask(a, b) & k1
+ _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
@@ -29502,7 +29388,7 @@ pub unsafe fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub unsafe fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmpeq_epu32_mask(a, b) & k1
+ _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
@@ -29524,7 +29410,7 @@ pub unsafe fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub unsafe fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpeq_epu32_mask(a, b) & k1
+ _mm_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -29546,7 +29432,7 @@ pub unsafe fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub unsafe fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmpneq_epu32_mask(a, b) & k1
+ _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -29568,7 +29454,7 @@ pub unsafe fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub unsafe fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmpneq_epu32_mask(a, b) & k1
+ _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -29590,7 +29476,7 @@ pub unsafe fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub unsafe fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpneq_epu32_mask(a, b) & k1
+ _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -29606,11 +29492,19 @@ pub unsafe fn _mm512_cmp_epu32_mask(
b: __m512i,
) -> __mmask16 {
static_assert_uimm_bits!(IMM3, 3);
- let neg_one = -1;
- let a = a.as_i32x16();
- let b = b.as_i32x16();
- let r = vpcmpud(a, b, IMM3, neg_one);
- transmute(r)
+ let a = a.as_u32x16();
+ let b = b.as_u32x16();
+ let r = match IMM3 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i32x16::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i32x16::splat(-1),
+ };
+ simd_bitmask(r)
}
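
Replacing the vpcmpud LLVM intrinsic with a match over the 3-bit immediate makes the predicate table explicit: 0-2 and 4-6 are the six real comparisons, 3 is always-false and 7 always-true. A scalar model of one lane of that table (the _MM_CMPINT_FALSE/_MM_CMPINT_TRUE names for 3 and 7 are the usual stdarch constants, noted here only for orientation):

```rust
// One lane of the comparison table selected by the 3-bit immediate.
fn cmp_u32_lane(imm3: u8, a: u32, b: u32) -> bool {
    match imm3 {
        0 => a == b, // _MM_CMPINT_EQ
        1 => a < b,  // _MM_CMPINT_LT
        2 => a <= b, // _MM_CMPINT_LE
        3 => false,  // always false
        4 => a != b, // _MM_CMPINT_NE
        5 => a >= b, // _MM_CMPINT_NLT
        6 => a > b,  // _MM_CMPINT_NLE
        _ => true,   // always true
    }
}

fn main() {
    assert!(cmp_u32_lane(6, 9, 4));  // NLE behaves as greater-than
    assert!(!cmp_u32_lane(3, 9, 4)); // immediate 3 never sets a bit
}
```
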
/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -29627,10 +29521,20 @@ pub unsafe fn _mm512_mask_cmp_epu32_mask(
b: __m512i,
) -> __mmask16 {
static_assert_uimm_bits!(IMM3, 3);
- let a = a.as_i32x16();
- let b = b.as_i32x16();
- let r = vpcmpud(a, b, IMM3, k1 as i16);
- transmute(r)
+ let a = a.as_u32x16();
+ let b = b.as_u32x16();
+ let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::splat(0));
+ let r = match IMM3 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i32x16::splat(0),
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => i32x16::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -29646,11 +29550,19 @@ pub unsafe fn _mm256_cmp_epu32_mask(
b: __m256i,
) -> __mmask8 {
static_assert_uimm_bits!(IMM3, 3);
- let neg_one = -1;
- let a = a.as_i32x8();
- let b = b.as_i32x8();
- let r = vpcmpud256(a, b, IMM3, neg_one);
- transmute(r)
+ let a = a.as_u32x8();
+ let b = b.as_u32x8();
+ let r = match IMM3 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i32x8::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i32x8::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -29667,10 +29579,20 @@ pub unsafe fn _mm256_mask_cmp_epu32_mask(
b: __m256i,
) -> __mmask8 {
static_assert_uimm_bits!(IMM3, 3);
- let a = a.as_i32x8();
- let b = b.as_i32x8();
- let r = vpcmpud256(a, b, IMM3, k1 as i8);
- transmute(r)
+ let a = a.as_u32x8();
+ let b = b.as_u32x8();
+ let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::splat(0));
+ let r = match IMM3 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i32x8::splat(0),
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => i32x8::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -29683,11 +29605,19 @@ pub unsafe fn _mm256_mask_cmp_epu32_mask(
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub unsafe fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
static_assert_uimm_bits!(IMM3, 3);
- let neg_one = -1;
- let a = a.as_i32x4();
- let b = b.as_i32x4();
- let r = vpcmpud128(a, b, IMM3, neg_one);
- transmute(r)
+ let a = a.as_u32x4();
+ let b = b.as_u32x4();
+ let r = match IMM3 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i32x4::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i32x4::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -29704,10 +29634,20 @@ pub unsafe fn _mm_mask_cmp_epu32_mask(
b: __m128i,
) -> __mmask8 {
static_assert_uimm_bits!(IMM3, 3);
- let a = a.as_i32x4();
- let b = b.as_i32x4();
- let r = vpcmpud128(a, b, IMM3, k1 as i8);
- transmute(r)
+ let a = a.as_u32x4();
+ let b = b.as_u32x4();
+ let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::splat(0));
+ let r = match IMM3 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i32x4::splat(0),
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => i32x4::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -29721,7 +29661,7 @@ pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16()))
}
-/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi32_mask&expand=1031)
#[inline]
@@ -29729,7 +29669,7 @@ pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub unsafe fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmplt_epi32_mask(a, b) & k1
+ _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -29743,7 +29683,7 @@ pub unsafe fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<i32x8, _>(simd_lt(a.as_i32x8(), b.as_i32x8()))
}
-/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi32_mask&expand=1028)
#[inline]
@@ -29751,7 +29691,7 @@ pub unsafe fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub unsafe fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmplt_epi32_mask(a, b) & k1
+ _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -29765,7 +29705,7 @@ pub unsafe fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4()))
}
-/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi32_mask&expand=1026)
#[inline]
@@ -29773,7 +29713,7 @@ pub unsafe fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub unsafe fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmplt_epi32_mask(a, b) & k1
+ _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -29795,7 +29735,7 @@ pub unsafe fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub unsafe fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmpgt_epi32_mask(a, b) & k1
+ _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -29817,7 +29757,7 @@ pub unsafe fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub unsafe fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmpgt_epi32_mask(a, b) & k1
+ _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -29839,7 +29779,7 @@ pub unsafe fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub unsafe fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpgt_epi32_mask(a, b) & k1
+ _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -29853,7 +29793,7 @@ pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16()))
}
-/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi32_mask&expand=972)
#[inline]
@@ -29861,7 +29801,7 @@ pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub unsafe fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmple_epi32_mask(a, b) & k1
+ _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -29875,7 +29815,7 @@ pub unsafe fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<i32x8, _>(simd_le(a.as_i32x8(), b.as_i32x8()))
}
-/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi32_mask&expand=970)
#[inline]
@@ -29883,7 +29823,7 @@ pub unsafe fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub unsafe fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmple_epi32_mask(a, b) & k1
+ _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -29897,7 +29837,7 @@ pub unsafe fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<i32x4, _>(simd_le(a.as_i32x4(), b.as_i32x4()))
}
-/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi32_mask&expand=968)
#[inline]
@@ -29905,7 +29845,7 @@ pub unsafe fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub unsafe fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmple_epi32_mask(a, b) & k1
+ _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -29927,7 +29867,7 @@ pub unsafe fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub unsafe fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmpge_epi32_mask(a, b) & k1
+ _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -29949,7 +29889,7 @@ pub unsafe fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub unsafe fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmpge_epi32_mask(a, b) & k1
+ _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -29971,7 +29911,7 @@ pub unsafe fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub unsafe fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpge_epi32_mask(a, b) & k1
+ _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
@@ -29993,7 +29933,7 @@ pub unsafe fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub unsafe fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmpeq_epi32_mask(a, b) & k1
+ _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
@@ -30015,7 +29955,7 @@ pub unsafe fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub unsafe fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmpeq_epi32_mask(a, b) & k1
+ _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
@@ -30037,7 +29977,7 @@ pub unsafe fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub unsafe fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpeq_epi32_mask(a, b) & k1
+ _mm_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -30059,7 +29999,7 @@ pub unsafe fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub unsafe fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
- _mm512_cmpneq_epi32_mask(a, b) & k1
+ _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -30081,7 +30021,7 @@ pub unsafe fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub unsafe fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmpneq_epi32_mask(a, b) & k1
+ _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -30103,7 +30043,7 @@ pub unsafe fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub unsafe fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpneq_epi32_mask(a, b) & k1
+ _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
}
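With this change each predicate-specific masked wrapper above is a thin delegation to the const-generic `_mask_cmp_*_mask` form, encoding the predicate as an `_MM_CMPINT_*` constant (EQ = 0, LT = 1, LE = 2, NE = 4, NLT = 5 for greater-than-or-equal, NLE = 6 for greater-than). A minimal sketch of that equivalence, assuming a nightly toolchain with `feature(stdarch_x86_avx512)` and an AVX-512F target; the demo function name is made up for illustration:
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn predicate_mapping_demo() {
    use core::arch::x86_64::*;
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let b = _mm512_set1_epi32(8);
    let k1: __mmask16 = 0x0F0F;
    // greater-than-or-equal is spelled "not less than" (_MM_CMPINT_NLT)
    assert_eq!(
        _mm512_mask_cmpge_epi32_mask(k1, a, b),
        _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
    );
    // not-equal is _MM_CMPINT_NE
    assert_eq!(
        _mm512_mask_cmpneq_epi32_mask(k1, a, b),
        _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
    );
}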
/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -30119,11 +30059,19 @@ pub unsafe fn _mm512_cmp_epi32_mask(
b: __m512i,
) -> __mmask16 {
static_assert_uimm_bits!(IMM3, 3);
- let neg_one = -1;
let a = a.as_i32x16();
let b = b.as_i32x16();
- let r = vpcmpd(a, b, IMM3, neg_one);
- transmute(r)
+ let r = match IMM3 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i32x16::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i32x16::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -30142,8 +30090,18 @@ pub unsafe fn _mm512_mask_cmp_epi32_mask(
static_assert_uimm_bits!(IMM3, 3);
let a = a.as_i32x16();
let b = b.as_i32x16();
- let r = vpcmpd(a, b, IMM3, k1 as i16);
- transmute(r)
+ let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::splat(0));
+ let r = match IMM3 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i32x16::splat(0),
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => k1,
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -30159,11 +30117,19 @@ pub unsafe fn _mm256_cmp_epi32_mask(
b: __m256i,
) -> __mmask8 {
static_assert_uimm_bits!(IMM3, 3);
- let neg_one = -1;
let a = a.as_i32x8();
let b = b.as_i32x8();
- let r = vpcmpd256(a, b, IMM3, neg_one);
- transmute(r)
+ let r = match IMM3 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i32x8::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i32x8::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -30182,8 +30148,18 @@ pub unsafe fn _mm256_mask_cmp_epi32_mask(
static_assert_uimm_bits!(IMM3, 3);
let a = a.as_i32x8();
let b = b.as_i32x8();
- let r = vpcmpd256(a, b, IMM3, k1 as i8);
- transmute(r)
+ let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::splat(0));
+ let r = match IMM3 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i32x8::splat(0),
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => k1,
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -30196,11 +30172,19 @@ pub unsafe fn _mm256_mask_cmp_epi32_mask(
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub unsafe fn _mm_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
static_assert_uimm_bits!(IMM3, 3);
- let neg_one = -1;
let a = a.as_i32x4();
let b = b.as_i32x4();
- let r = vpcmpd128(a, b, IMM3, neg_one);
- transmute(r)
+ let r = match IMM3 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i32x4::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i32x4::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -30219,8 +30203,18 @@ pub unsafe fn _mm_mask_cmp_epi32_mask(
static_assert_uimm_bits!(IMM3, 3);
let a = a.as_i32x4();
let b = b.as_i32x4();
- let r = vpcmpd128(a, b, IMM3, k1 as i8);
- transmute(r)
+ let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::splat(0));
+ let r = match IMM3 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i32x4::splat(0),
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => k1,
+ };
+ simd_bitmask(r)
}
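The const-generic compares now expand to portable `simd_*` vector comparisons instead of calls to the `vpcmpd` LLVM intrinsics; the match covers all eight `_MM_CMPINT_*` values, with 3 (`_MM_CMPINT_FALSE`) producing an empty mask and 7 (`_MM_CMPINT_TRUE`) producing the full mask, restricted to `k1` in the zeromask forms. A hedged sketch of the two degenerate predicates, under the same nightly/AVX-512F assumptions as above:
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn degenerate_predicates_demo() {
    use core::arch::x86_64::*;
    let a = _mm512_set1_epi32(7);
    let b = _mm512_set1_epi32(-7);
    let k1: __mmask16 = 0b1010_1010_1010_1010;
    // _MM_CMPINT_FALSE always yields an empty mask.
    assert_eq!(_mm512_cmp_epi32_mask::<_MM_CMPINT_FALSE>(a, b), 0);
    assert_eq!(_mm512_mask_cmp_epi32_mask::<_MM_CMPINT_FALSE>(k1, a, b), 0);
    // _MM_CMPINT_TRUE selects every lane, restricted to k1 in the zeromask form.
    assert_eq!(_mm512_cmp_epi32_mask::<_MM_CMPINT_TRUE>(a, b), 0xFFFF);
    assert_eq!(_mm512_mask_cmp_epi32_mask::<_MM_CMPINT_TRUE>(k1, a, b), k1);
}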
/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -30242,7 +30236,7 @@ pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
pub unsafe fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmplt_epu64_mask(a, b) & k1
+ _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -30264,7 +30258,7 @@ pub unsafe fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
pub unsafe fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmplt_epu64_mask(a, b) & k1
+ _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -30286,7 +30280,7 @@ pub unsafe fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
pub unsafe fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmplt_epu64_mask(a, b) & k1
+ _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -30308,7 +30302,7 @@ pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
pub unsafe fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmpgt_epu64_mask(a, b) & k1
+ _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -30330,7 +30324,7 @@ pub unsafe fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
pub unsafe fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmpgt_epu64_mask(a, b) & k1
+ _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -30352,7 +30346,7 @@ pub unsafe fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
pub unsafe fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpgt_epu64_mask(a, b) & k1
+ _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -30374,7 +30368,7 @@ pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
pub unsafe fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmple_epu64_mask(a, b) & k1
+ _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -30396,7 +30390,7 @@ pub unsafe fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
pub unsafe fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmple_epu64_mask(a, b) & k1
+ _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -30418,7 +30412,7 @@ pub unsafe fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
pub unsafe fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmple_epu64_mask(a, b) & k1
+ _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -30440,7 +30434,7 @@ pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
pub unsafe fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmpge_epu64_mask(a, b) & k1
+ _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -30462,7 +30456,7 @@ pub unsafe fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
pub unsafe fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmpge_epu64_mask(a, b) & k1
+ _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -30484,7 +30478,7 @@ pub unsafe fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
pub unsafe fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpge_epu64_mask(a, b) & k1
+ _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
@@ -30506,7 +30500,7 @@ pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
pub unsafe fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmpeq_epu64_mask(a, b) & k1
+ _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
@@ -30528,7 +30522,7 @@ pub unsafe fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
pub unsafe fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmpeq_epu64_mask(a, b) & k1
+ _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
@@ -30550,7 +30544,7 @@ pub unsafe fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
pub unsafe fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpeq_epu64_mask(a, b) & k1
+ _mm_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -30572,7 +30566,7 @@ pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
pub unsafe fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmpneq_epu64_mask(a, b) & k1
+ _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -30594,7 +30588,7 @@ pub unsafe fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
pub unsafe fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmpneq_epu64_mask(a, b) & k1
+ _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -30616,7 +30610,7 @@ pub unsafe fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
pub unsafe fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpneq_epu64_mask(a, b) & k1
+ _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -30632,11 +30626,19 @@ pub unsafe fn _mm512_cmp_epu64_mask(
b: __m512i,
) -> __mmask8 {
static_assert_uimm_bits!(IMM3, 3);
- let neg_one = -1;
- let a = a.as_i64x8();
- let b = b.as_i64x8();
- let r = vpcmpuq(a, b, IMM3, neg_one);
- transmute(r)
+ let a = a.as_u64x8();
+ let b = b.as_u64x8();
+ let r = match IMM3 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i64x8::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i64x8::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -30653,10 +30655,20 @@ pub unsafe fn _mm512_mask_cmp_epu64_mask(
b: __m512i,
) -> __mmask8 {
static_assert_uimm_bits!(IMM3, 3);
- let a = a.as_i64x8();
- let b = b.as_i64x8();
- let r = vpcmpuq(a, b, IMM3, k1 as i8);
- transmute(r)
+ let a = a.as_u64x8();
+ let b = b.as_u64x8();
+ let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::splat(0));
+ let r = match IMM3 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i64x8::splat(0),
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => k1,
+ };
+ simd_bitmask(r)
}
/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -30672,11 +30684,19 @@ pub unsafe fn _mm256_cmp_epu64_mask(
b: __m256i,
) -> __mmask8 {
static_assert_uimm_bits!(IMM3, 3);
- let neg_one = -1;
- let a = a.as_i64x4();
- let b = b.as_i64x4();
- let r = vpcmpuq256(a, b, IMM3, neg_one);
- transmute(r)
+ let a = a.as_u64x4();
+ let b = b.as_u64x4();
+ let r = match IMM3 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i64x4::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i64x4::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -30693,10 +30713,20 @@ pub unsafe fn _mm256_mask_cmp_epu64_mask(
b: __m256i,
) -> __mmask8 {
static_assert_uimm_bits!(IMM3, 3);
- let a = a.as_i64x4();
- let b = b.as_i64x4();
- let r = vpcmpuq256(a, b, IMM3, k1 as i8);
- transmute(r)
+ let a = a.as_u64x4();
+ let b = b.as_u64x4();
+ let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::splat(0));
+ let r = match IMM3 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i64x4::splat(0),
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => k1,
+ };
+ simd_bitmask(r)
}
/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -30709,11 +30739,19 @@ pub unsafe fn _mm256_mask_cmp_epu64_mask(
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub unsafe fn _mm_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
static_assert_uimm_bits!(IMM3, 3);
- let neg_one = -1;
- let a = a.as_i64x2();
- let b = b.as_i64x2();
- let r = vpcmpuq128(a, b, IMM3, neg_one);
- transmute(r)
+ let a = a.as_u64x2();
+ let b = b.as_u64x2();
+ let r = match IMM3 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i64x2::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i64x2::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -30730,10 +30768,20 @@ pub unsafe fn _mm_mask_cmp_epu64_mask(
b: __m128i,
) -> __mmask8 {
static_assert_uimm_bits!(IMM3, 3);
- let a = a.as_i64x2();
- let b = b.as_i64x2();
- let r = vpcmpuq128(a, b, IMM3, k1 as i8);
- transmute(r)
+ let a = a.as_u64x2();
+ let b = b.as_u64x2();
+ let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::splat(0));
+ let r = match IMM3 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i64x2::splat(0),
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => k1,
+ };
+ simd_bitmask(r)
}
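For the `epu64` family the operands are now reinterpreted as unsigned lanes (`as_u64x8` and friends) so that `simd_lt`/`simd_gt` compare as unsigned, while the signed `epi64` family below keeps `i64` lanes. A small sketch of where the two differ, illustrative only and under the same nightly/AVX-512F assumptions:
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn unsigned_vs_signed_demo() {
    use core::arch::x86_64::*;
    // The bit pattern of -1 is u64::MAX when viewed as an unsigned lane.
    let a = _mm512_set1_epi64(-1);
    let b = _mm512_set1_epi64(1);
    // Unsigned: u64::MAX > 1 in every lane.
    assert_eq!(_mm512_cmp_epu64_mask::<_MM_CMPINT_NLE>(a, b), 0xFF);
    // Signed: -1 < 1, so no lane is greater.
    assert_eq!(_mm512_cmp_epi64_mask::<_MM_CMPINT_NLE>(a, b), 0x00);
}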
/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -30755,7 +30803,7 @@ pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
pub unsafe fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmplt_epi64_mask(a, b) & k1
+ _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -30777,7 +30825,7 @@ pub unsafe fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
pub unsafe fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmplt_epi64_mask(a, b) & k1
+ _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
@@ -30799,7 +30847,7 @@ pub unsafe fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
pub unsafe fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmplt_epi64_mask(a, b) & k1
+ _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
}
/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -30821,7 +30869,7 @@ pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
pub unsafe fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmpgt_epi64_mask(a, b) & k1
+ _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -30843,7 +30891,7 @@ pub unsafe fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
pub unsafe fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmpgt_epi64_mask(a, b) & k1
+ _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
@@ -30865,7 +30913,7 @@ pub unsafe fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
pub unsafe fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpgt_epi64_mask(a, b) & k1
+ _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -30887,7 +30935,7 @@ pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
pub unsafe fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmple_epi64_mask(a, b) & k1
+ _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -30909,7 +30957,7 @@ pub unsafe fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
pub unsafe fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmple_epi64_mask(a, b) & k1
+ _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
@@ -30931,7 +30979,7 @@ pub unsafe fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
pub unsafe fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmple_epi64_mask(a, b) & k1
+ _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
}
/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -30953,7 +31001,7 @@ pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
pub unsafe fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmpge_epi64_mask(a, b) & k1
+ _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -30975,7 +31023,7 @@ pub unsafe fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
pub unsafe fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmpge_epi64_mask(a, b) & k1
+ _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
@@ -30997,7 +31045,7 @@ pub unsafe fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
pub unsafe fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpge_epi64_mask(a, b) & k1
+ _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
@@ -31019,7 +31067,7 @@ pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
pub unsafe fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmpeq_epi64_mask(a, b) & k1
+ _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
@@ -31041,7 +31089,7 @@ pub unsafe fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
pub unsafe fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmpeq_epi64_mask(a, b) & k1
+ _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
@@ -31063,7 +31111,7 @@ pub unsafe fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
pub unsafe fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpeq_epi64_mask(a, b) & k1
+ _mm_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -31085,7 +31133,7 @@ pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
pub unsafe fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
- _mm512_cmpneq_epi64_mask(a, b) & k1
+ _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -31107,7 +31155,7 @@ pub unsafe fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
pub unsafe fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
- _mm256_cmpneq_epi64_mask(a, b) & k1
+ _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
@@ -31129,7 +31177,7 @@ pub unsafe fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
pub unsafe fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
- _mm_cmpneq_epi64_mask(a, b) & k1
+ _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
}
/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -31145,11 +31193,19 @@ pub unsafe fn _mm512_cmp_epi64_mask(
b: __m512i,
) -> __mmask8 {
static_assert_uimm_bits!(IMM3, 3);
- let neg_one = -1;
let a = a.as_i64x8();
let b = b.as_i64x8();
- let r = vpcmpq(a, b, IMM3, neg_one);
- transmute(r)
+ let r = match IMM3 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i64x8::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i64x8::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -31168,8 +31224,18 @@ pub unsafe fn _mm512_mask_cmp_epi64_mask(
static_assert_uimm_bits!(IMM3, 3);
let a = a.as_i64x8();
let b = b.as_i64x8();
- let r = vpcmpq(a, b, IMM3, k1 as i8);
- transmute(r)
+ let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::splat(0));
+ let r = match IMM3 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i64x8::splat(0),
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => k1,
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -31185,11 +31251,19 @@ pub unsafe fn _mm256_cmp_epi64_mask(
b: __m256i,
) -> __mmask8 {
static_assert_uimm_bits!(IMM3, 3);
- let neg_one = -1;
let a = a.as_i64x4();
let b = b.as_i64x4();
- let r = vpcmpq256(a, b, IMM3, neg_one);
- transmute(r)
+ let r = match IMM3 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i64x4::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i64x4::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -31208,8 +31282,18 @@ pub unsafe fn _mm256_mask_cmp_epi64_mask(
static_assert_uimm_bits!(IMM3, 3);
let a = a.as_i64x4();
let b = b.as_i64x4();
- let r = vpcmpq256(a, b, IMM3, k1 as i8);
- transmute(r)
+ let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::splat(0));
+ let r = match IMM3 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i64x4::splat(0),
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => k1,
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
@@ -31222,11 +31306,19 @@ pub unsafe fn _mm256_mask_cmp_epi64_mask(
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub unsafe fn _mm_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
static_assert_uimm_bits!(IMM3, 3);
- let neg_one = -1;
let a = a.as_i64x2();
let b = b.as_i64x2();
- let r = vpcmpq128(a, b, IMM3, neg_one);
- transmute(r)
+ let r = match IMM3 {
+ 0 => simd_eq(a, b),
+ 1 => simd_lt(a, b),
+ 2 => simd_le(a, b),
+ 3 => i64x2::splat(0),
+ 4 => simd_ne(a, b),
+ 5 => simd_ge(a, b),
+ 6 => simd_gt(a, b),
+ _ => i64x2::splat(-1),
+ };
+ simd_bitmask(r)
}
/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
@@ -31245,8 +31337,18 @@ pub unsafe fn _mm_mask_cmp_epi64_mask(
static_assert_uimm_bits!(IMM3, 3);
let a = a.as_i64x2();
let b = b.as_i64x2();
- let r = vpcmpq128(a, b, IMM3, k1 as i8);
- transmute(r)
+ let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::splat(0));
+ let r = match IMM3 {
+ 0 => simd_and(k1, simd_eq(a, b)),
+ 1 => simd_and(k1, simd_lt(a, b)),
+ 2 => simd_and(k1, simd_le(a, b)),
+ 3 => i64x2::splat(0),
+ 4 => simd_and(k1, simd_ne(a, b)),
+ 5 => simd_and(k1, simd_ge(a, b)),
+ 6 => simd_and(k1, simd_gt(a, b)),
+ _ => k1,
+ };
+ simd_bitmask(r)
}
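Taken together, each masked compare evaluates the predicate selected by `IMM3` per lane and then ANDs it with the corresponding bit of `k1`. A plain-Rust reference model of that behavior for one 8-lane signed case (my own sketch, not code from this patch):
/// Scalar model of `_mm512_mask_cmp_epi64_mask::<IMM3>(k1, a, b)` (illustrative only).
fn mask_cmp_epi64_model(k1: u8, a: [i64; 8], b: [i64; 8], imm3: u8) -> u8 {
    let mut k = 0u8;
    for j in 0..8 {
        let cmp = match imm3 {
            0 => a[j] == b[j], // _MM_CMPINT_EQ
            1 => a[j] < b[j],  // _MM_CMPINT_LT
            2 => a[j] <= b[j], // _MM_CMPINT_LE
            3 => false,        // _MM_CMPINT_FALSE
            4 => a[j] != b[j], // _MM_CMPINT_NE
            5 => a[j] >= b[j], // _MM_CMPINT_NLT
            6 => a[j] > b[j],  // _MM_CMPINT_NLE
            _ => true,         // _MM_CMPINT_TRUE
        };
        if cmp && (k1 >> j) & 1 == 1 {
            k |= 1 << j;
        }
    }
    k
}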
/// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a.
@@ -35212,13 +35314,7 @@ pub unsafe fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtss))]
pub unsafe fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
- transmute(vsqrtss(
- a.as_f32x4(),
- b.as_f32x4(),
- src.as_f32x4(),
- k,
- _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
- ))
+ vsqrtss(a, b, src, k, _MM_FROUND_CUR_DIRECTION)
}
/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
@@ -35229,13 +35325,7 @@ pub unsafe fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtss))]
pub unsafe fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
- transmute(vsqrtss(
- a.as_f32x4(),
- b.as_f32x4(),
- _mm_setzero_ps().as_f32x4(),
- k,
- _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
- ))
+ vsqrtss(a, b, _mm_setzero_ps(), k, _MM_FROUND_CUR_DIRECTION)
}
/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
@@ -35246,13 +35336,7 @@ pub unsafe fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtsd))]
pub unsafe fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
- transmute(vsqrtsd(
- a.as_f64x2(),
- b.as_f64x2(),
- src.as_f64x2(),
- k,
- _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
- ))
+ vsqrtsd(a, b, src, k, _MM_FROUND_CUR_DIRECTION)
}
/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
@@ -35263,13 +35347,7 @@ pub unsafe fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vsqrtsd))]
pub unsafe fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
- transmute(vsqrtsd(
- a.as_f64x2(),
- b.as_f64x2(),
- _mm_setzero_pd().as_f64x2(),
- k,
- _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
- ))
+ vsqrtsd(a, b, _mm_setzero_pd(), k, _MM_FROUND_CUR_DIRECTION)
}
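The masked scalar square-root intrinsics now pass the `__m128`/`__m128d` values straight through to `vsqrtss`/`vsqrtsd` with `_MM_FROUND_CUR_DIRECTION`, instead of round-tripping through `f32x4`/`f64x2` with a hard-coded truncating rounding mode. A usage sketch of the merge semantics on lane 0, illustrative only and under the same nightly/AVX-512F assumptions:
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn masked_sqrt_ss_demo() {
    use core::arch::x86_64::*;
    let src = _mm_set_ps(40.0, 30.0, 20.0, 10.0); // lane 0 is 10.0
    let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
    let b = _mm_set1_ps(9.0);
    // Mask bit 0 set: lane 0 becomes sqrt(9.0) = 3.0, upper lanes are copied from a.
    let r = _mm_mask_sqrt_ss(src, 0b1, a, b);
    assert_eq!(_mm_cvtss_f32(r), 3.0);
    // Mask bit 0 clear: lane 0 is taken from src instead.
    let r = _mm_mask_sqrt_ss(src, 0b0, a, b);
    assert_eq!(_mm_cvtss_f32(r), 10.0);
}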
/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
@@ -36014,13 +36092,13 @@ pub unsafe fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd213ss))]
+#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
let mut fmadd: f32 = simd_extract!(a, 0);
if (k & 0b00000001) != 0 {
let extractb: f32 = simd_extract!(b, 0);
let extractc: f32 = simd_extract!(c, 0);
- fmadd = vfmadd132ss(fmadd, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
+ fmadd = fmaf32(fmadd, extractb, extractc);
}
simd_insert!(a, 0, fmadd)
}
@@ -36031,14 +36109,14 @@ pub unsafe fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) ->
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd213ss))]
+#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
let mut fmadd: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract!(a, 0);
let extractb: f32 = simd_extract!(b, 0);
let extractc: f32 = simd_extract!(c, 0);
- fmadd = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
+ fmadd = fmaf32(extracta, extractb, extractc);
}
simd_insert!(a, 0, fmadd)
}
@@ -36049,13 +36127,13 @@ pub unsafe fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd213ss))]
+#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
let mut fmadd: f32 = simd_extract!(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract!(a, 0);
let extractb: f32 = simd_extract!(b, 0);
- fmadd = vfmadd132ss(extracta, extractb, fmadd, _MM_FROUND_CUR_DIRECTION);
+ fmadd = fmaf32(extracta, extractb, fmadd);
}
simd_insert!(c, 0, fmadd)
}
@@ -36066,13 +36144,13 @@ pub unsafe fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd213sd))]
+#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
let mut fmadd: f64 = simd_extract!(a, 0);
if (k & 0b00000001) != 0 {
let extractb: f64 = simd_extract!(b, 0);
let extractc: f64 = simd_extract!(c, 0);
- fmadd = vfmadd132sd(fmadd, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
+ fmadd = fmaf64(fmadd, extractb, extractc);
}
simd_insert!(a, 0, fmadd)
}
@@ -36083,14 +36161,14 @@ pub unsafe fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd213sd))]
+#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let mut fmadd: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract!(a, 0);
let extractb: f64 = simd_extract!(b, 0);
let extractc: f64 = simd_extract!(c, 0);
- fmadd = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
+ fmadd = fmaf64(extracta, extractb, extractc);
}
simd_insert!(a, 0, fmadd)
}
@@ -36101,13 +36179,13 @@ pub unsafe fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd213sd))]
+#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
let mut fmadd: f64 = simd_extract!(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract!(a, 0);
let extractb: f64 = simd_extract!(b, 0);
- fmadd = vfmadd132sd(extracta, extractb, fmadd, _MM_FROUND_CUR_DIRECTION);
+ fmadd = fmaf64(extracta, extractb, fmadd);
}
simd_insert!(c, 0, fmadd)
}
@@ -36118,14 +36196,14 @@ pub unsafe fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmsub213ss))]
+#[cfg_attr(test, assert_instr(vfmsub))]
pub unsafe fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
let mut fmsub: f32 = simd_extract!(a, 0);
if (k & 0b00000001) != 0 {
let extractb: f32 = simd_extract!(b, 0);
let extractc: f32 = simd_extract!(c, 0);
let extractc = -extractc;
- fmsub = vfmadd132ss(fmsub, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
+ fmsub = fmaf32(fmsub, extractb, extractc);
}
simd_insert!(a, 0, fmsub)
}
@@ -36136,7 +36214,7 @@ pub unsafe fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) ->
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmsub213ss))]
+#[cfg_attr(test, assert_instr(vfmsub))]
pub unsafe fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
let mut fmsub: f32 = 0.;
if (k & 0b00000001) != 0 {
@@ -36144,7 +36222,7 @@ pub unsafe fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -
let extractb: f32 = simd_extract!(b, 0);
let extractc: f32 = simd_extract!(c, 0);
let extractc = -extractc;
- fmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
+ fmsub = fmaf32(extracta, extractb, extractc);
}
simd_insert!(a, 0, fmsub)
}
@@ -36155,14 +36233,14 @@ pub unsafe fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmsub213ss))]
+#[cfg_attr(test, assert_instr(vfmsub))]
pub unsafe fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
let mut fmsub: f32 = simd_extract!(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract!(a, 0);
let extractb: f32 = simd_extract!(b, 0);
let extractc = -fmsub;
- fmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
+ fmsub = fmaf32(extracta, extractb, extractc);
}
simd_insert!(c, 0, fmsub)
}
@@ -36173,14 +36251,14 @@ pub unsafe fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmsub213sd))]
+#[cfg_attr(test, assert_instr(vfmsub))]
pub unsafe fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
let mut fmsub: f64 = simd_extract!(a, 0);
if (k & 0b00000001) != 0 {
let extractb: f64 = simd_extract!(b, 0);
let extractc: f64 = simd_extract!(c, 0);
let extractc = -extractc;
- fmsub = vfmadd132sd(fmsub, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
+ fmsub = fmaf64(fmsub, extractb, extractc);
}
simd_insert!(a, 0, fmsub)
}
@@ -36191,7 +36269,7 @@ pub unsafe fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmsub213sd))]
+#[cfg_attr(test, assert_instr(vfmsub))]
pub unsafe fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let mut fmsub: f64 = 0.;
if (k & 0b00000001) != 0 {
@@ -36199,7 +36277,7 @@ pub unsafe fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d
let extractb: f64 = simd_extract!(b, 0);
let extractc: f64 = simd_extract!(c, 0);
let extractc = -extractc;
- fmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
+ fmsub = fmaf64(extracta, extractb, extractc);
}
simd_insert!(a, 0, fmsub)
}
@@ -36210,14 +36288,14 @@ pub unsafe fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmsub213sd))]
+#[cfg_attr(test, assert_instr(vfmsub))]
pub unsafe fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
let mut fmsub: f64 = simd_extract!(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract!(a, 0);
let extractb: f64 = simd_extract!(b, 0);
let extractc = -fmsub;
- fmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
+ fmsub = fmaf64(extracta, extractb, extractc);
}
simd_insert!(c, 0, fmsub)
}
@@ -36228,14 +36306,14 @@ pub unsafe fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfnmadd213ss))]
+#[cfg_attr(test, assert_instr(vfnmadd))]
pub unsafe fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
let mut fnmadd: f32 = simd_extract!(a, 0);
if (k & 0b00000001) != 0 {
let extracta = -fnmadd;
let extractb: f32 = simd_extract!(b, 0);
let extractc: f32 = simd_extract!(c, 0);
- fnmadd = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
+ fnmadd = fmaf32(extracta, extractb, extractc);
}
simd_insert!(a, 0, fnmadd)
}
@@ -36246,7 +36324,7 @@ pub unsafe fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfnmadd213ss))]
+#[cfg_attr(test, assert_instr(vfnmadd))]
pub unsafe fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
let mut fnmadd: f32 = 0.;
if (k & 0b00000001) != 0 {
@@ -36254,7 +36332,7 @@ pub unsafe fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128)
let extracta = -extracta;
let extractb: f32 = simd_extract!(b, 0);
let extractc: f32 = simd_extract!(c, 0);
- fnmadd = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
+ fnmadd = fmaf32(extracta, extractb, extractc);
}
simd_insert!(a, 0, fnmadd)
}
@@ -36265,14 +36343,14 @@ pub unsafe fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfnmadd213ss))]
+#[cfg_attr(test, assert_instr(vfnmadd))]
pub unsafe fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
let mut fnmadd: f32 = simd_extract!(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract!(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract!(b, 0);
- fnmadd = vfmadd132ss(extracta, extractb, fnmadd, _MM_FROUND_CUR_DIRECTION);
+ fnmadd = fmaf32(extracta, extractb, fnmadd);
}
simd_insert!(c, 0, fnmadd)
}
@@ -36283,14 +36361,14 @@ pub unsafe fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfnmadd213sd))]
+#[cfg_attr(test, assert_instr(vfnmadd))]
pub unsafe fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
let mut fnmadd: f64 = simd_extract!(a, 0);
if (k & 0b00000001) != 0 {
let extracta = -fnmadd;
let extractb: f64 = simd_extract!(b, 0);
let extractc: f64 = simd_extract!(c, 0);
- fnmadd = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
+ fnmadd = fmaf64(extracta, extractb, extractc);
}
simd_insert!(a, 0, fnmadd)
}
@@ -36301,7 +36379,7 @@ pub unsafe fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfnmadd213sd))]
+#[cfg_attr(test, assert_instr(vfnmadd))]
pub unsafe fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let mut fnmadd: f64 = 0.;
if (k & 0b00000001) != 0 {
@@ -36309,7 +36387,7 @@ pub unsafe fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128
let extracta = -extracta;
let extractb: f64 = simd_extract!(b, 0);
let extractc: f64 = simd_extract!(c, 0);
- fnmadd = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
+ fnmadd = fmaf64(extracta, extractb, extractc);
}
simd_insert!(a, 0, fnmadd)
}
@@ -36320,14 +36398,14 @@ pub unsafe fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfnmadd213sd))]
+#[cfg_attr(test, assert_instr(vfnmadd))]
pub unsafe fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
let mut fnmadd: f64 = simd_extract!(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract!(a, 0);
let extracta = -extracta;
let extractb: f64 = simd_extract!(b, 0);
- fnmadd = vfmadd132sd(extracta, extractb, fnmadd, _MM_FROUND_CUR_DIRECTION);
+ fnmadd = fmaf64(extracta, extractb, fnmadd);
}
simd_insert!(c, 0, fnmadd)
}
@@ -36338,7 +36416,7 @@ pub unsafe fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfnmsub213ss))]
+#[cfg_attr(test, assert_instr(vfnmsub))]
pub unsafe fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
let mut fnmsub: f32 = simd_extract!(a, 0);
if (k & 0b00000001) != 0 {
@@ -36346,7 +36424,7 @@ pub unsafe fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -
let extractb: f32 = simd_extract!(b, 0);
let extractc: f32 = simd_extract!(c, 0);
let extractc = -extractc;
- fnmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
+ fnmsub = fmaf32(extracta, extractb, extractc);
}
simd_insert!(a, 0, fnmsub)
}
@@ -36357,7 +36435,7 @@ pub unsafe fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfnmsub213ss))]
+#[cfg_attr(test, assert_instr(vfnmsub))]
pub unsafe fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
let mut fnmsub: f32 = 0.;
if (k & 0b00000001) != 0 {
@@ -36366,7 +36444,7 @@ pub unsafe fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128)
let extractb: f32 = simd_extract!(b, 0);
let extractc: f32 = simd_extract!(c, 0);
let extractc = -extractc;
- fnmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
+ fnmsub = fmaf32(extracta, extractb, extractc);
}
simd_insert!(a, 0, fnmsub)
}
@@ -36377,7 +36455,7 @@ pub unsafe fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfnmsub213ss))]
+#[cfg_attr(test, assert_instr(vfnmsub))]
pub unsafe fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
let mut fnmsub: f32 = simd_extract!(c, 0);
if (k & 0b00000001) != 0 {
@@ -36385,7 +36463,7 @@ pub unsafe fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8)
let extracta = -extracta;
let extractb: f32 = simd_extract!(b, 0);
let extractc = -fnmsub;
- fnmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
+ fnmsub = fmaf32(extracta, extractb, extractc);
}
simd_insert!(c, 0, fnmsub)
}
@@ -36396,7 +36474,7 @@ pub unsafe fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfnmsub213sd))]
+#[cfg_attr(test, assert_instr(vfnmsub))]
pub unsafe fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
let mut fnmsub: f64 = simd_extract!(a, 0);
if (k & 0b00000001) != 0 {
@@ -36404,7 +36482,7 @@ pub unsafe fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d
let extractb: f64 = simd_extract!(b, 0);
let extractc: f64 = simd_extract!(c, 0);
let extractc = -extractc;
- fnmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
+ fnmsub = fmaf64(extracta, extractb, extractc);
}
simd_insert!(a, 0, fnmsub)
}
@@ -36415,7 +36493,7 @@ pub unsafe fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfnmsub213sd))]
+#[cfg_attr(test, assert_instr(vfnmsub))]
pub unsafe fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let mut fnmsub: f64 = 0.;
if (k & 0b00000001) != 0 {
@@ -36424,7 +36502,7 @@ pub unsafe fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128
let extractb: f64 = simd_extract!(b, 0);
let extractc: f64 = simd_extract!(c, 0);
let extractc = -extractc;
- fnmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
+ fnmsub = fmaf64(extracta, extractb, extractc);
}
simd_insert!(a, 0, fnmsub)
}
@@ -36435,7 +36513,7 @@ pub unsafe fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfnmsub213sd))]
+#[cfg_attr(test, assert_instr(vfnmsub))]
pub unsafe fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
let mut fnmsub: f64 = simd_extract!(c, 0);
if (k & 0b00000001) != 0 {
@@ -36443,7 +36521,7 @@ pub unsafe fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask
let extracta = -extracta;
let extractb: f64 = simd_extract!(b, 0);
let extractc = -fnmsub;
- fnmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
+ fnmsub = fmaf64(extracta, extractb, extractc);
}
simd_insert!(c, 0, fnmsub)
}
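All of the masked scalar FMA variants above now compute lane 0 with a generic fused multiply-add (`fmaf32`/`fmaf64`) on the extracted scalars, negating `c` for the *sub* forms and `a` for the *n* forms; the `assert_instr` checks are relaxed to the bare `vfmadd`/`vfmsub`/`vfnmadd`/`vfnmsub` mnemonics, presumably because LLVM is free to pick any of the 132/213/231 encodings. A plain-Rust model of lane 0 of `_mm_mask_fmadd_ss` (my own sketch, not from the patch):
/// Hypothetical scalar model of lane 0 of `_mm_mask_fmadd_ss`:
/// if mask bit 0 is set, lane 0 becomes fma(a0, b0, c0); otherwise it stays a0.
/// The fmsub/fnmadd/fnmsub variants only differ in which operands are negated.
fn mask_fmadd_ss_lane0(k: u8, a0: f32, b0: f32, c0: f32) -> f32 {
    if k & 1 != 0 {
        a0.mul_add(b0, c0) // fused multiply-add, like fmaf32 in the intrinsic body
    } else {
        a0
    }
}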
@@ -37357,11 +37435,7 @@ pub unsafe fn _mm_maskz_min_round_sd(
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_sqrt_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
static_assert_rounding!(ROUNDING);
- let a = a.as_f32x4();
- let b = b.as_f32x4();
- let zero = _mm_setzero_ps().as_f32x4();
- let r = vsqrtss(a, b, zero, 0b1, ROUNDING);
- transmute(r)
+ vsqrtss(a, b, _mm_setzero_ps(), 0b1, ROUNDING)
}
/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
@@ -37386,11 +37460,7 @@ pub unsafe fn _mm_mask_sqrt_round_ss(
b: __m128,
) -> __m128 {
static_assert_rounding!(ROUNDING);
- let a = a.as_f32x4();
- let b = b.as_f32x4();
- let src = src.as_f32x4();
- let r = vsqrtss(a, b, src, k, ROUNDING);
- transmute(r)
+ vsqrtss(a, b, src, k, ROUNDING)
}
/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
@@ -37414,11 +37484,7 @@ pub unsafe fn _mm_maskz_sqrt_round_ss(
b: __m128,
) -> __m128 {
static_assert_rounding!(ROUNDING);
- let a = a.as_f32x4();
- let b = b.as_f32x4();
- let zero = _mm_setzero_ps().as_f32x4();
- let r = vsqrtss(a, b, zero, k, ROUNDING);
- transmute(r)
+ vsqrtss(a, b, _mm_setzero_ps(), k, ROUNDING)
}
/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
@@ -37438,11 +37504,7 @@ pub unsafe fn _mm_maskz_sqrt_round_ss(
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_sqrt_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
static_assert_rounding!(ROUNDING);
- let a = a.as_f64x2();
- let b = b.as_f64x2();
- let zero = _mm_setzero_pd().as_f64x2();
- let r = vsqrtsd(a, b, zero, 0b1, ROUNDING);
- transmute(r)
+ vsqrtsd(a, b, _mm_setzero_pd(), 0b1, ROUNDING)
}
/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
@@ -37467,11 +37529,7 @@ pub unsafe fn _mm_mask_sqrt_round_sd(
b: __m128d,
) -> __m128d {
static_assert_rounding!(ROUNDING);
- let a = a.as_f64x2();
- let b = b.as_f64x2();
- let src = src.as_f64x2();
- let r = vsqrtsd(a, b, src, k, ROUNDING);
- transmute(r)
+ vsqrtsd(a, b, src, k, ROUNDING)
}
/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
@@ -37495,11 +37553,7 @@ pub unsafe fn _mm_maskz_sqrt_round_sd(
b: __m128d,
) -> __m128d {
static_assert_rounding!(ROUNDING);
- let a = a.as_f64x2();
- let b = b.as_f64x2();
- let zero = _mm_setzero_pd().as_f64x2();
- let r = vsqrtsd(a, b, zero, k, ROUNDING);
- transmute(r)
+ vsqrtsd(a, b, _mm_setzero_pd(), k, ROUNDING)
}
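
The sqrt_round refactor above only drops the intermediate `as_f32x4`/`as_f64x2` and `transmute` round trips; the lane behaviour is unchanged. A rough scalar model of the unmasked `_mm_sqrt_round_ss` data flow, ignoring the `ROUNDING` immediate (which only selects the hardware rounding mode; the function name below is illustrative):

```rust
// Lane 0 of the result is sqrt(b0); lanes 1..=3 are copied from a, matching
// the doc comment above. This models data flow only, not rounding control.
fn sqrt_round_ss_model(a: [f32; 4], b: [f32; 4]) -> [f32; 4] {
    [b[0].sqrt(), a[1], a[2], a[3]]
}

fn main() {
    let r = sqrt_round_ss_model([1.0, 2.0, 3.0, 4.0], [9.0, -1.0, -1.0, -1.0]);
    assert_eq!(r, [3.0, 2.0, 3.0, 4.0]);
}
```
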
/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
@@ -38198,14 +38252,14 @@ pub unsafe fn _mm_maskz_scalef_round_sd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd213ss, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_fmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
static_assert_rounding!(ROUNDING);
let extracta: f32 = simd_extract!(a, 0);
let extractb: f32 = simd_extract!(b, 0);
let extractc: f32 = simd_extract!(c, 0);
- let r = vfmadd132ss(extracta, extractb, extractc, ROUNDING);
+ let r = vfmaddssround(extracta, extractb, extractc, ROUNDING);
simd_insert!(a, 0, r)
}
@@ -38222,7 +38276,7 @@ pub unsafe fn _mm_fmadd_round_ss(a: __m128, b: __m128, c: _
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd213ss, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask_fmadd_round_ss<const ROUNDING: i32>(
a: __m128,
@@ -38235,7 +38289,7 @@ pub unsafe fn _mm_mask_fmadd_round_ss(
if (k & 0b00000001) != 0 {
let extractb: f32 = simd_extract!(b, 0);
let extractc: f32 = simd_extract!(c, 0);
- fmadd = vfmadd132ss(fmadd, extractb, extractc, ROUNDING);
+ fmadd = vfmaddssround(fmadd, extractb, extractc, ROUNDING);
}
simd_insert!(a, 0, fmadd)
}
@@ -38253,7 +38307,7 @@ pub unsafe fn _mm_mask_fmadd_round_ss(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd213ss, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_maskz_fmadd_round_ss<const ROUNDING: i32>(
k: __mmask8,
@@ -38267,7 +38321,7 @@ pub unsafe fn _mm_maskz_fmadd_round_ss(
let extracta: f32 = simd_extract!(a, 0);
let extractb: f32 = simd_extract!(b, 0);
let extractc: f32 = simd_extract!(c, 0);
- fmadd = vfmadd132ss(extracta, extractb, extractc, ROUNDING);
+ fmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
}
simd_insert!(a, 0, fmadd)
}
@@ -38285,7 +38339,7 @@ pub unsafe fn _mm_maskz_fmadd_round_ss(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd213ss, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask3_fmadd_round_ss<const ROUNDING: i32>(
a: __m128,
@@ -38298,7 +38352,7 @@ pub unsafe fn _mm_mask3_fmadd_round_ss(
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract!(a, 0);
let extractb: f32 = simd_extract!(b, 0);
- fmadd = vfmadd132ss(extracta, extractb, fmadd, ROUNDING);
+ fmadd = vfmaddssround(extracta, extractb, fmadd, ROUNDING);
}
simd_insert!(c, 0, fmadd)
}
@@ -38316,7 +38370,7 @@ pub unsafe fn _mm_mask3_fmadd_round_ss(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd213sd, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_fmadd_round_sd<const ROUNDING: i32>(
a: __m128d,
@@ -38327,7 +38381,7 @@ pub unsafe fn _mm_fmadd_round_sd(
let extracta: f64 = simd_extract!(a, 0);
let extractb: f64 = simd_extract!(b, 0);
let extractc: f64 = simd_extract!(c, 0);
- let fmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING);
+ let fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
simd_insert!(a, 0, fmadd)
}
@@ -38344,7 +38398,7 @@ pub unsafe fn _mm_fmadd_round_sd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd213sd, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask_fmadd_round_sd<const ROUNDING: i32>(
a: __m128d,
@@ -38357,7 +38411,7 @@ pub unsafe fn _mm_mask_fmadd_round_sd(
if (k & 0b00000001) != 0 {
let extractb: f64 = simd_extract!(b, 0);
let extractc: f64 = simd_extract!(c, 0);
- fmadd = vfmadd132sd(fmadd, extractb, extractc, ROUNDING);
+ fmadd = vfmaddsdround(fmadd, extractb, extractc, ROUNDING);
}
simd_insert!(a, 0, fmadd)
}
@@ -38375,7 +38429,7 @@ pub unsafe fn _mm_mask_fmadd_round_sd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd213sd, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_maskz_fmadd_round_sd<const ROUNDING: i32>(
k: __mmask8,
@@ -38389,7 +38443,7 @@ pub unsafe fn _mm_maskz_fmadd_round_sd(
let extracta: f64 = simd_extract!(a, 0);
let extractb: f64 = simd_extract!(b, 0);
let extractc: f64 = simd_extract!(c, 0);
- fmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING);
+ fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
}
simd_insert!(a, 0, fmadd)
}
@@ -38407,7 +38461,7 @@ pub unsafe fn _mm_maskz_fmadd_round_sd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd213sd, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask3_fmadd_round_sd<const ROUNDING: i32>(
a: __m128d,
@@ -38420,7 +38474,7 @@ pub unsafe fn _mm_mask3_fmadd_round_sd(
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract!(a, 0);
let extractb: f64 = simd_extract!(b, 0);
- fmadd = vfmadd132sd(extracta, extractb, fmadd, ROUNDING);
+ fmadd = vfmaddsdround(extracta, extractb, fmadd, ROUNDING);
}
simd_insert!(c, 0, fmadd)
}
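
The four fmadd_round_ss/sd variants above differ only in where the untouched result lane comes from when mask bit 0 is clear: `a` for the plain and `mask` forms, zero for `maskz`, and `c` for `mask3`. A scalar sketch of those three masking flavours on the low lane, with `mul_add` standing in for `vfmaddssround`/`vfmaddsdround` and the `ROUNDING` immediate ignored (helper names are made up for illustration):

```rust
// Low-lane model of the three masked fmadd flavours shown above.
fn mask_fmadd_lane(a: f32, k: u8, b: f32, c: f32) -> f32 {
    if (k & 1) != 0 { a.mul_add(b, c) } else { a } // writemask: fall back to a0
}

fn maskz_fmadd_lane(k: u8, a: f32, b: f32, c: f32) -> f32 {
    if (k & 1) != 0 { a.mul_add(b, c) } else { 0.0 } // zeromask: fall back to 0
}

fn mask3_fmadd_lane(a: f32, b: f32, c: f32, k: u8) -> f32 {
    if (k & 1) != 0 { a.mul_add(b, c) } else { c } // mask3: fall back to c0
}

fn main() {
    assert_eq!(mask_fmadd_lane(2.0, 1, 3.0, 4.0), 10.0); // 2*3 + 4
    assert_eq!(maskz_fmadd_lane(0, 2.0, 3.0, 4.0), 0.0);
    assert_eq!(mask3_fmadd_lane(2.0, 3.0, 4.0, 0), 4.0);
}
```
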
@@ -38438,7 +38492,7 @@ pub unsafe fn _mm_mask3_fmadd_round_sd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmsub213ss, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_fmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
static_assert_rounding!(ROUNDING);
@@ -38446,7 +38500,7 @@ pub unsafe fn _mm_fmsub_round_ss(a: __m128, b: __m128, c: _
let extractb: f32 = simd_extract!(b, 0);
let extractc: f32 = simd_extract!(c, 0);
let extractc = -extractc;
- let fmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING);
+ let fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
simd_insert!(a, 0, fmsub)
}
@@ -38463,7 +38517,7 @@ pub unsafe fn _mm_fmsub_round_ss(a: __m128, b: __m128, c: _
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmsub213ss, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask_fmsub_round_ss<const ROUNDING: i32>(
a: __m128,
@@ -38477,7 +38531,7 @@ pub unsafe fn _mm_mask_fmsub_round_ss(
let extractb: f32 = simd_extract!(b, 0);
let extractc: f32 = simd_extract!(c, 0);
let extractc = -extractc;
- fmsub = vfmadd132ss(fmsub, extractb, extractc, ROUNDING);
+ fmsub = vfmaddssround(fmsub, extractb, extractc, ROUNDING);
}
simd_insert!(a, 0, fmsub)
}
@@ -38495,7 +38549,7 @@ pub unsafe fn _mm_mask_fmsub_round_ss(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmsub213ss, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_maskz_fmsub_round_ss<const ROUNDING: i32>(
k: __mmask8,
@@ -38510,7 +38564,7 @@ pub unsafe fn _mm_maskz_fmsub_round_ss(
let extractb: f32 = simd_extract!(b, 0);
let extractc: f32 = simd_extract!(c, 0);
let extractc = -extractc;
- fmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING);
+ fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
}
simd_insert!(a, 0, fmsub)
}
@@ -38528,7 +38582,7 @@ pub unsafe fn _mm_maskz_fmsub_round_ss(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmsub213ss, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask3_fmsub_round_ss<const ROUNDING: i32>(
a: __m128,
@@ -38542,7 +38596,7 @@ pub unsafe fn _mm_mask3_fmsub_round_ss(
let extracta: f32 = simd_extract!(a, 0);
let extractb: f32 = simd_extract!(b, 0);
let extractc = -fmsub;
- fmsub = vfmadd132ss(extracta, extractb, extractc, ROUNDING);
+ fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
}
simd_insert!(c, 0, fmsub)
}
@@ -38560,7 +38614,7 @@ pub unsafe fn _mm_mask3_fmsub_round_ss(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmsub213sd, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_fmsub_round_sd<const ROUNDING: i32>(
a: __m128d,
@@ -38572,7 +38626,7 @@ pub unsafe fn _mm_fmsub_round_sd(
let extractb: f64 = simd_extract!(b, 0);
let extractc: f64 = simd_extract!(c, 0);
let extractc = -extractc;
- let fmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING);
+ let fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
simd_insert!(a, 0, fmsub)
}
@@ -38589,7 +38643,7 @@ pub unsafe fn _mm_fmsub_round_sd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmsub213sd, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask_fmsub_round_sd<const ROUNDING: i32>(
a: __m128d,
@@ -38603,7 +38657,7 @@ pub unsafe fn _mm_mask_fmsub_round_sd(
let extractb: f64 = simd_extract!(b, 0);
let extractc: f64 = simd_extract!(c, 0);
let extractc = -extractc;
- fmsub = vfmadd132sd(fmsub, extractb, extractc, ROUNDING);
+ fmsub = vfmaddsdround(fmsub, extractb, extractc, ROUNDING);
}
simd_insert!(a, 0, fmsub)
}
@@ -38621,7 +38675,7 @@ pub unsafe fn _mm_mask_fmsub_round_sd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmsub213sd, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_maskz_fmsub_round_sd<const ROUNDING: i32>(
k: __mmask8,
@@ -38636,7 +38690,7 @@ pub unsafe fn _mm_maskz_fmsub_round_sd(
let extractb: f64 = simd_extract!(b, 0);
let extractc: f64 = simd_extract!(c, 0);
let extractc = -extractc;
- fmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING);
+ fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
}
simd_insert!(a, 0, fmsub)
}
@@ -38654,7 +38708,7 @@ pub unsafe fn _mm_maskz_fmsub_round_sd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmsub213sd, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask3_fmsub_round_sd<const ROUNDING: i32>(
a: __m128d,
@@ -38668,7 +38722,7 @@ pub unsafe fn _mm_mask3_fmsub_round_sd(
let extracta: f64 = simd_extract!(a, 0);
let extractb: f64 = simd_extract!(b, 0);
let extractc = -fmsub;
- fmsub = vfmadd132sd(extracta, extractb, extractc, ROUNDING);
+ fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
}
simd_insert!(c, 0, fmsub)
}
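
As in the unrounded forms earlier in the diff, fmsub is expressed through the same fma primitive by negating only the addend. A one-line scalar check of that identity (exact for these small integer inputs):

```rust
fn main() {
    let (a, b, c) = (2.0_f64, 3.0, 4.0);
    // fmsub(a, b, c) = a*b - c, computed as fma(a, b, -c) in the diff above.
    assert_eq!(a.mul_add(b, -c), a * b - c); // 2.0 for these exact inputs
}
```
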
@@ -38686,7 +38740,7 @@ pub unsafe fn _mm_mask3_fmsub_round_sd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfnmadd213ss, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_fnmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
static_assert_rounding!(ROUNDING);
@@ -38694,7 +38748,7 @@ pub unsafe fn _mm_fnmadd_round_ss(a: __m128, b: __m128, c:
let extracta = -extracta;
let extractb: f32 = simd_extract!(b, 0);
let extractc: f32 = simd_extract!(c, 0);
- let fnmadd = vfmadd132ss(extracta, extractb, extractc, ROUNDING);
+ let fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
simd_insert!(a, 0, fnmadd)
}
@@ -38711,7 +38765,7 @@ pub unsafe fn _mm_fnmadd_round_ss(a: __m128, b: __m128, c:
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfnmadd213ss, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask_fnmadd_round_ss<const ROUNDING: i32>(
a: __m128,
@@ -38725,7 +38779,7 @@ pub unsafe fn _mm_mask_fnmadd_round_ss(
let extracta = -fnmadd;
let extractb: f32 = simd_extract!(b, 0);
let extractc: f32 = simd_extract!(c, 0);
- fnmadd = vfmadd132ss(extracta, extractb, extractc, ROUNDING);
+ fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
}
simd_insert!(a, 0, fnmadd)
}
@@ -38743,7 +38797,7 @@ pub unsafe fn _mm_mask_fnmadd_round_ss(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfnmadd213ss, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_maskz_fnmadd_round_ss<const ROUNDING: i32>(
k: __mmask8,
@@ -38758,7 +38812,7 @@ pub unsafe fn _mm_maskz_fnmadd_round_ss(
let extracta = -extracta;
let extractb: f32 = simd_extract!(b, 0);
let extractc: f32 = simd_extract!(c, 0);
- fnmadd = vfmadd132ss(extracta, extractb, extractc, ROUNDING);
+ fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
}
simd_insert!(a, 0, fnmadd)
}
@@ -38776,7 +38830,7 @@ pub unsafe fn _mm_maskz_fnmadd_round_ss(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfnmadd213ss, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask3_fnmadd_round_ss<const ROUNDING: i32>(
a: __m128,
@@ -38790,7 +38844,7 @@ pub unsafe fn _mm_mask3_fnmadd_round_ss(
let extracta: f32 = simd_extract!(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract!(b, 0);
- fnmadd = vfmadd132ss(extracta, extractb, fnmadd, ROUNDING);
+ fnmadd = vfmaddssround(extracta, extractb, fnmadd, ROUNDING);
}
simd_insert!(c, 0, fnmadd)
}
@@ -38808,7 +38862,7 @@ pub unsafe fn _mm_mask3_fnmadd_round_ss(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfnmadd213sd, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_fnmadd_round_sd<const ROUNDING: i32>(
a: __m128d,
@@ -38820,7 +38874,7 @@ pub unsafe fn _mm_fnmadd_round_sd(
let extracta = -extracta;
let extractb: f64 = simd_extract!(b, 0);
let extractc: f64 = simd_extract!(c, 0);
- let fnmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING);
+ let fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
simd_insert!(a, 0, fnmadd)
}
@@ -38837,7 +38891,7 @@ pub unsafe fn _mm_fnmadd_round_sd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfnmadd213sd, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask_fnmadd_round_sd<const ROUNDING: i32>(
a: __m128d,
@@ -38851,7 +38905,7 @@ pub unsafe fn _mm_mask_fnmadd_round_sd(
let extracta = -fnmadd;
let extractb: f64 = simd_extract!(b, 0);
let extractc: f64 = simd_extract!(c, 0);
- fnmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING);
+ fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
}
simd_insert!(a, 0, fnmadd)
}
@@ -38869,7 +38923,7 @@ pub unsafe fn _mm_mask_fnmadd_round_sd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfnmadd213sd, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_maskz_fnmadd_round_sd<const ROUNDING: i32>(
k: __mmask8,
@@ -38884,7 +38938,7 @@ pub unsafe fn _mm_maskz_fnmadd_round_sd(
let extracta = -extracta;
let extractb: f64 = simd_extract!(b, 0);
let extractc: f64 = simd_extract!(c, 0);
- fnmadd = vfmadd132sd(extracta, extractb, extractc, ROUNDING);
+ fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
}
simd_insert!(a, 0, fnmadd)
}
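
fnmadd follows the mirror-image convention: only the multiplicand is negated before the fused call, so the low lane becomes c0 - a0*b0. A matching scalar check, again with `mul_add` standing in for the rounding intrinsic:

```rust
fn main() {
    let (a, b, c) = (2.0_f64, 3.0, 10.0);
    // fnmadd(a, b, c) = -(a*b) + c, computed as fma(-a, b, c) in the diff above.
    assert_eq!((-a).mul_add(b, c), c - a * b); // 4.0 for these exact inputs
}
```
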
@@ -38902,7 +38956,7 @@ pub unsafe fn _mm_maskz_fnmadd_round_sd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfnmadd213sd, ROUNDING = 8))]
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask3_fnmadd_round_sd