From 37a37e28917645ea707c988726359a10cf6c264d Mon Sep 17 00:00:00 2001 From: Daniel Smith Date: Sat, 30 May 2020 19:52:00 +0000 Subject: [PATCH] Add 64 bit AVX512f le and ge comparisons --- crates/core_arch/src/x86/avx512f.rs | 84 ++++++++++++++++++++++++++ crates/core_arch/src/x86_64/avx512f.rs | 72 ++++++++++++++++++++++ 2 files changed, 156 insertions(+) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 421146d53d..7bc2671a96 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -136,6 +136,48 @@ pub unsafe fn _mm512_mask_cmpgt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) _mm512_cmpgt_epu64_mask(a, b) & m } +/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmpgt_epu64_mask(b, a) +} + +///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmple_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmpgt_epu64_mask(b, a) & m +} + +/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmplt_epu64_mask(b, a) +} + +///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmpge_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmplt_epu64_mask(b, a) & m +} + /// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu64) @@ -199,6 +241,48 @@ pub unsafe fn _mm512_mask_cmpgt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) _mm512_cmpgt_epi64_mask(a, b) & m } +/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmpgt_epi64_mask(b, a) +} + +///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmple_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmpgt_epi64_mask(b, a) & m +} + +/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmplt_epi64_mask(b, a) +} + +///Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmpge_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmplt_epi64_mask(b, a) & m +} + /// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi64) diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index ad2e29e5cc..0b32988d7c 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -83,6 +83,42 @@ mod tests { assert_eq!(r, 0b01001010); } + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmple_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + assert_eq!(_mm512_cmple_epu64_mask(a, b), _mm512_cmpgt_epu64_mask(b, a)) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmple_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01111010; + assert_eq!( + _mm512_mask_cmple_epu64_mask(mask, a, b), + _mm512_mask_cmpgt_epu64_mask(mask, b, a) + ); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpge_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + assert_eq!(_mm512_cmpge_epu64_mask(a, b), _mm512_cmplt_epu64_mask(b, a)) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpge_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01111010; + assert_eq!( + _mm512_mask_cmpge_epu64_mask(mask, a, b), + _mm512_mask_cmplt_epu64_mask(mask, b, a) + ); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cmpeq_epu64_mask() { let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); @@ -134,6 +170,42 @@ mod tests { assert_eq!(r, 0b00000100); } + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmple_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + assert_eq!(_mm512_cmple_epi64_mask(a, b), _mm512_cmpgt_epi64_mask(b, a)) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmple_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01111010; + assert_eq!( + _mm512_mask_cmple_epi64_mask(mask, a, b), + _mm512_mask_cmpgt_epi64_mask(mask, b, a) + ); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpge_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + assert_eq!(_mm512_cmpge_epi64_mask(a, b), _mm512_cmplt_epi64_mask(b, a)) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpge_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01111010; + assert_eq!( + _mm512_mask_cmpge_epi64_mask(mask, a, b), + _mm512_mask_cmplt_epi64_mask(mask, b, a) + ); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cmpeq_epi64_mask() { let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);