Revert "[X86] Change target of __builtin_ia32_cmp[p|s][s|d] from avx …

…into sse/sse2 (#67410)" Does not respect `__attribute__((target("avx"))`. This reverts commit ccd5b8d.
llvm · Oct 5, 2023 · 821dfc3 · 821dfc3
1 parent baecc9e
commit 821dfc3
Show file tree

Hide file tree

Showing 11 changed files with 353 additions and 393 deletions.
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
@@ -233,8 +233,6 @@ TARGET_BUILTIN(__builtin_ia32_minps, "V4fV4fV4f", "ncV:128:", "sse")
 TARGET_BUILTIN(__builtin_ia32_maxps, "V4fV4fV4f", "ncV:128:", "sse")
 TARGET_BUILTIN(__builtin_ia32_minss, "V4fV4fV4f", "ncV:128:", "sse")
 TARGET_BUILTIN(__builtin_ia32_maxss, "V4fV4fV4f", "ncV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "sse")
 
 TARGET_BUILTIN(__builtin_ia32_cmpeqpd, "V2dV2dV2d", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cmpltpd, "V2dV2dV2d", "ncV:128:", "sse2")
@@ -252,8 +250,6 @@ TARGET_BUILTIN(__builtin_ia32_cmpneqsd, "V2dV2dV2d", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cmpnltsd, "V2dV2dV2d", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cmpnlesd, "V2dV2dV2d", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_cmpordsd, "V2dV2dV2d", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_minpd, "V2dV2dV2d", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_maxpd, "V2dV2dV2d", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_minsd, "V2dV2dV2d", "ncV:128:", "sse2")
@@ -473,8 +469,12 @@ TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dIi", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fIi", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "avx")
 TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "avx")
 TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "avx")
 TARGET_BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dIi", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_vextractf128_ps256, "V4fV8fIi", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_vextractf128_si256, "V4iV8iIi", "ncV:256:", "avx")

diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
@@ -1569,6 +1569,15 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
   ((__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \
                                      (__v4df)(__m256d)(b), (int)(mask)))
 
+/* Compare */
+#define _CMP_EQ_OQ    0x00 /* Equal (ordered, non-signaling)  */
+#define _CMP_LT_OS    0x01 /* Less-than (ordered, signaling)  */
+#define _CMP_LE_OS    0x02 /* Less-than-or-equal (ordered, signaling)  */
+#define _CMP_UNORD_Q  0x03 /* Unordered (non-signaling)  */
+#define _CMP_NEQ_UQ   0x04 /* Not-equal (unordered, non-signaling)  */
+#define _CMP_NLT_US   0x05 /* Not-less-than (unordered, signaling)  */
+#define _CMP_NLE_US   0x06 /* Not-less-than-or-equal (unordered, signaling)  */
+#define _CMP_ORD_Q    0x07 /* Ordered (non-signaling)   */
 #define _CMP_EQ_UQ    0x08 /* Equal (unordered, non-signaling)  */
 #define _CMP_NGE_US   0x09 /* Not-greater-than-or-equal (unordered, signaling)  */
 #define _CMP_NGT_US   0x0a /* Not-greater-than (unordered, signaling)  */
@@ -1594,6 +1603,126 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
 #define _CMP_GT_OQ    0x1e /* Greater-than (ordered, non-signaling)  */
 #define _CMP_TRUE_US  0x1f /* True (unordered, signaling)  */
 
+/// Compares each of the corresponding double-precision values of two
+///    128-bit vectors of [2 x double], using the operation specified by the
+///    immediate integer operand.
+///
+///    Returns a [2 x double] vector consisting of two doubles corresponding to
+///    the two comparison results: zero if the comparison is false, and all 1's
+///    if the comparison is true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c);
+/// \endcode
+///
+/// This intrinsic corresponds to the <c> VCMPPD </c> instruction.
+///
+/// \param a
+///    A 128-bit vector of [2 x double].
+/// \param b
+///    A 128-bit vector of [2 x double].
+/// \param c
+///    An immediate integer operand, with bits [4:0] specifying which comparison
+///    operation to use: \n
+///    0x00: Equal (ordered, non-signaling) \n
+///    0x01: Less-than (ordered, signaling) \n
+///    0x02: Less-than-or-equal (ordered, signaling) \n
+///    0x03: Unordered (non-signaling) \n
+///    0x04: Not-equal (unordered, non-signaling) \n
+///    0x05: Not-less-than (unordered, signaling) \n
+///    0x06: Not-less-than-or-equal (unordered, signaling) \n
+///    0x07: Ordered (non-signaling) \n
+///    0x08: Equal (unordered, non-signaling) \n
+///    0x09: Not-greater-than-or-equal (unordered, signaling) \n
+///    0x0A: Not-greater-than (unordered, signaling) \n
+///    0x0B: False (ordered, non-signaling) \n
+///    0x0C: Not-equal (ordered, non-signaling) \n
+///    0x0D: Greater-than-or-equal (ordered, signaling) \n
+///    0x0E: Greater-than (ordered, signaling) \n
+///    0x0F: True (unordered, non-signaling) \n
+///    0x10: Equal (ordered, signaling) \n
+///    0x11: Less-than (ordered, non-signaling) \n
+///    0x12: Less-than-or-equal (ordered, non-signaling) \n
+///    0x13: Unordered (signaling) \n
+///    0x14: Not-equal (unordered, signaling) \n
+///    0x15: Not-less-than (unordered, non-signaling) \n
+///    0x16: Not-less-than-or-equal (unordered, non-signaling) \n
+///    0x17: Ordered (signaling) \n
+///    0x18: Equal (unordered, signaling) \n
+///    0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
+///    0x1A: Not-greater-than (unordered, non-signaling) \n
+///    0x1B: False (ordered, signaling) \n
+///    0x1C: Not-equal (ordered, signaling) \n
+///    0x1D: Greater-than-or-equal (ordered, non-signaling) \n
+///    0x1E: Greater-than (ordered, non-signaling) \n
+///    0x1F: True (unordered, signaling)
+/// \returns A 128-bit vector of [2 x double] containing the comparison results.
+#define _mm_cmp_pd(a, b, c) \
+  ((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
+                                 (__v2df)(__m128d)(b), (c)))
+
+/// Compares each of the corresponding values of two 128-bit vectors of
+///    [4 x float], using the operation specified by the immediate integer
+///    operand.
+///
+///    Returns a [4 x float] vector consisting of four floats corresponding to
+///    the four comparison results: zero if the comparison is false, and all 1's
+///    if the comparison is true.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c);
+/// \endcode
+///
+/// This intrinsic corresponds to the <c> VCMPPS </c> instruction.
+///
+/// \param a
+///    A 128-bit vector of [4 x float].
+/// \param b
+///    A 128-bit vector of [4 x float].
+/// \param c
+///    An immediate integer operand, with bits [4:0] specifying which comparison
+///    operation to use: \n
+///    0x00: Equal (ordered, non-signaling) \n
+///    0x01: Less-than (ordered, signaling) \n
+///    0x02: Less-than-or-equal (ordered, signaling) \n
+///    0x03: Unordered (non-signaling) \n
+///    0x04: Not-equal (unordered, non-signaling) \n
+///    0x05: Not-less-than (unordered, signaling) \n
+///    0x06: Not-less-than-or-equal (unordered, signaling) \n
+///    0x07: Ordered (non-signaling) \n
+///    0x08: Equal (unordered, non-signaling) \n
+///    0x09: Not-greater-than-or-equal (unordered, signaling) \n
+///    0x0A: Not-greater-than (unordered, signaling) \n
+///    0x0B: False (ordered, non-signaling) \n
+///    0x0C: Not-equal (ordered, non-signaling) \n
+///    0x0D: Greater-than-or-equal (ordered, signaling) \n
+///    0x0E: Greater-than (ordered, signaling) \n
+///    0x0F: True (unordered, non-signaling) \n
+///    0x10: Equal (ordered, signaling) \n
+///    0x11: Less-than (ordered, non-signaling) \n
+///    0x12: Less-than-or-equal (ordered, non-signaling) \n
+///    0x13: Unordered (signaling) \n
+///    0x14: Not-equal (unordered, signaling) \n
+///    0x15: Not-less-than (unordered, non-signaling) \n
+///    0x16: Not-less-than-or-equal (unordered, non-signaling) \n
+///    0x17: Ordered (signaling) \n
+///    0x18: Equal (unordered, signaling) \n
+///    0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
+///    0x1A: Not-greater-than (unordered, non-signaling) \n
+///    0x1B: False (ordered, signaling) \n
+///    0x1C: Not-equal (ordered, signaling) \n
+///    0x1D: Greater-than-or-equal (ordered, non-signaling) \n
+///    0x1E: Greater-than (ordered, non-signaling) \n
+///    0x1F: True (unordered, signaling)
+/// \returns A 128-bit vector of [4 x float] containing the comparison results.
+#define _mm_cmp_ps(a, b, c) \
+  ((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
+                                (__v4sf)(__m128)(b), (c)))
+
 /// Compares each of the corresponding double-precision values of two
 ///    256-bit vectors of [4 x double], using the operation specified by the
 ///    immediate integer operand.
@@ -1714,6 +1843,124 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
   ((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
                                    (__v8sf)(__m256)(b), (c)))
 
+/// Compares each of the corresponding scalar double-precision values of
+///    two 128-bit vectors of [2 x double], using the operation specified by the
+///    immediate integer operand.
+///
+///    If the result is true, all 64 bits of the destination vector are set;
+///    otherwise they are cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c);
+/// \endcode
+///
+/// This intrinsic corresponds to the <c> VCMPSD </c> instruction.
+///
+/// \param a
+///    A 128-bit vector of [2 x double].
+/// \param b
+///    A 128-bit vector of [2 x double].
+/// \param c
+///    An immediate integer operand, with bits [4:0] specifying which comparison
+///    operation to use: \n
+///    0x00: Equal (ordered, non-signaling) \n
+///    0x01: Less-than (ordered, signaling) \n
+///    0x02: Less-than-or-equal (ordered, signaling) \n
+///    0x03: Unordered (non-signaling) \n
+///    0x04: Not-equal (unordered, non-signaling) \n
+///    0x05: Not-less-than (unordered, signaling) \n
+///    0x06: Not-less-than-or-equal (unordered, signaling) \n
+///    0x07: Ordered (non-signaling) \n
+///    0x08: Equal (unordered, non-signaling) \n
+///    0x09: Not-greater-than-or-equal (unordered, signaling) \n
+///    0x0A: Not-greater-than (unordered, signaling) \n
+///    0x0B: False (ordered, non-signaling) \n
+///    0x0C: Not-equal (ordered, non-signaling) \n
+///    0x0D: Greater-than-or-equal (ordered, signaling) \n
+///    0x0E: Greater-than (ordered, signaling) \n
+///    0x0F: True (unordered, non-signaling) \n
+///    0x10: Equal (ordered, signaling) \n
+///    0x11: Less-than (ordered, non-signaling) \n
+///    0x12: Less-than-or-equal (ordered, non-signaling) \n
+///    0x13: Unordered (signaling) \n
+///    0x14: Not-equal (unordered, signaling) \n
+///    0x15: Not-less-than (unordered, non-signaling) \n
+///    0x16: Not-less-than-or-equal (unordered, non-signaling) \n
+///    0x17: Ordered (signaling) \n
+///    0x18: Equal (unordered, signaling) \n
+///    0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
+///    0x1A: Not-greater-than (unordered, non-signaling) \n
+///    0x1B: False (ordered, signaling) \n
+///    0x1C: Not-equal (ordered, signaling) \n
+///    0x1D: Greater-than-or-equal (ordered, non-signaling) \n
+///    0x1E: Greater-than (ordered, non-signaling) \n
+///    0x1F: True (unordered, signaling)
+/// \returns A 128-bit vector of [2 x double] containing the comparison results.
+#define _mm_cmp_sd(a, b, c) \
+  ((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
+                                 (__v2df)(__m128d)(b), (c)))
+
+/// Compares each of the corresponding scalar values of two 128-bit
+///    vectors of [4 x float], using the operation specified by the immediate
+///    integer operand.
+///
+///    If the result is true, all 32 bits of the destination vector are set;
+///    otherwise they are cleared.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c);
+/// \endcode
+///
+/// This intrinsic corresponds to the <c> VCMPSS </c> instruction.
+///
+/// \param a
+///    A 128-bit vector of [4 x float].
+/// \param b
+///    A 128-bit vector of [4 x float].
+/// \param c
+///    An immediate integer operand, with bits [4:0] specifying which comparison
+///    operation to use: \n
+///    0x00: Equal (ordered, non-signaling) \n
+///    0x01: Less-than (ordered, signaling) \n
+///    0x02: Less-than-or-equal (ordered, signaling) \n
+///    0x03: Unordered (non-signaling) \n
+///    0x04: Not-equal (unordered, non-signaling) \n
+///    0x05: Not-less-than (unordered, signaling) \n
+///    0x06: Not-less-than-or-equal (unordered, signaling) \n
+///    0x07: Ordered (non-signaling) \n
+///    0x08: Equal (unordered, non-signaling) \n
+///    0x09: Not-greater-than-or-equal (unordered, signaling) \n
+///    0x0A: Not-greater-than (unordered, signaling) \n
+///    0x0B: False (ordered, non-signaling) \n
+///    0x0C: Not-equal (ordered, non-signaling) \n
+///    0x0D: Greater-than-or-equal (ordered, signaling) \n
+///    0x0E: Greater-than (ordered, signaling) \n
+///    0x0F: True (unordered, non-signaling) \n
+///    0x10: Equal (ordered, signaling) \n
+///    0x11: Less-than (ordered, non-signaling) \n
+///    0x12: Less-than-or-equal (ordered, non-signaling) \n
+///    0x13: Unordered (signaling) \n
+///    0x14: Not-equal (unordered, signaling) \n
+///    0x15: Not-less-than (unordered, non-signaling) \n
+///    0x16: Not-less-than-or-equal (unordered, non-signaling) \n
+///    0x17: Ordered (signaling) \n
+///    0x18: Equal (unordered, signaling) \n
+///    0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
+///    0x1A: Not-greater-than (unordered, non-signaling) \n
+///    0x1B: False (ordered, signaling) \n
+///    0x1C: Not-equal (ordered, signaling) \n
+///    0x1D: Greater-than-or-equal (ordered, non-signaling) \n
+///    0x1E: Greater-than (ordered, non-signaling) \n
+///    0x1F: True (unordered, signaling)
+/// \returns A 128-bit vector of [4 x float] containing the comparison results.
+#define _mm_cmp_ss(a, b, c) \
+  ((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
+                                (__v4sf)(__m128)(b), (c)))
+
 /// Takes a [8 x i32] vector and returns the vector element value
 ///    indexed by the immediate constant operand.
 ///