-
Notifications
You must be signed in to change notification settings - Fork 15k
[x86][AVX-VNNI] Fix VPDPBXXD Argument Type #159222
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Fixed intrinsic VPDP[SS,SU,UU]D[,S]_128/256/512's argument types to match with the ISA. Fixes part of llvm#97271.
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
|
@llvm/pr-subscribers-mlir-vector @llvm/pr-subscribers-backend-x86 Author: BaiXilin (BaiXilin) ChangesFixed intrinsic VPDP[SS,SU,UU]D[,S]_128/256/512's argument types to match with the ISA. Patch is 278.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/159222.diff 17 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index aac502091b57e..3be3a5e375ae8 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -1106,51 +1106,51 @@ let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<5
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpdpbssd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+ def vpdpbssd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, char>, _Vector<16, char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpdpbssd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+ def vpdpbssd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, char>, _Vector<32, char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpdpbssds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+ def vpdpbssds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, char>, _Vector<16, char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpdpbssds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+ def vpdpbssds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, char>, _Vector<32, char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpdpbsud128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+ def vpdpbsud128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, char>, _Vector<16, unsigned char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpdpbsud256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+ def vpdpbsud256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, char>, _Vector<32, unsigned char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpdpbsuds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+ def vpdpbsuds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, char>, _Vector<16, unsigned char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpdpbsuds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+ def vpdpbsuds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, char>, _Vector<32, unsigned char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpdpbuud128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+ def vpdpbuud128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, unsigned char>, _Vector<16, unsigned char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpdpbuud256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+ def vpdpbuud256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, unsigned char>, _Vector<32, unsigned char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpdpbuuds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+ def vpdpbuuds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, unsigned char>, _Vector<16, unsigned char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpdpbuuds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+ def vpdpbuuds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, unsigned char>, _Vector<32, unsigned char>)">;
}
let Features = "movrs", Attributes = [NoThrow, Const] in {
@@ -4276,12 +4276,12 @@ let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>
let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def vdpphps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<32, _Float16>, _Vector<32, _Float16>)">;
- def vpdpbssd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
- def vpdpbssds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
- def vpdpbsud512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
- def vpdpbsuds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
- def vpdpbuud512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
- def vpdpbuuds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
+ def vpdpbssd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, char>, _Vector<64, char>)">;
+ def vpdpbssds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, char>, _Vector<64, char>)">;
+ def vpdpbsud512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, char>, _Vector<64, unsigned char>)">;
+ def vpdpbsuds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, char>, _Vector<64, unsigned char>)">;
+ def vpdpbuud512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, unsigned char>, _Vector<64, unsigned char>)">;
+ def vpdpbuuds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, unsigned char>, _Vector<64, unsigned char>)">;
}
let Features = "avx10.2", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
diff --git a/clang/lib/Headers/avx10_2_512niintrin.h b/clang/lib/Headers/avx10_2_512niintrin.h
index 67679fce82296..fdb57c7c9e27b 100644
--- a/clang/lib/Headers/avx10_2_512niintrin.h
+++ b/clang/lib/Headers/avx10_2_512niintrin.h
@@ -64,8 +64,8 @@ static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_dpph_ps(__mmask16 __U,
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbssd_epi32(__m512i __W,
__m512i __A,
__m512i __B) {
- return (__m512i)__builtin_ia32_vpdpbssd512((__v16si)__W, (__v16si)__A,
- (__v16si)__B);
+ return (__m512i)__builtin_ia32_vpdpbssd512((__v16si)__W, (__v64qi)__A,
+ (__v64qi)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -84,8 +84,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbssd_epi32(
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbssds_epi32(__m512i __W,
__m512i __A,
__m512i __B) {
- return (__m512i)__builtin_ia32_vpdpbssds512((__v16si)__W, (__v16si)__A,
- (__v16si)__B);
+ return (__m512i)__builtin_ia32_vpdpbssds512((__v16si)__W, (__v64qi)__A,
+ (__v64qi)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbssds_epi32(
@@ -104,8 +104,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbssds_epi32(
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbsud_epi32(__m512i __W,
__m512i __A,
__m512i __B) {
- return (__m512i)__builtin_ia32_vpdpbsud512((__v16si)__W, (__v16si)__A,
- (__v16si)__B);
+ return (__m512i)__builtin_ia32_vpdpbsud512((__v16si)__W, (__v64qi)__A,
+ (__v64qu)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -124,8 +124,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbsud_epi32(
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbsuds_epi32(__m512i __W,
__m512i __A,
__m512i __B) {
- return (__m512i)__builtin_ia32_vpdpbsuds512((__v16si)__W, (__v16si)__A,
- (__v16si)__B);
+ return (__m512i)__builtin_ia32_vpdpbsuds512((__v16si)__W, (__v64qi)__A,
+ (__v64qu)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbsuds_epi32(
@@ -144,8 +144,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbsuds_epi32(
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbuud_epi32(__m512i __W,
__m512i __A,
__m512i __B) {
- return (__m512i)__builtin_ia32_vpdpbuud512((__v16si)__W, (__v16si)__A,
- (__v16si)__B);
+ return (__m512i)__builtin_ia32_vpdpbuud512((__v16si)__W, (__v64qu)__A,
+ (__v64qu)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -164,8 +164,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbuud_epi32(
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbuuds_epi32(__m512i __W,
__m512i __A,
__m512i __B) {
- return (__m512i)__builtin_ia32_vpdpbuuds512((__v16si)__W, (__v16si)__A,
- (__v16si)__B);
+ return (__m512i)__builtin_ia32_vpdpbuuds512((__v16si)__W, (__v64qu)__A,
+ (__v64qu)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbuuds_epi32(
diff --git a/clang/lib/Headers/avxvnniint8intrin.h b/clang/lib/Headers/avxvnniint8intrin.h
index c211620c68f07..858b66b138f31 100644
--- a/clang/lib/Headers/avxvnniint8intrin.h
+++ b/clang/lib/Headers/avxvnniint8intrin.h
@@ -14,6 +14,7 @@
#ifndef __AVXVNNIINT8INTRIN_H
#define __AVXVNNIINT8INTRIN_H
+// clang-format off
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -44,10 +45,12 @@
/// ENDFOR
/// dst[MAX:128] := 0
/// \endcode
+// clang-format on
#define _mm_dpbssd_epi32(__W, __A, __B) \
- ((__m128i)__builtin_ia32_vpdpbssd128((__v4si)(__W), (__v4si)(__A), \
- (__v4si)(__B)))
+ ((__m128i)__builtin_ia32_vpdpbssd128((__v4si)(__W), (__v16qi)(__A), \
+ (__v16qi)(__B)))
+// clang-format off
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -78,10 +81,12 @@
/// ENDFOR
/// dst[MAX:256] := 0
/// \endcode
+// clang-format on
#define _mm256_dpbssd_epi32(__W, __A, __B) \
- ((__m256i)__builtin_ia32_vpdpbssd256((__v8si)(__W), (__v8si)(__A), \
- (__v8si)(__B)))
+ ((__m256i)__builtin_ia32_vpdpbssd256((__v8si)(__W), (__v32qi)(__A), \
+ (__v32qi)(__B)))
+// clang-format off
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -94,7 +99,7 @@
/// _mm_dpbssds_epi32( __m128i __W, __m128i __A, __m128i __B);
/// \endcode
///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBSSDS instruction.
///
/// \param __A
/// A 128-bit vector of [16 x char].
@@ -113,10 +118,12 @@
/// ENDFOR
/// dst[MAX:128] := 0
/// \endcode
+// clang-format on
#define _mm_dpbssds_epi32(__W, __A, __B) \
- ((__m128i)__builtin_ia32_vpdpbssds128((__v4si)(__W), (__v4si)(__A), \
- (__v4si)(__B)))
+ ((__m128i)__builtin_ia32_vpdpbssds128((__v4si)(__W), (__v16qi)(__A), \
+ (__v16qi)(__B)))
+// clang-format off
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -129,7 +136,7 @@
/// _mm256_dpbssds_epi32(__m256i __W, __m256i __A, __m256i __B);
/// \endcode
///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBSSDS instruction.
///
/// \param __A
/// A 256-bit vector of [32 x char].
@@ -148,10 +155,12 @@
/// ENDFOR
/// dst[MAX:256] := 0
/// \endcode
+// clang-format on
#define _mm256_dpbssds_epi32(__W, __A, __B) \
- ((__m256i)__builtin_ia32_vpdpbssds256((__v8si)(__W), (__v8si)(__A), \
- (__v8si)(__B)))
+ ((__m256i)__builtin_ia32_vpdpbssds256((__v8si)(__W), (__v32qi)(__A), \
+ (__v32qi)(__B)))
+// clang-format off
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -163,7 +172,7 @@
/// _mm_dpbsud_epi32(__m128i __W, __m128i __A, __m128i __B);
/// \endcode
///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBSUD instruction.
///
/// \param __A
/// A 128-bit vector of [16 x char].
@@ -182,10 +191,12 @@
/// ENDFOR
/// dst[MAX:128] := 0
/// \endcode
+// clang-format on
#define _mm_dpbsud_epi32(__W, __A, __B) \
- ((__m128i)__builtin_ia32_vpdpbsud128((__v4si)(__W), (__v4si)(__A), \
- (__v4si)(__B)))
+ ((__m128i)__builtin_ia32_vpdpbsud128((__v4si)(__W), (__v16qi)(__A), \
+ (__v16qu)(__B)))
+// clang-format off
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -197,7 +208,7 @@
/// _mm256_dpbsud_epi32(__m256i __W, __m256i __A, __m256i __B);
/// \endcode
///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBSUD instruction.
///
/// \param __A
/// A 256-bit vector of [32 x char].
@@ -216,10 +227,12 @@
/// ENDFOR
/// dst[MAX:256] := 0
/// \endcode
+// clang-format on
#define _mm256_dpbsud_epi32(__W, __A, __B) \
- ((__m256i)__builtin_ia32_vpdpbsud256((__v8si)(__W), (__v8si)(__A), \
- (__v8si)(__B)))
+ ((__m256i)__builtin_ia32_vpdpbsud256((__v8si)(__W), (__v32qi)(__A), \
+ (__v32qu)(__B)))
+// clang-format off
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -232,7 +245,7 @@
/// _mm_dpbsuds_epi32( __m128i __W, __m128i __A, __m128i __B);
/// \endcode
///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBSUDS instruction.
///
/// \param __A
/// A 128-bit vector of [16 x char].
@@ -251,10 +264,12 @@
/// ENDFOR
/// dst[MAX:128] := 0
/// \endcode
+// clang-format on
#define _mm_dpbsuds_epi32(__W, __A, __B) \
- ((__m128i)__builtin_ia32_vpdpbsuds128((__v4si)(__W), (__v4si)(__A), \
- (__v4si)(__B)))
+ ((__m128i)__builtin_ia32_vpdpbsuds128((__v4si)(__W), (__v16qi)(__A), \
+ (__v16qu)(__B)))
+// clang-format off
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -267,7 +282,7 @@
/// _mm256_dpbsuds_epi32(__m256i __W, __m256i __A, __m256i __B);
/// \endcode
///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBSUDS instruction.
///
/// \param __A
/// A 256-bit vector of [32 x char].
@@ -286,10 +301,12 @@
/// ENDFOR
/// dst[MAX:256] := 0
/// \endcode
+// clang-format on
#define _mm256_dpbsuds_epi32(__W, __A, __B) \
- ((__m256i)__builtin_ia32_vpdpbsuds256((__v8si)(__W), (__v8si)(__A), \
- (__v8si)(__B)))
+ ((__m256i)__builtin_ia32_vpdpbsuds256((__v8si)(__W), (__v32qi)(__A), \
+ (__v32qu)(__B)))
+// clang-format off
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -301,7 +318,7 @@
/// _mm_dpbuud_epi32(__m128i __W, __m128i __A, __m128i __B);
/// \endcode
///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBUUD instruction.
///
/// \param __A
/// A 128-bit vector of [16 x unsigned char].
@@ -320,10 +337,12 @@
/// ENDFOR
/// dst[MAX:128] := 0
/// \endcode
+// clang-format on
#define _mm_dpbuud_epi32(__W, __A, __B) \
- ((__m128i)__builtin_ia32_vpdpbuud128((__v4si)(__W), (__v4si)(__A), \
- (__v4si)(__B)))
+ ((__m128i)__builtin_ia32_vpdpbuud128((__v4si)(__W), (__v16qu)(__A), \
+ (__v16qu)(__B)))
+// clang-format off
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -335,7 +354,7 @@
/// _mm256_dpbuud_epi32(__m256i __W, __m256i __A, __m256i __B);
/// \endcode
///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBUUD instruction.
///
/// \param __A
/// A 256-bit vector of [32 x unsigned char].
@@ -354,10 +373,12 @@
/// ENDFOR
/// dst[MAX:256] := 0
/// \endcode
+// clang-format on
#define _mm256_dpbuud_epi32(__W, __A, __B) \
- ((__m256i)__builtin_ia32_vpdpbuud256((__v8si)(__W), (__v8si)(__A), \
- (__v8si)(__B)))
+ ((__m256i)__builtin_ia32_vpdpbuud256((__v8si)(__W), (__v32qu)(__A), \
+ (__v32qu)(__B)))
+// clang-format off
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -389,10 +410,12 @@
/// ENDFOR
/// dst[MAX:128] := 0
/// \endcode
+// clang-format on
#define _mm_dpbuuds_epi32(__W, __A, __B) ...
[truncated]
|
|
@llvm/pr-subscribers-compiler-rt-sanitizer Author: BaiXilin (BaiXilin) ChangesFixed intrinsic VPDP[SS,SU,UU]D[,S]_128/256/512's argument types to match with the ISA. Patch is 278.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/159222.diff 17 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index aac502091b57e..3be3a5e375ae8 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -1106,51 +1106,51 @@ let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<5
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpdpbssd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+ def vpdpbssd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, char>, _Vector<16, char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpdpbssd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+ def vpdpbssd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, char>, _Vector<32, char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpdpbssds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+ def vpdpbssds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, char>, _Vector<16, char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpdpbssds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+ def vpdpbssds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, char>, _Vector<32, char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpdpbsud128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+ def vpdpbsud128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, char>, _Vector<16, unsigned char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpdpbsud256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+ def vpdpbsud256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, char>, _Vector<32, unsigned char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpdpbsuds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+ def vpdpbsuds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, char>, _Vector<16, unsigned char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpdpbsuds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+ def vpdpbsuds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, char>, _Vector<32, unsigned char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpdpbuud128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+ def vpdpbuud128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, unsigned char>, _Vector<16, unsigned char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpdpbuud256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+ def vpdpbuud256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, unsigned char>, _Vector<32, unsigned char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpdpbuuds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
+ def vpdpbuuds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, unsigned char>, _Vector<16, unsigned char>)">;
}
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpdpbuuds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
+ def vpdpbuuds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, unsigned char>, _Vector<32, unsigned char>)">;
}
let Features = "movrs", Attributes = [NoThrow, Const] in {
@@ -4276,12 +4276,12 @@ let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>
let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def vdpphps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<32, _Float16>, _Vector<32, _Float16>)">;
- def vpdpbssd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
- def vpdpbssds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
- def vpdpbsud512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
- def vpdpbsuds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
- def vpdpbuud512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
- def vpdpbuuds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
+ def vpdpbssd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, char>, _Vector<64, char>)">;
+ def vpdpbssds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, char>, _Vector<64, char>)">;
+ def vpdpbsud512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, char>, _Vector<64, unsigned char>)">;
+ def vpdpbsuds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, char>, _Vector<64, unsigned char>)">;
+ def vpdpbuud512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, unsigned char>, _Vector<64, unsigned char>)">;
+ def vpdpbuuds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, unsigned char>, _Vector<64, unsigned char>)">;
}
let Features = "avx10.2", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
diff --git a/clang/lib/Headers/avx10_2_512niintrin.h b/clang/lib/Headers/avx10_2_512niintrin.h
index 67679fce82296..fdb57c7c9e27b 100644
--- a/clang/lib/Headers/avx10_2_512niintrin.h
+++ b/clang/lib/Headers/avx10_2_512niintrin.h
@@ -64,8 +64,8 @@ static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_dpph_ps(__mmask16 __U,
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbssd_epi32(__m512i __W,
__m512i __A,
__m512i __B) {
- return (__m512i)__builtin_ia32_vpdpbssd512((__v16si)__W, (__v16si)__A,
- (__v16si)__B);
+ return (__m512i)__builtin_ia32_vpdpbssd512((__v16si)__W, (__v64qi)__A,
+ (__v64qi)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -84,8 +84,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbssd_epi32(
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbssds_epi32(__m512i __W,
__m512i __A,
__m512i __B) {
- return (__m512i)__builtin_ia32_vpdpbssds512((__v16si)__W, (__v16si)__A,
- (__v16si)__B);
+ return (__m512i)__builtin_ia32_vpdpbssds512((__v16si)__W, (__v64qi)__A,
+ (__v64qi)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbssds_epi32(
@@ -104,8 +104,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbssds_epi32(
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbsud_epi32(__m512i __W,
__m512i __A,
__m512i __B) {
- return (__m512i)__builtin_ia32_vpdpbsud512((__v16si)__W, (__v16si)__A,
- (__v16si)__B);
+ return (__m512i)__builtin_ia32_vpdpbsud512((__v16si)__W, (__v64qi)__A,
+ (__v64qu)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -124,8 +124,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbsud_epi32(
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbsuds_epi32(__m512i __W,
__m512i __A,
__m512i __B) {
- return (__m512i)__builtin_ia32_vpdpbsuds512((__v16si)__W, (__v16si)__A,
- (__v16si)__B);
+ return (__m512i)__builtin_ia32_vpdpbsuds512((__v16si)__W, (__v64qi)__A,
+ (__v64qu)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbsuds_epi32(
@@ -144,8 +144,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbsuds_epi32(
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbuud_epi32(__m512i __W,
__m512i __A,
__m512i __B) {
- return (__m512i)__builtin_ia32_vpdpbuud512((__v16si)__W, (__v16si)__A,
- (__v16si)__B);
+ return (__m512i)__builtin_ia32_vpdpbuud512((__v16si)__W, (__v64qu)__A,
+ (__v64qu)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -164,8 +164,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbuud_epi32(
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbuuds_epi32(__m512i __W,
__m512i __A,
__m512i __B) {
- return (__m512i)__builtin_ia32_vpdpbuuds512((__v16si)__W, (__v16si)__A,
- (__v16si)__B);
+ return (__m512i)__builtin_ia32_vpdpbuuds512((__v16si)__W, (__v64qu)__A,
+ (__v64qu)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbuuds_epi32(
diff --git a/clang/lib/Headers/avxvnniint8intrin.h b/clang/lib/Headers/avxvnniint8intrin.h
index c211620c68f07..858b66b138f31 100644
--- a/clang/lib/Headers/avxvnniint8intrin.h
+++ b/clang/lib/Headers/avxvnniint8intrin.h
@@ -14,6 +14,7 @@
#ifndef __AVXVNNIINT8INTRIN_H
#define __AVXVNNIINT8INTRIN_H
+// clang-format off
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -44,10 +45,12 @@
/// ENDFOR
/// dst[MAX:128] := 0
/// \endcode
+// clang-format on
#define _mm_dpbssd_epi32(__W, __A, __B) \
- ((__m128i)__builtin_ia32_vpdpbssd128((__v4si)(__W), (__v4si)(__A), \
- (__v4si)(__B)))
+ ((__m128i)__builtin_ia32_vpdpbssd128((__v4si)(__W), (__v16qi)(__A), \
+ (__v16qi)(__B)))
+// clang-format off
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -78,10 +81,12 @@
/// ENDFOR
/// dst[MAX:256] := 0
/// \endcode
+// clang-format on
#define _mm256_dpbssd_epi32(__W, __A, __B) \
- ((__m256i)__builtin_ia32_vpdpbssd256((__v8si)(__W), (__v8si)(__A), \
- (__v8si)(__B)))
+ ((__m256i)__builtin_ia32_vpdpbssd256((__v8si)(__W), (__v32qi)(__A), \
+ (__v32qi)(__B)))
+// clang-format off
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -94,7 +99,7 @@
/// _mm_dpbssds_epi32( __m128i __W, __m128i __A, __m128i __B);
/// \endcode
///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBSSDS instruction.
///
/// \param __A
/// A 128-bit vector of [16 x char].
@@ -113,10 +118,12 @@
/// ENDFOR
/// dst[MAX:128] := 0
/// \endcode
+// clang-format on
#define _mm_dpbssds_epi32(__W, __A, __B) \
- ((__m128i)__builtin_ia32_vpdpbssds128((__v4si)(__W), (__v4si)(__A), \
- (__v4si)(__B)))
+ ((__m128i)__builtin_ia32_vpdpbssds128((__v4si)(__W), (__v16qi)(__A), \
+ (__v16qi)(__B)))
+// clang-format off
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -129,7 +136,7 @@
/// _mm256_dpbssds_epi32(__m256i __W, __m256i __A, __m256i __B);
/// \endcode
///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBSSDS instruction.
///
/// \param __A
/// A 256-bit vector of [32 x char].
@@ -148,10 +155,12 @@
/// ENDFOR
/// dst[MAX:256] := 0
/// \endcode
+// clang-format on
#define _mm256_dpbssds_epi32(__W, __A, __B) \
- ((__m256i)__builtin_ia32_vpdpbssds256((__v8si)(__W), (__v8si)(__A), \
- (__v8si)(__B)))
+ ((__m256i)__builtin_ia32_vpdpbssds256((__v8si)(__W), (__v32qi)(__A), \
+ (__v32qi)(__B)))
+// clang-format off
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -163,7 +172,7 @@
/// _mm_dpbsud_epi32(__m128i __W, __m128i __A, __m128i __B);
/// \endcode
///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBSUD instruction.
///
/// \param __A
/// A 128-bit vector of [16 x char].
@@ -182,10 +191,12 @@
/// ENDFOR
/// dst[MAX:128] := 0
/// \endcode
+// clang-format on
#define _mm_dpbsud_epi32(__W, __A, __B) \
- ((__m128i)__builtin_ia32_vpdpbsud128((__v4si)(__W), (__v4si)(__A), \
- (__v4si)(__B)))
+ ((__m128i)__builtin_ia32_vpdpbsud128((__v4si)(__W), (__v16qi)(__A), \
+ (__v16qu)(__B)))
+// clang-format off
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -197,7 +208,7 @@
/// _mm256_dpbsud_epi32(__m256i __W, __m256i __A, __m256i __B);
/// \endcode
///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBSUD instruction.
///
/// \param __A
/// A 256-bit vector of [32 x char].
@@ -216,10 +227,12 @@
/// ENDFOR
/// dst[MAX:256] := 0
/// \endcode
+// clang-format on
#define _mm256_dpbsud_epi32(__W, __A, __B) \
- ((__m256i)__builtin_ia32_vpdpbsud256((__v8si)(__W), (__v8si)(__A), \
- (__v8si)(__B)))
+ ((__m256i)__builtin_ia32_vpdpbsud256((__v8si)(__W), (__v32qi)(__A), \
+ (__v32qu)(__B)))
+// clang-format off
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -232,7 +245,7 @@
/// _mm_dpbsuds_epi32( __m128i __W, __m128i __A, __m128i __B);
/// \endcode
///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBSUDS instruction.
///
/// \param __A
/// A 128-bit vector of [16 x char].
@@ -251,10 +264,12 @@
/// ENDFOR
/// dst[MAX:128] := 0
/// \endcode
+// clang-format on
#define _mm_dpbsuds_epi32(__W, __A, __B) \
- ((__m128i)__builtin_ia32_vpdpbsuds128((__v4si)(__W), (__v4si)(__A), \
- (__v4si)(__B)))
+ ((__m128i)__builtin_ia32_vpdpbsuds128((__v4si)(__W), (__v16qi)(__A), \
+ (__v16qu)(__B)))
+// clang-format off
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with
/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -267,7 +282,7 @@
/// _mm256_dpbsuds_epi32(__m256i __W, __m256i __A, __m256i __B);
/// \endcode
///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBSUDS instruction.
///
/// \param __A
/// A 256-bit vector of [32 x char].
@@ -286,10 +301,12 @@
/// ENDFOR
/// dst[MAX:256] := 0
/// \endcode
+// clang-format on
#define _mm256_dpbsuds_epi32(__W, __A, __B) \
- ((__m256i)__builtin_ia32_vpdpbsuds256((__v8si)(__W), (__v8si)(__A), \
- (__v8si)(__B)))
+ ((__m256i)__builtin_ia32_vpdpbsuds256((__v8si)(__W), (__v32qi)(__A), \
+ (__v32qu)(__B)))
+// clang-format off
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -301,7 +318,7 @@
/// _mm_dpbuud_epi32(__m128i __W, __m128i __A, __m128i __B);
/// \endcode
///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBUUD instruction.
///
/// \param __A
/// A 128-bit vector of [16 x unsigned char].
@@ -320,10 +337,12 @@
/// ENDFOR
/// dst[MAX:128] := 0
/// \endcode
+// clang-format on
#define _mm_dpbuud_epi32(__W, __A, __B) \
- ((__m128i)__builtin_ia32_vpdpbuud128((__v4si)(__W), (__v4si)(__A), \
- (__v4si)(__B)))
+ ((__m128i)__builtin_ia32_vpdpbuud128((__v4si)(__W), (__v16qu)(__A), \
+ (__v16qu)(__B)))
+// clang-format off
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -335,7 +354,7 @@
/// _mm256_dpbuud_epi32(__m256i __W, __m256i __A, __m256i __B);
/// \endcode
///
-/// This intrinsic corresponds to the \c VPDPBSSD instruction.
+/// This intrinsic corresponds to the \c VPDPBUUD instruction.
///
/// \param __A
/// A 256-bit vector of [32 x unsigned char].
@@ -354,10 +373,12 @@
/// ENDFOR
/// dst[MAX:256] := 0
/// \endcode
+// clang-format on
#define _mm256_dpbuud_epi32(__W, __A, __B) \
- ((__m256i)__builtin_ia32_vpdpbuud256((__v8si)(__W), (__v8si)(__A), \
- (__v8si)(__B)))
+ ((__m256i)__builtin_ia32_vpdpbuud256((__v8si)(__W), (__v32qu)(__A), \
+ (__v32qu)(__B)))
+// clang-format off
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with
/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate
/// signed 16-bit results. Sum these 4 results with the corresponding
@@ -389,10 +410,12 @@
/// ENDFOR
/// dst[MAX:128] := 0
/// \endcode
+// clang-format on
#define _mm_dpbuuds_epi32(__W, __A, __B) ...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I approve of the MemorySanitizer-related portion of this pull request. (The rest of the change looks reasonable to me but I'll defer to the code owners / experts.) Thanks for adding the intrinsic cases and updating the test files :-).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Needs the avxvnniint8 intrinsics upgrade tests moving to avxvnniint8-intrinsics-upgrade.ll
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/65/builds/23331 Here is the relevant piece of the build log for the reference |
Fixed intrinsic VPDP[SS,SU,UU]D[,S]_128/256/512's argument types to match with the ISA. Fixes part of llvm#97271.
Fixed intrinsic VPDP[SS,SU,UU]D[,S]_128/256/512's argument types to match with the ISA.
Fixes part of #97271.