Skip to content

Commit 4f79552

Browse files
authored
[x86][AVX-VNNI] Fix VPDPWXXD Argument Types (#169456)
Fixed the argument types of the following intrinsics to match the ISA: - vpdpwssd_128, vpdpwssd_256, vpdpwssd_512 - vpdpwssds_128, vpdpwssds_256, vpdpwssds_512 - vpdpwsud_128, vpdpwsud_256, vpdpwsud_512 - vpdpwsuds_128, vpdpwsuds_256, vpdpwsuds_512 - vpdpwusd_128, vpdpwusd_256, vpdpwusd_512 - vpdpwusds_128, vpdpwusds_256, vpdpwusds_512 - vpdpwuud_128, vpdpwuud_256, vpdpwuud_512 - vpdpwuuds_128, vpdpwuuds_256, vpdpwuuds_512 Fixes #97271. Note that this is the last PR for the issue.
1 parent 1bada0a commit 4f79552

35 files changed

+1870
-900
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1088,27 +1088,27 @@ let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<5
10881088
}
10891089

10901090
let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1091-
def vpdpwssd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
1091+
def vpdpwssd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, short>, _Vector<8, short>)">;
10921092
}
10931093

10941094
let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1095-
def vpdpwssd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
1095+
def vpdpwssd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, short>, _Vector<16, short>)">;
10961096
}
10971097

10981098
let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1099-
def vpdpwssd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
1099+
def vpdpwssd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, short>, _Vector<32, short>)">;
11001100
}
11011101

11021102
let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
1103-
def vpdpwssds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
1103+
def vpdpwssds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, short>, _Vector<8, short>)">;
11041104
}
11051105

11061106
let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
1107-
def vpdpwssds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
1107+
def vpdpwssds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, short>, _Vector<16, short>)">;
11081108
}
11091109

11101110
let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1111-
def vpdpwssds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
1111+
def vpdpwssds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, short>, _Vector<32, short>)">;
11121112
}
11131113

11141114
let Features = "avxvnniint8|avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
@@ -4222,64 +4222,64 @@ let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
42224222
}
42234223

42244224
let Features = "avx10.2", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
4225-
def vpdpwsud512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
4226-
def vpdpwsuds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
4227-
def vpdpwusd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
4228-
def vpdpwusds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
4229-
def vpdpwuud512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
4230-
def vpdpwuuds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
4225+
def vpdpwsud512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, short>, _Vector<32, unsigned short>)">;
4226+
def vpdpwsuds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, short>, _Vector<32, unsigned short>)">;
4227+
def vpdpwusd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, unsigned short>, _Vector<32, short>)">;
4228+
def vpdpwusds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, unsigned short>, _Vector<32, short>)">;
4229+
def vpdpwuud512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, unsigned short>, _Vector<32, unsigned short>)">;
4230+
def vpdpwuuds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, unsigned short>, _Vector<32, unsigned short>)">;
42314231
}
42324232

42334233
let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
42344234
def mpsadbw512 : X86Builtin<"_Vector<32, short>(_Vector<64, char>, _Vector<64, char>, _Constant char)">;
42354235
}
42364236

42374237
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
4238-
def vpdpwsud128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
4238+
def vpdpwsud128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, short>, _Vector<8, unsigned short>)">;
42394239
}
42404240

42414241
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
4242-
def vpdpwsud256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
4242+
def vpdpwsud256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, short>, _Vector<16, unsigned short>)">;
42434243
}
42444244

42454245
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
4246-
def vpdpwsuds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
4246+
def vpdpwsuds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, short>, _Vector<8, unsigned short>)">;
42474247
}
42484248

42494249
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
4250-
def vpdpwsuds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
4250+
def vpdpwsuds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, short>, _Vector<16, unsigned short>)">;
42514251
}
42524252

42534253
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
4254-
def vpdpwusd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
4254+
def vpdpwusd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, unsigned short>, _Vector<8, short>)">;
42554255
}
42564256

42574257
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
4258-
def vpdpwusd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
4258+
def vpdpwusd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, unsigned short>, _Vector<16, short>)">;
42594259
}
42604260

42614261
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
4262-
def vpdpwusds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
4262+
def vpdpwusds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, unsigned short>, _Vector<8, short>)">;
42634263
}
42644264

42654265
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
4266-
def vpdpwusds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
4266+
def vpdpwusds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, unsigned short>, _Vector<16, short>)">;
42674267
}
42684268

42694269
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
4270-
def vpdpwuud128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
4270+
def vpdpwuud128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, unsigned short>, _Vector<8, unsigned short>)">;
42714271
}
42724272

42734273
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
4274-
def vpdpwuud256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
4274+
def vpdpwuud256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
42754275
}
42764276

42774277
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
4278-
def vpdpwuuds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
4278+
def vpdpwuuds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, unsigned short>, _Vector<8, unsigned short>)">;
42794279
}
42804280

42814281
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
4282-
def vpdpwuuds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
4282+
def vpdpwuuds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
42834283
}
42844284

42854285
let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {

clang/lib/Headers/avx10_2_512niintrin.h

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -185,8 +185,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbuuds_epi32(
185185
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwsud_epi32(__m512i __A,
186186
__m512i __B,
187187
__m512i __C) {
188-
return (__m512i)__builtin_ia32_vpdpwsud512((__v16si)__A, (__v16si)__B,
189-
(__v16si)__C);
188+
return (__m512i)__builtin_ia32_vpdpwsud512((__v16si)__A, (__v32hi)__B,
189+
(__v32hu)__C);
190190
}
191191

192192
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -206,8 +206,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsud_epi32(
206206
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwsuds_epi32(__m512i __A,
207207
__m512i __B,
208208
__m512i __C) {
209-
return (__m512i)__builtin_ia32_vpdpwsuds512((__v16si)__A, (__v16si)__B,
210-
(__v16si)__C);
209+
return (__m512i)__builtin_ia32_vpdpwsuds512((__v16si)__A, (__v32hi)__B,
210+
(__v32hu)__C);
211211
}
212212

213213
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwsuds_epi32(
@@ -227,8 +227,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsuds_epi32(
227227
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwusd_epi32(__m512i __A,
228228
__m512i __B,
229229
__m512i __C) {
230-
return (__m512i)__builtin_ia32_vpdpwusd512((__v16si)__A, (__v16si)__B,
231-
(__v16si)__C);
230+
return (__m512i)__builtin_ia32_vpdpwusd512((__v16si)__A, (__v32hu)__B,
231+
(__v32hi)__C);
232232
}
233233

234234
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -248,8 +248,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusd_epi32(
248248
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwusds_epi32(__m512i __A,
249249
__m512i __B,
250250
__m512i __C) {
251-
return (__m512i)__builtin_ia32_vpdpwusds512((__v16si)__A, (__v16si)__B,
252-
(__v16si)__C);
251+
return (__m512i)__builtin_ia32_vpdpwusds512((__v16si)__A, (__v32hu)__B,
252+
(__v32hi)__C);
253253
}
254254

255255
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwusds_epi32(
@@ -269,8 +269,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusds_epi32(
269269
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwuud_epi32(__m512i __A,
270270
__m512i __B,
271271
__m512i __C) {
272-
return (__m512i)__builtin_ia32_vpdpwuud512((__v16si)__A, (__v16si)__B,
273-
(__v16si)__C);
272+
return (__m512i)__builtin_ia32_vpdpwuud512((__v16si)__A, (__v32hu)__B,
273+
(__v32hu)__C);
274274
}
275275

276276
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -290,8 +290,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwuud_epi32(
290290
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwuuds_epi32(__m512i __A,
291291
__m512i __B,
292292
__m512i __C) {
293-
return (__m512i)__builtin_ia32_vpdpwuuds512((__v16si)__A, (__v16si)__B,
294-
(__v16si)__C);
293+
return (__m512i)__builtin_ia32_vpdpwuuds512((__v16si)__A, (__v32hu)__B,
294+
(__v32hu)__C);
295295
}
296296

297297
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwuuds_epi32(

clang/lib/Headers/avx512vlvnniintrin.h

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,8 @@
8080
/// ENDFOR
8181
/// DST[MAX:256] := 0
8282
/// \endcode
83-
#define _mm256_dpwssd_epi32(S, A, B) \
84-
((__m256i)__builtin_ia32_vpdpwssd256((__v8si)(S), (__v8si)(A), (__v8si)(B)))
83+
#define _mm256_dpwssd_epi32(S, A, B) \
84+
((__m256i)__builtin_ia32_vpdpwssd256((__v8si)(S), (__v16hi)(A), (__v16hi)(B)))
8585

8686
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with
8787
/// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit
@@ -98,8 +98,9 @@
9898
/// ENDFOR
9999
/// DST[MAX:256] := 0
100100
/// \endcode
101-
#define _mm256_dpwssds_epi32(S, A, B) \
102-
((__m256i)__builtin_ia32_vpdpwssds256((__v8si)(S), (__v8si)(A), (__v8si)(B)))
101+
#define _mm256_dpwssds_epi32(S, A, B) \
102+
((__m256i)__builtin_ia32_vpdpwssds256((__v8si)(S), (__v16hi)(A), \
103+
(__v16hi)(B)))
103104

104105
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with
105106
/// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed
@@ -157,8 +158,8 @@
157158
/// ENDFOR
158159
/// DST[MAX:128] := 0
159160
/// \endcode
160-
#define _mm_dpwssd_epi32(S, A, B) \
161-
((__m128i)__builtin_ia32_vpdpwssd128((__v4si)(S), (__v4si)(A), (__v4si)(B)))
161+
#define _mm_dpwssd_epi32(S, A, B) \
162+
((__m128i)__builtin_ia32_vpdpwssd128((__v4si)(S), (__v8hi)(A), (__v8hi)(B)))
162163

163164
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with
164165
/// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit
@@ -175,8 +176,8 @@
175176
/// ENDFOR
176177
/// DST[MAX:128] := 0
177178
/// \endcode
178-
#define _mm_dpwssds_epi32(S, A, B) \
179-
((__m128i)__builtin_ia32_vpdpwssds128((__v4si)(S), (__v4si)(A), (__v4si)(B)))
179+
#define _mm_dpwssds_epi32(S, A, B) \
180+
((__m128i)__builtin_ia32_vpdpwssds128((__v4si)(S), (__v8hi)(A), (__v8hi)(B)))
180181

181182
static __inline__ __m256i __DEFAULT_FN_ATTRS256
182183
_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)

clang/lib/Headers/avx512vnniintrin.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ _mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
6868
static __inline__ __m512i __DEFAULT_FN_ATTRS
6969
_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B)
7070
{
71-
return (__m512i)__builtin_ia32_vpdpwssd512((__v16si)__S, (__v16si)__A,
72-
(__v16si)__B);
71+
return (__m512i)__builtin_ia32_vpdpwssd512((__v16si)__S, (__v32hi)__A,
72+
(__v32hi)__B);
7373
}
7474

7575
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -91,8 +91,8 @@ _mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
9191
static __inline__ __m512i __DEFAULT_FN_ATTRS
9292
_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B)
9393
{
94-
return (__m512i)__builtin_ia32_vpdpwssds512((__v16si)__S, (__v16si)__A,
95-
(__v16si)__B);
94+
return (__m512i)__builtin_ia32_vpdpwssds512((__v16si)__S, (__v32hi)__A,
95+
(__v32hi)__B);
9696
}
9797

9898
static __inline__ __m512i __DEFAULT_FN_ATTRS

0 commit comments

Comments
 (0)