Skip to content

Commit cc40d7b

Browse files
committed
- vpdpwsud_128, vpdpwsud_256, vpdowsud_512
- vpdpwsuds_128, vpdpwsuds_256, vpdpwsuds_512 - vpdpwusd_128, vpdpwusd_256, vpdpwusd_512 - vpdpwusds_128, vpdpwusds_256, vpdpwusds_512 - vpdpwuud_128, vpdpwuud_256, vpdpwuud_512 - vpdpwuuds_128, vpdpwuuds_256, vpdpwuuds_512
1 parent 3735e20 commit cc40d7b

18 files changed

+1371
-552
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4325,64 +4325,64 @@ let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
43254325
}
43264326

43274327
let Features = "avx10.2", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
4328-
def vpdpwsud512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
4329-
def vpdpwsuds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
4330-
def vpdpwusd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
4331-
def vpdpwusds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
4332-
def vpdpwuud512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
4333-
def vpdpwuuds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
4328+
def vpdpwsud512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, short>, _Vector<32, unsigned short>)">;
4329+
def vpdpwsuds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, short>, _Vector<32, unsigned short>)">;
4330+
def vpdpwusd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, unsigned short>, _Vector<32, short>)">;
4331+
def vpdpwusds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, unsigned short>, _Vector<32, short>)">;
4332+
def vpdpwuud512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, unsigned short>, _Vector<32, unsigned short>)">;
4333+
def vpdpwuuds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, unsigned short>, _Vector<32, unsigned short>)">;
43344334
}
43354335

43364336
let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
43374337
def mpsadbw512 : X86Builtin<"_Vector<32, short>(_Vector<64, char>, _Vector<64, char>, _Constant char)">;
43384338
}
43394339

43404340
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
4341-
def vpdpwsud128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
4341+
def vpdpwsud128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, short>, _Vector<8, unsigned short>)">;
43424342
}
43434343

43444344
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
4345-
def vpdpwsud256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
4345+
def vpdpwsud256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, short>, _Vector<16, unsigned short>)">;
43464346
}
43474347

43484348
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
4349-
def vpdpwsuds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
4349+
def vpdpwsuds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, short>, _Vector<8, unsigned short>)">;
43504350
}
43514351

43524352
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
4353-
def vpdpwsuds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
4353+
def vpdpwsuds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, short>, _Vector<16, unsigned short>)">;
43544354
}
43554355

43564356
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
4357-
def vpdpwusd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
4357+
def vpdpwusd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, unsigned short>, _Vector<8, short>)">;
43584358
}
43594359

43604360
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
4361-
def vpdpwusd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
4361+
def vpdpwusd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, unsigned short>, _Vector<16, short>)">;
43624362
}
43634363

43644364
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
4365-
def vpdpwusds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
4365+
def vpdpwusds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, unsigned short>, _Vector<8, short>)">;
43664366
}
43674367

43684368
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
4369-
def vpdpwusds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
4369+
def vpdpwusds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, unsigned short>, _Vector<16, short>)">;
43704370
}
43714371

43724372
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
4373-
def vpdpwuud128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
4373+
def vpdpwuud128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, unsigned short>, _Vector<8, unsigned short>)">;
43744374
}
43754375

43764376
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
4377-
def vpdpwuud256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
4377+
def vpdpwuud256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
43784378
}
43794379

43804380
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
4381-
def vpdpwuuds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
4381+
def vpdpwuuds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, unsigned short>, _Vector<8, unsigned short>)">;
43824382
}
43834383

43844384
let Features = "avxvnniint16|avx10.2", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
4385-
def vpdpwuuds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
4385+
def vpdpwuuds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
43864386
}
43874387

43884388
let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {

clang/lib/Headers/avx10_2_512niintrin.h

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -185,8 +185,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbuuds_epi32(
185185
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwsud_epi32(__m512i __A,
186186
__m512i __B,
187187
__m512i __C) {
188-
return (__m512i)__builtin_ia32_vpdpwsud512((__v16si)__A, (__v16si)__B,
189-
(__v16si)__C);
188+
return (__m512i)__builtin_ia32_vpdpwsud512((__v16si)__A, (__v32hi)__B,
189+
(__v32hu)__C);
190190
}
191191

192192
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -206,8 +206,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsud_epi32(
206206
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwsuds_epi32(__m512i __A,
207207
__m512i __B,
208208
__m512i __C) {
209-
return (__m512i)__builtin_ia32_vpdpwsuds512((__v16si)__A, (__v16si)__B,
210-
(__v16si)__C);
209+
return (__m512i)__builtin_ia32_vpdpwsuds512((__v16si)__A, (__v32hi)__B,
210+
(__v32hu)__C);
211211
}
212212

213213
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwsuds_epi32(
@@ -227,8 +227,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsuds_epi32(
227227
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwusd_epi32(__m512i __A,
228228
__m512i __B,
229229
__m512i __C) {
230-
return (__m512i)__builtin_ia32_vpdpwusd512((__v16si)__A, (__v16si)__B,
231-
(__v16si)__C);
230+
return (__m512i)__builtin_ia32_vpdpwusd512((__v16si)__A, (__v32hu)__B,
231+
(__v32hi)__C);
232232
}
233233

234234
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -248,8 +248,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusd_epi32(
248248
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwusds_epi32(__m512i __A,
249249
__m512i __B,
250250
__m512i __C) {
251-
return (__m512i)__builtin_ia32_vpdpwusds512((__v16si)__A, (__v16si)__B,
252-
(__v16si)__C);
251+
return (__m512i)__builtin_ia32_vpdpwusds512((__v16si)__A, (__v32hu)__B,
252+
(__v32hi)__C);
253253
}
254254

255255
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwusds_epi32(
@@ -269,8 +269,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusds_epi32(
269269
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwuud_epi32(__m512i __A,
270270
__m512i __B,
271271
__m512i __C) {
272-
return (__m512i)__builtin_ia32_vpdpwuud512((__v16si)__A, (__v16si)__B,
273-
(__v16si)__C);
272+
return (__m512i)__builtin_ia32_vpdpwuud512((__v16si)__A, (__v32hu)__B,
273+
(__v32hu)__C);
274274
}
275275

276276
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -290,8 +290,8 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwuud_epi32(
290290
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwuuds_epi32(__m512i __A,
291291
__m512i __B,
292292
__m512i __C) {
293-
return (__m512i)__builtin_ia32_vpdpwuuds512((__v16si)__A, (__v16si)__B,
294-
(__v16si)__C);
293+
return (__m512i)__builtin_ia32_vpdpwuuds512((__v16si)__A, (__v32hu)__B,
294+
(__v32hu)__C);
295295
}
296296

297297
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwuuds_epi32(

0 commit comments

Comments
 (0)