Skip to content

Commit 86e532f

Browse files
committed
add avx10 minmax
1 parent aabf2ab commit 86e532f

File tree

3 files changed

+80
-0
lines changed

3 files changed

+80
-0
lines changed

gen/gen_avx512.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,13 @@ void putX_X_XM_IMM()
447447
{ 0x1B, "vcvtne2ph2hf8s", T_MUST_EVEX | T_F2 | T_MAP5 | T_EW0 | T_YMM | T_B16 | T_N1, false },
448448

449449
{ 0x52, "vdpphps", T_MUST_EVEX | T_0F38 | T_EW0 | T_YMM | T_B32, false },
450+
{ 0x52, "vminmaxnepbf16", T_MUST_EVEX | T_F2 | T_0F3A | T_EW0 | T_YMM | T_B16, true },
451+
{ 0x52, "vminmaxpd", T_MUST_EVEX | T_66 | T_0F3A | T_EW1 | T_YMM | T_B64 | T_SAE_Y | T_SAE_Z, true },
452+
{ 0x52, "vminmaxph", T_MUST_EVEX | T_0F3A | T_EW0 | T_YMM | T_B16 | T_SAE_Y | T_SAE_Z, true },
453+
{ 0x52, "vminmaxps", T_MUST_EVEX | T_66 | T_0F3A | T_EW0 | T_YMM | T_B32 | T_SAE_Y | T_SAE_Z, true },
454+
{ 0x53, "vminmaxsd", T_MUST_EVEX | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_N8, true },
455+
{ 0x53, "vminmaxsh", T_MUST_EVEX | T_0F3A | T_EW0 | T_SAE_X | T_N2, true },
456+
{ 0x53, "vminmaxss", T_MUST_EVEX | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_N4, true },
450457
};
451458
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
452459
const Tbl *p = &tbl[i];

test/avx10/minmax.txt

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
vminmaxnepbf16(xm1|k3|T_z, xm2, xm3, 5);
2+
vminmaxnepbf16(xm1|k3|T_z, xm2, ptr[rax+128], 5);
3+
vminmaxnepbf16(xm1|k3|T_z, xm2, ptr_b[rax+128], 5);
4+
5+
vminmaxnepbf16(ym1|k3|T_z, ym2, ym3, 5);
6+
vminmaxnepbf16(ym1|k3|T_z, ym2, ptr[rax+128], 5);
7+
vminmaxnepbf16(ym1|k3|T_z, ym2, ptr_b[rax+128], 5);
8+
9+
vminmaxnepbf16(zm1|k3|T_z, zm2, zm3, 5);
10+
vminmaxnepbf16(zm1|k3|T_z, zm2, ptr[rax+128], 5);
11+
vminmaxnepbf16(zm1|k3|T_z, zm2, ptr_b[rax+128], 5);
12+
//
13+
vminmaxpd(xm1|k3|T_z, xm2, xm3, 5);
14+
vminmaxpd(xm1|k3|T_z, xm2, ptr[rax+128], 5);
15+
vminmaxpd(xm1|k3|T_z, xm2, ptr_b[rax+128], 5);
16+
17+
vminmaxpd(ym1|k3|T_z, ym2, ym3, 5);
18+
vminmaxpd(ym1|k3|T_z, ym2, ym3|T_sae, 5);
19+
vminmaxpd(ym1|k3|T_z, ym2, ptr[rax+128], 5);
20+
vminmaxpd(ym1|k3|T_z, ym2, ptr_b[rax+128], 5);
21+
22+
vminmaxpd(zm1|k3|T_z, zm2, zm3, 5);
23+
vminmaxpd(zm1|k3|T_z, zm2, zm3|T_sae, 5);
24+
vminmaxpd(zm1|k3|T_z, zm2, ptr[rax+128], 5);
25+
vminmaxpd(zm1|k3|T_z, zm2, ptr_b[rax+128], 5);
26+
//
27+
// vminmaxph, xmm operands: third operand given as a register, ptr[], and ptr_b[]; every call uses xm1|k3|T_z and imm 5.
vminmaxph(xm1|k3|T_z, xm2, xm3, 5);
28+
vminmaxph(xm1|k3|T_z, xm2, ptr[rax+128], 5);
29+
// NOTE(review): this call repeats the ptr[rax+128] case directly above verbatim; confirm whether a different operand was intended.
vminmaxph(xm1|k3|T_z, xm2, ptr[rax+128], 5);
30+
vminmaxph(xm1|k3|T_z, xm2, ptr_b[rax+128], 5);
31+
32+
vminmaxph(ym1|k3|T_z, ym2, ym3, 5);
33+
vminmaxph(ym1|k3|T_z, ym2, ym3|T_sae, 5);
34+
vminmaxph(ym1|k3|T_z, ym2, ptr[rax+128], 5);
35+
vminmaxph(ym1|k3|T_z, ym2, ptr_b[rax+128], 5);
36+
37+
vminmaxph(zm1|k3|T_z, zm2, zm3, 5);
38+
vminmaxph(zm1|k3|T_z, zm2, zm3|T_sae, 5);
39+
vminmaxph(zm1|k3|T_z, zm2, ptr[rax+128], 5);
40+
vminmaxph(zm1|k3|T_z, zm2, ptr_b[rax+128], 5);
41+
//
42+
vminmaxps(xm1|k3|T_z, xm2, xm3, 5);
43+
vminmaxps(xm1|k3|T_z, xm2, ptr[rax+128], 5);
44+
vminmaxps(xm1|k3|T_z, xm2, ptr_b[rax+128], 5);
45+
46+
vminmaxps(ym1|k3|T_z, ym2, ym3, 5);
47+
vminmaxps(ym1|k3|T_z, ym2, ym3|T_sae, 5);
48+
vminmaxps(ym1|k3|T_z, ym2, ptr[rax+128], 5);
49+
vminmaxps(ym1|k3|T_z, ym2, ptr_b[rax+128], 5);
50+
51+
vminmaxps(zm1|k3|T_z, zm2, zm3, 5);
52+
vminmaxps(zm1|k3|T_z, zm2, zm3|T_sae, 5);
53+
vminmaxps(zm1|k3|T_z, zm2, ptr[rax+128], 5);
54+
vminmaxps(zm1|k3|T_z, zm2, ptr_b[rax+128], 5);
55+
//
56+
vminmaxsd(xm1|k3|T_z, xm2, xm3, 5);
57+
vminmaxsd(xm1|k3|T_z, xm2, xm3|T_sae, 5);
58+
vminmaxsd(xm1|k3|T_z, xm2, ptr[rax+128], 5);
59+
//
60+
vminmaxsh(xm1|k3|T_z, xm2, xm3, 5);
61+
vminmaxsh(xm1|k3|T_z, xm2, xm3|T_sae, 5);
62+
vminmaxsh(xm1|k3|T_z, xm2, ptr[rax+128], 5);
63+
//
64+
vminmaxss(xm1|k3|T_z, xm2, xm3, 5);
65+
vminmaxss(xm1|k3|T_z, xm2, xm3|T_sae, 5);
66+
vminmaxss(xm1|k3|T_z, xm2, ptr[rax+128], 5);

xbyak/xbyak_mnemonic.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2374,6 +2374,13 @@ void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm)
23742374
// vmaxpbf16: forwards the three operands to opAVX_X_X_XM with opcode byte 0x5F.
void vmaxpbf16(const Xmm& x1, const Xmm& x2, const Operand& op)
{
	opAVX_X_X_XM(x1, x2, op,
		T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16,
		0x5F);
}
23752375
// vmaxph: forwards to opAVX_X_X_XM with opcode byte 0x5F.
// op2 defaults to an empty Operand(), so the call also accepts two operands.
void vmaxph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
{
	opAVX_X_X_XM(xmm, op1, op2,
		T_MUST_EVEX | T_MAP5 | T_EW0 | T_YMM | T_SAE_Z | T_B16,
		0x5F);
}
23762376
// vmaxsh: forwards to opAVX_X_X_XM with opcode byte 0x5F.
// op2 defaults to an empty Operand(), so the call also accepts two operands.
void vmaxsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
{
	opAVX_X_X_XM(xmm, op1, op2,
		T_MUST_EVEX | T_F3 | T_MAP5 | T_EW0 | T_SAE_X | T_N2,
		0x5F);
}
2377+
// vminmaxnepbf16: forwards the operands and the caller's imm to opAVX_X_X_XM
// with opcode byte 0x52. Flags are listed in the same order as the matching
// row of the generator table in gen/gen_avx512.cpp.
void vminmaxnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm)
{
	opAVX_X_X_XM(x1, x2, op,
		T_MUST_EVEX | T_F2 | T_0F3A | T_EW0 | T_YMM | T_B16,
		0x52, imm);
}
2378+
// vminmaxpd: forwards the operands and the caller's imm to opAVX_X_X_XM
// with opcode byte 0x52. Flags are listed in the same order as the matching
// row of the generator table in gen/gen_avx512.cpp.
void vminmaxpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm)
{
	opAVX_X_X_XM(x1, x2, op,
		T_MUST_EVEX | T_66 | T_0F3A | T_EW1 | T_YMM | T_B64 | T_SAE_Y | T_SAE_Z,
		0x52, imm);
}
2379+
// vminmaxph: forwards the operands and the caller's imm to opAVX_X_X_XM
// with opcode byte 0x52. Flags are listed in the same order as the matching
// row of the generator table in gen/gen_avx512.cpp.
void vminmaxph(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm)
{
	opAVX_X_X_XM(x1, x2, op,
		T_MUST_EVEX | T_0F3A | T_EW0 | T_YMM | T_B16 | T_SAE_Y | T_SAE_Z,
		0x52, imm);
}
2380+
// vminmaxps: forwards the operands and the caller's imm to opAVX_X_X_XM
// with opcode byte 0x52. Flags are listed in the same order as the matching
// row of the generator table in gen/gen_avx512.cpp.
void vminmaxps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm)
{
	opAVX_X_X_XM(x1, x2, op,
		T_MUST_EVEX | T_66 | T_0F3A | T_EW0 | T_YMM | T_B32 | T_SAE_Y | T_SAE_Z,
		0x52, imm);
}
2381+
// vminmaxsd: forwards the operands and the caller's imm to opAVX_X_X_XM
// with opcode byte 0x53. Flags are listed in the same order as the matching
// row of the generator table in gen/gen_avx512.cpp.
void vminmaxsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm)
{
	opAVX_X_X_XM(x1, x2, op,
		T_MUST_EVEX | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_N8,
		0x53, imm);
}
2382+
// vminmaxsh: forwards the operands and the caller's imm to opAVX_X_X_XM
// with opcode byte 0x53. Flags are listed in the same order as the matching
// row of the generator table in gen/gen_avx512.cpp.
void vminmaxsh(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm)
{
	opAVX_X_X_XM(x1, x2, op,
		T_MUST_EVEX | T_0F3A | T_EW0 | T_SAE_X | T_N2,
		0x53, imm);
}
2383+
// vminmaxss: forwards the operands and the caller's imm to opAVX_X_X_XM
// with opcode byte 0x53. Flags are listed in the same order as the matching
// row of the generator table in gen/gen_avx512.cpp.
void vminmaxss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm)
{
	opAVX_X_X_XM(x1, x2, op,
		T_MUST_EVEX | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_N4,
		0x53, imm);
}
23772384
// vminpbf16: forwards the three operands to opAVX_X_X_XM with opcode byte 0x5D.
void vminpbf16(const Xmm& x1, const Xmm& x2, const Operand& op)
{
	opAVX_X_X_XM(x1, x2, op,
		T_MUST_EVEX | T_66 | T_MAP5 | T_EW0 | T_YMM | T_B16,
		0x5D);
}
23782385
// vminph: forwards to opAVX_X_X_XM with opcode byte 0x5D.
// op2 defaults to an empty Operand(), so the call also accepts two operands.
void vminph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
{
	opAVX_X_X_XM(xmm, op1, op2,
		T_MUST_EVEX | T_MAP5 | T_EW0 | T_YMM | T_SAE_Z | T_B16,
		0x5D);
}
23792386
// vminsh: forwards to opAVX_X_X_XM with opcode byte 0x5D.
// op2 defaults to an empty Operand(), so the call also accepts two operands.
void vminsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
{
	opAVX_X_X_XM(xmm, op1, op2,
		T_MUST_EVEX | T_F3 | T_MAP5 | T_EW0 | T_SAE_X | T_N2,
		0x5D);
}

0 commit comments

Comments
 (0)