Skip to content
forked from v8/v8

Commit

Permalink
[x64] Implement 256-bit assembly for SSE4_UNOP instructions
Browse files Browse the repository at this point in the history
Bug: v8:12228
Change-Id: Icd61de973b4d80bd81a2bbbf9db621315fb21a3c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4535539
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Yolanda Chen <yolanda.chen@intel.com>
Cr-Commit-Position: refs/heads/main@{#87781}
  • Loading branch information
yolanda15 authored and V8 LUCI CQ committed May 23, 2023
1 parent fb7837b commit b71d303
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 13 deletions.
21 changes: 16 additions & 5 deletions src/codegen/x64/assembler-x64.h
Original file line number Diff line number Diff line change
Expand Up @@ -1330,16 +1330,27 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
} \
void v##instruction(XMMRegister dst, Operand src) { \
vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0); \
} \
}
SSE4_UNOP_INSTRUCTION_LIST(DECLARE_SSE4_PMOV_AVX_INSTRUCTION)
#undef DECLARE_SSE4_PMOV_AVX_INSTRUCTION

// Declares the YMM-destination overloads of an SSE4 pmovsx*/pmovzx*
// instruction: one taking an XMM register source and one taking a memory
// operand. Parameters follow the V(instruction, prefix, escape1, escape2,
// opcode) shape of the instruction lists.
//
// Each overload emits exactly one instruction through vinstr. ymm0 is passed
// as the middle register argument, as in the YMM vptest overloads; the
// instruction itself has only two operands.
#define DECLARE_SSE4_PMOV_AVX2_INSTRUCTION(instruction, prefix, escape1,     \
                                           escape2, opcode)                  \
  void v##instruction(YMMRegister dst, XMMRegister src) {                    \
    vinstr(0x##opcode, dst, ymm0, src, k##prefix, k##escape1##escape2, kW0); \
  }                                                                          \
  void v##instruction(YMMRegister dst, Operand src) {                        \
    vinstr(0x##opcode, dst, ymm0, src, k##prefix, k##escape1##escape2, kW0); \
  }
SSE4_UNOP_INSTRUCTION_LIST_PMOV(DECLARE_SSE4_PMOV_AVX2_INSTRUCTION)
#undef DECLARE_SSE4_PMOV_AVX2_INSTRUCTION

SSE4_UNOP_INSTRUCTION_LIST(DECLARE_SSE4_PMOV_AVX_INSTRUCTION)
#undef DECLARE_SSE4_PMOV_AVX_INSTRUCTION
// 256-bit vptest, register-register form.
// Forwards to vinstr with opcode 0x17, the k66 prefix, the k0F38 escape and
// kW0. ymm0 is passed as the middle register argument (presumably a
// placeholder, since vptest has only two operands -- confirm against vinstr).
// AVX is passed as the trailing argument (presumably the required CPU
// feature -- confirm against vinstr).
void vptest(YMMRegister dst, YMMRegister src) {
vinstr(0x17, dst, ymm0, src, k66, k0F38, kW0, AVX);
}
// 256-bit vptest, register-memory form.
// Same vinstr arguments as the register-register overload (opcode 0x17, k66,
// k0F38, kW0, ymm0 as the middle register argument, AVX last), but the second
// operand is a memory Operand.
void vptest(YMMRegister dst, Operand src) {
vinstr(0x17, dst, ymm0, src, k66, k0F38, kW0, AVX);
}

#define DECLARE_AVX_INSTRUCTION(instruction, prefix, escape1, escape2, opcode) \
void v##instruction(Register dst, XMMRegister src, uint8_t imm8) { \
Expand Down
15 changes: 9 additions & 6 deletions src/codegen/x64/sse-instr.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,12 +183,15 @@
// SSE instructions whose AVX version has two operands.
// X-macro list; each entry is V(instruction, prefix, escape1, escape2, opcode).
// ptest is listed directly. The pmovsx*/pmovzx* entries are supplied solely by
// SSE4_UNOP_INSTRUCTION_LIST_PMOV so that every instruction is expanded
// exactly once.
#define SSE4_UNOP_INSTRUCTION_LIST(V) \
  V(ptest, 66, 0F, 38, 17)            \
  SSE4_UNOP_INSTRUCTION_LIST_PMOV(V)

// X-macro list of the pmovsx*/pmovzx* instructions; each entry is
// V(instruction, prefix, escape1, escape2, opcode).
// Kept separate from SSE4_UNOP_INSTRUCTION_LIST (which includes it) so these
// instructions can also be expanded on their own, e.g. by
// DECLARE_SSE4_PMOV_AVX2_INSTRUCTION for the YMM-destination overloads.
#define SSE4_UNOP_INSTRUCTION_LIST_PMOV(V) \
V(pmovsxbw, 66, 0F, 38, 20) \
V(pmovsxwd, 66, 0F, 38, 23) \
V(pmovsxdq, 66, 0F, 38, 25) \
V(pmovzxbw, 66, 0F, 38, 30) \
V(pmovzxbd, 66, 0F, 38, 31) \
V(pmovzxwd, 66, 0F, 38, 33) \
V(pmovzxdq, 66, 0F, 38, 35)

#define SSE4_EXTRACT_INSTRUCTION_LIST(V) \
Expand Down
31 changes: 29 additions & 2 deletions test/unittests/assembler/assembler-x64-unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2552,6 +2552,13 @@ TEST_F(AssemblerX64Test, AssemblerX64AVX2Op256bit) {
__ vpbroadcastb(ymm3, Operand(rbx, rcx, times_4, 10000));
__ vpbroadcastw(ymm15, xmm4);
__ vpbroadcastw(ymm5, Operand(rbx, rcx, times_4, 10000));
__ vpmovsxbw(ymm6, xmm5);
__ vpmovsxwd(ymm1, Operand(rbx, rcx, times_4, 10000));
__ vpmovsxdq(ymm14, xmm6);
__ vpmovzxbw(ymm0, Operand(rbx, rcx, times_4, 10000));
__ vpmovzxbd(ymm14, xmm6);
__ vpmovzxwd(ymm7, Operand(rbx, rcx, times_4, 10000));
__ vpmovzxdq(ymm8, xmm6);

CodeDesc desc;
masm.GetCode(isolate, &desc);
Expand Down Expand Up @@ -2594,7 +2601,21 @@ TEST_F(AssemblerX64Test, AssemblerX64AVX2Op256bit) {
// vpbroadcastw ymm15, xmm4
0xc4, 0x62, 0x7d, 0x79, 0xfc,
// vpbroadcastw ymm5, WORD PTR [rbx+rcx*4+0x2710]
0xc4, 0xe2, 0x7d, 0x79, 0xac, 0x8b, 0x10, 0x27, 0x00, 0x00};
0xc4, 0xe2, 0x7d, 0x79, 0xac, 0x8b, 0x10, 0x27, 0x00, 0x00,
// vpmovsxbw ymm6, xmm5
0xc4, 0xe2, 0x7d, 0x20, 0xf5,
// vpmovsxwd ymm1, XMMWORD PTR [rbx+rcx*4+0x2710]
0xc4, 0xe2, 0x7d, 0x23, 0x8c, 0x8b, 0x10, 0x27, 0x00, 0x00,
// vpmovsxdq ymm14, xmm6
0xc4, 0x62, 0x7d, 0x25, 0xf6,
// vpmovzxbw ymm0, XMMWORD PTR [rbx+rcx*4+0x2710]
0xc4, 0xe2, 0x7d, 0x30, 0x84, 0x8b, 0x10, 0x27, 0x00, 0x00,
// vpmovzxbd ymm14, xmm6
0xc4, 0x62, 0x7d, 0x31, 0xf6,
// vpmovzxwd ymm7, XMMWORD PTR [rbx+rcx*4+0x2710]
0xc4, 0xe2, 0x7d, 0x33, 0xbc, 0x8b, 0x10, 0x27, 0x00, 0x00,
// vpmovzxdq ymm8, xmm6
0xc4, 0x62, 0x7d, 0x35, 0xc6};
CHECK_EQ(0, memcmp(expected, desc.buffer, sizeof(expected)));
}

Expand Down Expand Up @@ -2824,6 +2845,8 @@ TEST_F(AssemblerX64Test, AssemblerX64CmpOperations256bit) {
__ vcmpnltpd(ymm10, ymm12, Operand(r12, r11, times_4, 10000));
__ vcmpnleps(ymm9, ymm11, Operand(r10, r9, times_8, 10000));
__ vcmpgepd(ymm13, ymm3, ymm12);
__ vptest(ymm7, ymm1);
__ vptest(ymm10, Operand(rbx, rcx, times_4, 10000));

CodeDesc desc;
masm.GetCode(isolate, &desc);
Expand All @@ -2850,7 +2873,11 @@ TEST_F(AssemblerX64Test, AssemblerX64CmpOperations256bit) {
// vcmpnleps ymm9, ymm11, YMMWORD PTR [r10+r9*8+0x2710]
0xC4, 0x01, 0x24, 0xC2, 0x8C, 0xCA, 0x10, 0x27, 0x00, 0x00, 0x06,
// vcmpgepd ymm13, ymm3, ymm12
0xC4, 0x41, 0x65, 0xC2, 0xEC, 0x0D};
0xC4, 0x41, 0x65, 0xC2, 0xEC, 0x0D,
// vptest ymm7, ymm1
0xc4, 0xe2, 0x7d, 0x17, 0xf9,
// vptest ymm10, YMMWORD PTR [rbx+rcx*4+0x2710]
0xc4, 0x62, 0x7d, 0x17, 0x94, 0x8b, 0x10, 0x27, 0x00, 0x00};
CHECK_EQ(0, memcmp(expected, desc.buffer, sizeof(expected)));
}

Expand Down
17 changes: 17 additions & 0 deletions test/unittests/assembler/disasm-x64-unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1481,6 +1481,9 @@ TEST_F(DisasmX64Test, DisasmX64YMMRegister) {
vcmpnlepd(ymm5, ymm4, Operand(rbx, rcx, times_4, 10000)));
COMPARE("c5dcc2e90d vcmpps ymm5,ymm4,ymm1, (ge)",
vcmpgeps(ymm5, ymm4, ymm1));
COMPARE("c4e27d17f9 vptest ymm7,ymm1", vptest(ymm7, ymm1));
COMPARE("c4627d17948b10270000 vptest ymm10,[rbx+rcx*4+0x2710]",
vptest(ymm10, Operand(rbx, rcx, times_4, 10000)));

// SSE2_UNOP
COMPARE("c5fd51ca vsqrtpd ymm1,ymm2", vsqrtpd(ymm1, ymm2));
Expand Down Expand Up @@ -1522,6 +1525,20 @@ TEST_F(DisasmX64Test, DisasmX64YMMRegister) {
vpermq(ymm5, ymm3, 0xD8));
COMPARE("c463fd00848b102700001e vpermq ymm8,[rbx+rcx*4+0x2710],0x1e",
vpermq(ymm8, Operand(rbx, rcx, times_4, 10000), 0x1E));

// SSE4_UNOP
COMPARE("c4e27d20f5 vpmovsxbw ymm6,ymm5", vpmovsxbw(ymm6, ymm5));
COMPARE("c4e27d238c8b10270000 vpmovsxwd ymm1,[rbx+rcx*4+0x2710]",
vpmovsxwd(ymm1, Operand(rbx, rcx, times_4, 10000)));
COMPARE("c4627d25f6 vpmovsxdq ymm14,ymm6",
vpmovsxdq(ymm14, ymm6));
COMPARE("c4e27d30848b10270000 vpmovzxbw ymm0,[rbx+rcx*4+0x2710]",
vpmovzxbw(ymm0, Operand(rbx, rcx, times_4, 10000)));
COMPARE("c4627d31f6 vpmovzxbd ymm14,ymm6",
vpmovzxbd(ymm14, ymm6));
COMPARE("c4e27d33bc8b10270000 vpmovzxwd ymm7,[rbx+rcx*4+0x2710]",
vpmovzxwd(ymm7, Operand(rbx, rcx, times_4, 10000)));
COMPARE("c4627d35c6 vpmovzxdq ymm8,ymm6", vpmovzxdq(ymm8, ymm6));
}
}

Expand Down

0 comments on commit b71d303

Please sign in to comment.