Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RV64_DYNAREC] Implemented the first AVX128 opcode for scalar only #1962

Merged
merged 1 commit into from
Oct 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -971,6 +971,8 @@ if(RV64_DYNAREC)
"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_f20f_vector.c"
"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_f30f.c"
"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_f30f_vector.c"
"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_avx.c"
"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c"
)
endif()

Expand Down
40 changes: 39 additions & 1 deletion src/dynarec/rv64/dynarec_rv64_00_3.c
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,45 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
*need_epilog = 0;
*ok = 0;
break;

// 0xC4: three-byte VEX prefix (two payload bytes follow the 0xC4 byte).
case 0xC4:
nextop = F8;
// In 32-bit mode, when MODREG is false for the byte just read, the
// sequence is not decoded as VEX here and goes through DEFAULT instead.
if (rex.is32bits && !(MODREG)) {
DEFAULT;
} else {
vex_t vex = { 0 };
vex.rex = rex;
// First payload byte: bits 4..0 select the opcode map; bits 5, 6 and 7
// are stored inverted into rex.b, rex.x and rex.r respectively.
u8 = nextop;
vex.m = u8 & 0b00011111;
vex.rex.b = (u8 & 0b00100000) ? 0 : 1;
vex.rex.x = (u8 & 0b01000000) ? 0 : 1;
vex.rex.r = (u8 & 0b10000000) ? 0 : 1;
// Second payload byte: bits 1..0 -> p, bit 2 -> l, bits 6..3 (inverted)
// -> v, bit 7 -> rex.w.
u8 = F8;
vex.p = u8 & 0b00000011;
vex.l = (u8 >> 2) & 1;
vex.v = ((~u8) >> 3) & 0b1111;
vex.rex.w = (u8 >> 7) & 1;
addr = dynarec64_AVX(dyn, addr, ip, ninst, vex, ok, need_epilog);
}
break;
// 0xC5: two-byte VEX prefix (a single payload byte follows the 0xC5 byte).
case 0xC5:
nextop = F8;
// Same 32-bit-mode filter as for 0xC4 above.
if (rex.is32bits && !(MODREG)) {
DEFAULT;
} else {
vex_t vex = { 0 };
vex.rex = rex;
// Payload byte: bits 1..0 -> p, bit 2 -> l, bits 6..3 (inverted) -> v,
// bit 7 (inverted) -> rex.r.
u8 = nextop;
vex.p = u8 & 0b00000011;
vex.l = (u8 >> 2) & 1;
vex.v = ((~u8) >> 3) & 0b1111;
vex.rex.r = (u8 & 0b10000000) ? 0 : 1;
// The short form carries no B/X/W bits and no map field: they are
// forced to 0 and the map is fixed to VEX_M_0F.
vex.rex.b = 0;
vex.rex.x = 0;
vex.rex.w = 0;
vex.m = VEX_M_0F;
addr = dynarec64_AVX(dyn, addr, ip, ninst, vex, ok, need_epilog);
}
break;
case 0xC6:
INST_NAME("MOV Eb, Ib");
nextop=F8;
Expand Down
63 changes: 63 additions & 0 deletions src/dynarec/rv64/dynarec_rv64_avx.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <errno.h>

#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "emu/x64run_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
#include "dynarec_native.h"

#include "rv64_printer.h"
#include "dynarec_rv64_private.h"
#include "dynarec_rv64_functions.h"
#include "dynarec_rv64_helper.h"

/*
 * Return a short printable label for a VEX prefix selector.
 *
 * p: one of the VEX_P_* values.
 * Returns a string literal; "??" for any value that is not a known VEX_P_*.
 */
static const char* avx_prefix_string(uint16_t p)
{
    if (p == VEX_P_NONE) {
        return "0";
    }
    if (p == VEX_P_66) {
        return "66";
    }
    if (p == VEX_P_F2) {
        return "F2";
    }
    if (p == VEX_P_F3) {
        return "F3";
    }
    return "??";
}
/*
 * Return a short printable label for a VEX opcode-map selector.
 *
 * m: one of the VEX_M_* values.
 * Returns a string literal; "??" for any value that is not a known VEX_M_*.
 */
static const char* avx_map_string(uint16_t m)
{
    const char* label = "??";

    switch (m) {
        case VEX_M_NONE:
            label = "0";
            break;
        case VEX_M_0F:
            label = "0F";
            break;
        case VEX_M_0F38:
            label = "0F38";
            break;
        case VEX_M_0F3A:
            label = "0F3A";
            break;
        default:
            break;
    }
    return label;
}

/*
 * Entry point for VEX-encoded opcodes: routes to the handler matching the
 * (map, prefix) pair carried in `vex`.
 *
 * Only map 0F with prefix F3 has a handler; every other combination goes
 * through DEFAULT. When *ok ends up at -1 and dynarec logging, dumping or
 * missing-opcode reporting is enabled, a line describing the unimplemented
 * opcode is logged.
 *
 * Returns the address produced by the sub-handler, or `addr` unchanged when
 * no handler was called.
 */
uintptr_t dynarec64_AVX(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog)
{
    (void)ip;
    (void)need_epilog;

    // opcode feeds the log line below; rex is not referenced directly in this
    // function (presumably consumed by macros such as DEFAULT — confirm).
    uint8_t opcode = PK(0);
    rex_t rex = vex.rex;

    if ((vex.m != VEX_M_0F) || (vex.p != VEX_P_F3)) {
        DEFAULT;
    } else {
        addr = dynarec64_AVX_F3_0F(dyn, addr, ip, ninst, vex, ok, need_epilog);
    }

    if (*ok == -1) {
        if (box64_dynarec_log >= LOG_INFO || box64_dynarec_dump || box64_dynarec_missing == 1) {
            // 128 << vex.l: prints 128 when l == 0 and 256 when l == 1.
            dynarec_log(LOG_NONE, "Dynarec unimplemented AVX opcode size %d prefix %s map %s opcode %02X ", 128 << vex.l, avx_prefix_string(vex.p), avx_map_string(vex.m), opcode);
        }
    }
    return addr;
}
96 changes: 96 additions & 0 deletions src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <errno.h>

#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "emu/x64run_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
#include "dynarec_native.h"
#include "my_cpuid.h"
#include "emu/x87emu_private.h"
#include "emu/x64shaext.h"

#include "rv64_printer.h"
#include "dynarec_rv64_private.h"
#include "dynarec_rv64_functions.h"
#include "dynarec_rv64_helper.h"

/*
 * Handler for VEX-encoded opcodes in map 0F with the F3 prefix.
 *
 * Reads the opcode byte at `addr` and emits native code for it. Only 0x10
 * (VMOVSS Gx, [Vx,] Ex) is implemented; any other opcode goes through
 * DEFAULT.
 *
 * Returns the address following the bytes consumed for this instruction.
 */
uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, vex_t vex, int* ok, int* need_epilog)
{
    (void)ip;
    (void)need_epilog;

    uint8_t opcode = F8;
    // NOTE(review): many of the locals below are not named in this function's
    // visible text; they are presumably referenced by the GET*/DEFAULT helper
    // macros, so they are kept as-is — confirm before pruning.
    uint8_t nextop, u8;
    uint8_t gd, ed, vd;
    uint8_t wback, wb1, wb2, gback, vback;
    uint8_t eb1, eb2, gb1, gb2;
    int32_t i32, i32_;
    int cacheupd = 0;
    int v0, v1, v2;
    int q0, q1, q2;
    int d0, d1, d2;
    int s0;
    uint64_t tmp64u, u64;
    int64_t j64;
    int64_t fixedaddress, gdoffset, vxoffset;
    int unscaled;

    rex_t rex = vex.rex;

    switch (opcode) {
        case 0x10:
            INST_NAME("VMOVSS Gx, [Vx,] Ex");
            nextop = F8;
            GETG;
            if (MODREG) {
                if (gd == vex.v) {
                    // Destination and first source are the same register:
                    // only the low single from Ex has to be moved in.
                    v0 = sse_get_reg(dyn, ninst, x1, gd, 1);
                    q0 = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 1);
                    FMVS(v0, q0);
                } else {
                    GETGX();
                    GETVX();
                    GETEX(x2, 0, 1);
                    // Load both 64-bit halves of Vx into x3/x4.
                    if (rv64_xtheadmempair) {
                        // ADDI, not ADD: vxoffset is an immediate byte offset,
                        // exactly like gdoffset in the store path below.
                        ADDI(x1, vback, vxoffset);
                        TH_LDD(x3, x4, x1, 0);
                    } else {
                        LD(x3, vback, vxoffset);
                        LD(x4, vback, vxoffset + 8);
                    }
                    // Read the low 32 bits of Ex before Gx is written.
                    LWU(x5, wback, fixedaddress);
                    // Gx = Vx (all 128 bits) ...
                    if (rv64_xtheadmempair) {
                        ADDI(x1, gback, gdoffset);
                        TH_SDD(x3, x4, x1, 0);
                    } else {
                        SD(x3, gback, gdoffset);
                        SD(x4, gback, gdoffset + 8);
                    }
                    // ... then its low 32 bits are replaced by those of Ex.
                    SW(x5, gback, gdoffset);
                }
            } else {
                // Memory form: load one single into Gx and clear bits 32..127.
                v0 = sse_get_reg_empty(dyn, ninst, x1, gd, 1);
                SMREAD();
                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
                FLW(v0, ed, fixedaddress);
                // reset upper part
                SW(xZR, xEmu, offsetof(x64emu_t, xmm[gd]) + 4);
                SD(xZR, xEmu, offsetof(x64emu_t, xmm[gd]) + 8);
            }
            // NOTE(review): presumably flags the upper YMM half of gd as zero — confirm against YMM0's definition.
            YMM0(gd);
            break;
        default:
            DEFAULT;
    }
    return addr;
}
2 changes: 1 addition & 1 deletion src/dynarec/rv64/dynarec_rv64_f30f.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
} else {
v0 = sse_get_reg_empty(dyn, ninst, x1, gd, 1);
SMREAD();
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 8, 0);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
FLW(v0, ed, fixedaddress);
// reset upper part
SW(xZR, xEmu, offsetof(x64emu_t, xmm[gd]) + 4);
Expand Down
19 changes: 14 additions & 5 deletions src/dynarec/rv64/dynarec_rv64_functions.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,11 @@ int fpu_get_reg_xmm(dynarec_rv64_t* dyn, int t, int xmm)
return EXTREG(i);
}
// Reset fpu regs counter
void fpu_reset_reg_extcache(dynarec_rv64_t* dyn, extcache_t* e)
static void fpu_reset_reg_extcache(dynarec_rv64_t* dyn, extcache_t* e)
{
e->fpu_reg = 0;
for (int i=0; i<24; ++i) {
e->fpuused[i]=0;
for (int i = 0; i < 32; ++i) {
e->fpuused[i] = 0;
e->extcache[i].v = 0;
}
dyn->vector_sew = VECTOR_SEWNA;
Expand Down Expand Up @@ -492,7 +492,7 @@ void extcacheUnwind(extcache_t* cache)
cache->ssecache[i*2+1].v = -1;
}
int x87reg = 0;
for(int i=0; i<24; ++i) {
for (int i = 0; i < 32; ++i) {
if(cache->extcache[i].v) {
cache->fpuused[i] = 1;
switch (cache->extcache[i].t) {
Expand All @@ -515,6 +515,8 @@ void extcacheUnwind(extcache_t* cache)
break;
case EXT_CACHE_XMMR:
case EXT_CACHE_XMMW:
case EXT_CACHE_YMMR:
case EXT_CACHE_YMMW:
cache->ssecache[cache->extcache[i].n].reg = EXTREG(i);
cache->ssecache[cache->extcache[i].n].vector = 1;
cache->ssecache[cache->extcache[i].n].write = (cache->extcache[i].t == EXT_CACHE_XMMW) ? 1 : 0;
Expand Down Expand Up @@ -605,6 +607,8 @@ const char* getCacheName(int t, int n)
case EXT_CACHE_SCR: sprintf(buff, "Scratch"); break;
case EXT_CACHE_XMMW: sprintf(buff, "XMM%d", n); break;
case EXT_CACHE_XMMR: sprintf(buff, "xmm%d", n); break;
case EXT_CACHE_YMMW: sprintf(buff, "YMM%d", n); break;
case EXT_CACHE_YMMR: sprintf(buff, "ymm%d", n); break;
case EXT_CACHE_NONE: buff[0]='\0'; break;
}
return buff;
Expand Down Expand Up @@ -654,7 +658,7 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r
dynarec_log(LOG_NONE, ", jmp=out");
if(dyn->last_ip)
dynarec_log(LOG_NONE, ", last_ip=%p", (void*)dyn->last_ip);
for(int ii=0; ii<24; ++ii) {
for (int ii = 0; ii < 32; ++ii) {
switch(dyn->insts[ninst].e.extcache[ii].t) {
case EXT_CACHE_ST_D: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
case EXT_CACHE_ST_F: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
Expand All @@ -664,11 +668,15 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r
case EXT_CACHE_SD: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
case EXT_CACHE_XMMR: dynarec_log(LOG_NONE, " %s:%s", vnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
case EXT_CACHE_XMMW: dynarec_log(LOG_NONE, " %s:%s", vnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
case EXT_CACHE_YMMW: dynarec_log(LOG_NONE, " %s:%s", vnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
case EXT_CACHE_YMMR: dynarec_log(LOG_NONE, " %s:%s", vnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
case EXT_CACHE_SCR: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break;
case EXT_CACHE_NONE:
default: break;
}
}
if (dyn->ymm_zero)
dynarec_log(LOG_NONE, " ymm0_mask = %04x", dyn->ymm_zero);
if(dyn->e.stack || dyn->insts[ninst].e.stack_next || dyn->insts[ninst].e.x87stack)
dynarec_log(LOG_NONE, " X87:%d/%d(+%d/-%d)%d", dyn->e.stack, dyn->insts[ninst].e.stack_next, dyn->insts[ninst].e.stack_push, dyn->insts[ninst].e.stack_pop, dyn->insts[ninst].e.x87stack);
if(dyn->insts[ninst].e.combined1 || dyn->insts[ninst].e.combined2)
Expand Down Expand Up @@ -733,6 +741,7 @@ void fpu_reset(dynarec_rv64_t* dyn)
mmx_reset(&dyn->e);
sse_reset(&dyn->e);
fpu_reset_reg(dyn);
dyn->ymm_zero = 0;
}

void fpu_reset_ninst(dynarec_rv64_t* dyn, int ninst)
Expand Down
Loading
Loading