Skip to content
This repository has been archived by the owner on Jul 10, 2023. It is now read-only.

Commit

Permalink
Merge pull request #1601 from davidgfnet/morevmem
Browse files Browse the repository at this point in the history
vmem API consolidation and implementation of NO_RWX
  • Loading branch information
davidgfnet authored May 14, 2019
2 parents ee9646a + 3463872 commit 83a6c0e
Show file tree
Hide file tree
Showing 13 changed files with 315 additions and 162 deletions.
9 changes: 8 additions & 1 deletion core/build.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@
#define CPU_GENERIC 0x20000005 //used for pnacl, emscripten, etc
#define CPU_PPC 0x20000006
#define CPU_PPC64 0x20000007
#define CPU_A64 0x20000008
#define CPU_ARM64 0x20000008
#define CPU_MIPS64 0x20000009

//BUILD_COMPILER
Expand Down Expand Up @@ -326,6 +326,13 @@
#endif


// Some restrictions on FEAT_NO_RWX_PAGES
#if defined(FEAT_NO_RWX_PAGES) && FEAT_SHREC == DYNAREC_JIT
#if HOST_CPU != CPU_X64 && HOST_CPU != CPU_ARM64
#error "FEAT_NO_RWX_PAGES Only implemented for X64 and ARMv8"
#endif
#endif


// TARGET PLATFORM

Expand Down
14 changes: 7 additions & 7 deletions core/hw/aica/dsp_arm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#include "deps/vixl/aarch64/macro-assembler-aarch64.h"
using namespace vixl::aarch64;

extern void Arm64CacheFlush(void* start, void* end);
extern void vmem_platform_flush_cache(void *icache_start, void *icache_end, void *dcache_start, void *dcache_end);

class DSPAssembler : public MacroAssembler
{
Expand All @@ -54,9 +54,9 @@ class DSPAssembler : public MacroAssembler
Stp(xzr, xzr, MemOperand(x0, 48));
Ret();
FinalizeCode();
#ifdef _ANDROID
Arm64CacheFlush(GetBuffer()->GetStartAddress<void*>(), GetBuffer()->GetEndAddress<void*>());
#endif
vmem_platform_flush_cache(
GetBuffer()->GetStartAddress<void*>(), GetBuffer()->GetEndAddress<void*>(),
GetBuffer()->GetStartAddress<void*>(), GetBuffer()->GetEndAddress<void*>());

return;
}
Expand Down Expand Up @@ -387,9 +387,9 @@ class DSPAssembler : public MacroAssembler
#endif
FinalizeCode();

#ifdef _ANDROID
Arm64CacheFlush(GetBuffer()->GetStartAddress<void*>(), GetBuffer()->GetEndAddress<void*>());
#endif
vmem_platform_flush_cache(
GetBuffer()->GetStartAddress<void*>(), GetBuffer()->GetEndAddress<void*>(),
GetBuffer()->GetStartAddress<void*>(), GetBuffer()->GetEndAddress<void*>());
}

private:
Expand Down
8 changes: 5 additions & 3 deletions core/hw/arm7/arm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
using namespace vixl::aarch64;
//#include "deps/vixl/aarch32/disasm-aarch32.h"

extern void Arm64CacheFlush(void* start, void* end);
extern void vmem_platform_flush_cache(void *icache_start, void *icache_end, void *dcache_start, void *dcache_end);
extern u32 arm_single_op(u32 opcode);
extern "C" void arm_dispatch();
extern "C" void arm_exit();
Expand All @@ -41,7 +41,7 @@ extern reg_pair arm_Reg[RN_ARM_REG_COUNT];
MacroAssembler *assembler;

extern "C" void armFlushICache(void *bgn, void *end) {
Arm64CacheFlush(bgn, end);
vmem_platform_flush_cache(bgn, end, bgn, end);
}

static MemOperand arm_reg_operand(u32 regn)
Expand Down Expand Up @@ -143,7 +143,9 @@ void armv_end(void* codestart, u32 cycl)

assembler->FinalizeCode();
verify(assembler->GetBuffer()->GetCursorOffset() <= assembler->GetBuffer()->GetCapacity());
Arm64CacheFlush(codestart, assembler->GetBuffer()->GetEndAddress<void*>());
vmem_platform_flush_cache(
codestart, assembler->GetBuffer()->GetEndAddress<void*>(),
codestart, assembler->GetBuffer()->GetEndAddress<void*>());
icPtr += assembler->GetBuffer()->GetSizeInBytes();

#if 0
Expand Down
1 change: 1 addition & 0 deletions core/hw/mem/_vmem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,7 @@ bool _vmem_reserve() {
}
else {
printf("Info: nvmem is enabled, with addr space of size %s\n", vmemstatus == MemType4GB ? "4GB" : "512MB");
printf("Info: p_sh4rcb: %p virt_ram_base: %p\n", p_sh4rcb, virt_ram_base);
// Map the different parts of the memory file into the new memory range we got.
#define MAP_RAM_START_OFFSET 0
#define MAP_VRAM_START_OFFSET (MAP_RAM_START_OFFSET+RAM_SIZE)
Expand Down
8 changes: 8 additions & 0 deletions core/hw/mem/_vmem.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@ void vmem_platform_ondemand_page(void *address, unsigned size_bytes);
void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned nummaps);
// Just tries to wipe as much as possible in the relevant area.
void vmem_platform_destroy();
// Given a block of data in the .text section, prepares it for JIT action.
// Both code_area and size are page-aligned. Returns true on success.
bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code_area_rwx);
// Same as above, but uses two address spaces: one with RX and one with RW protections.
// Note: this function doesn't have to be implemented; it's a fallback for the above one.
bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code_area_rw, uintptr_t *rx_offset);
// This might not need an implementation (i.e. on x86/64 CPUs).
void vmem_platform_flush_cache(void *icache_start, void *icache_end, void *dcache_start, void *dcache_end);

// Note: if you want to disable vmem magic in any given platform, implement the
// above functions as empty functions and make vmem_platform_init return MemTypeError.
Expand Down
29 changes: 17 additions & 12 deletions core/hw/sh4/dyna/blockmanager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,49 +86,55 @@ u32 bm_gc_luc,bm_gcf_luc;

#define FPCA(x) ((DynarecCodeEntryPtr&)sh4rcb.fpcb[(x>>1)&FPCB_MASK])

// This returns an executable address
DynarecCodeEntryPtr DYNACALL bm_GetCode(u32 addr)
{
//rdv_FailedToFindBlock_pc=addr;
DynarecCodeEntryPtr rv=(DynarecCodeEntryPtr)FPCA(addr);
DynarecCodeEntryPtr rv = (DynarecCodeEntryPtr)FPCA(addr);

return (DynarecCodeEntryPtr)rv;
}

// This returns an executable address
DynarecCodeEntryPtr DYNACALL bm_GetCode2(u32 addr)
{
return (DynarecCodeEntryPtr)bm_GetCode(addr);
}

// Looks up the block for a guest address; returns the info block ptr (RW space), or 0 if not found
RuntimeBlockInfo* DYNACALL bm_GetBlock(u32 addr)
{
DynarecCodeEntryPtr cde=bm_GetCode(addr);
DynarecCodeEntryPtr cde = bm_GetCode(addr); // Returns RX ptr

if (cde==ngen_FailedToFindBlock)
if (cde == ngen_FailedToFindBlock)
return 0;
else
return bm_GetBlock((void*)cde);
return bm_GetBlock((void*)cde); // Returns RX pointer
}

// This takes a RX address and returns the info block ptr (RW space)
RuntimeBlockInfo* bm_GetBlock(void* dynarec_code)
{
blkmap_t::iterator iter=blkmap.find((RuntimeBlockInfo*)dynarec_code);
if (iter!=blkmap.end())
void *dynarecrw = CC_RX2RW(dynarec_code);
blkmap_t::iterator iter = blkmap.find((RuntimeBlockInfo*)dynarecrw);
if (iter != blkmap.end())
{
verify((*iter)->contains_code((u8*)dynarec_code));
verify((*iter)->contains_code((u8*)dynarecrw));
return *iter;
}
else
{
printf("bm_GetBlock(%p) failed ..\n",dynarec_code);
printf("bm_GetBlock(%p) failed ..\n", dynarec_code);
return 0;
}
}

// Takes RX pointer and returns a RW pointer
RuntimeBlockInfo* bm_GetStaleBlock(void* dynarec_code)
{
void *dynarecrw = CC_RX2RW(dynarec_code);
for(u32 i=0;i<del_blocks.size();i++)
{
if (del_blocks[i]->contains_code((u8*)dynarec_code))
if (del_blocks[i]->contains_code((u8*)dynarecrw))
return del_blocks[i];
}

Expand All @@ -145,9 +151,8 @@ void bm_AddBlock(RuntimeBlockInfo* blk)
}
blkmap.insert(blk);


verify((void*)bm_GetCode(blk->addr)==(void*)ngen_FailedToFindBlock);
FPCA(blk->addr)=blk->code;
FPCA(blk->addr) = (DynarecCodeEntryPtr)CC_RW2RX(blk->code);

#ifdef DYNA_OPROF
if (oprofHandle)
Expand Down
99 changes: 25 additions & 74 deletions core/hw/sh4/dyna/driver.cpp
Original file line number Diff line number Diff line change
@@ -1,18 +1,12 @@
#include "types.h"

#if HOST_OS==OS_WINDOWS
#include <windows.h>
#elif HOST_OS==OS_LINUX
#include <unistd.h>
#include <sys/mman.h>
#endif

#include "../sh4_interpreter.h"
#include "../sh4_opcode_list.h"
#include "../sh4_core.h"
#include "../sh4_if.h"
#include "hw/sh4/sh4_interrupts.h"

#include "hw/mem/_vmem.h"
#include "hw/sh4/sh4_mem.h"
#include "hw/pvr/pvr_mem.h"
#include "hw/aica/aica_if.h"
Expand All @@ -26,9 +20,7 @@
#include "decoder.h"

#if FEAT_SHREC != DYNAREC_NONE
//uh uh

#if !defined(_WIN64)
u8 SH4_TCB[CODE_SIZE+4096]
#if HOST_OS == OS_WINDOWS || FEAT_SHREC != DYNAREC_JIT
;
Expand All @@ -39,10 +31,9 @@ u8 SH4_TCB[CODE_SIZE+4096]
#else
#error SH4_TCB ALLOC
#endif
#endif

u8* CodeCache;

uintptr_t cc_rx_offset;

u32 LastAddr;
u32 LastAddr_min;
Expand Down Expand Up @@ -275,7 +266,7 @@ DynarecCodeEntryPtr DYNACALL rdv_FailedToFindBlock(u32 pc)
//printf("rdv_FailedToFindBlock ~ %08X\n",pc);
next_pc=pc;

return rdv_CompilePC();
return (DynarecCodeEntryPtr)CC_RW2RX(rdv_CompilePC());
}

static void ngen_FailedToFindBlock_internal() {
Expand Down Expand Up @@ -314,35 +305,27 @@ DynarecCodeEntryPtr DYNACALL rdv_BlockCheckFail(u32 pc)
{
next_pc=pc;
recSh4_ClearCache();
return rdv_CompilePC();
}

DynarecCodeEntryPtr rdv_FindCode()
{
DynarecCodeEntryPtr rv=bm_GetCode(next_pc);
if (rv==ngen_FailedToFindBlock)
return 0;

return rv;
return (DynarecCodeEntryPtr)CC_RW2RX(rdv_CompilePC());
}

DynarecCodeEntryPtr rdv_FindOrCompile()
{
DynarecCodeEntryPtr rv=bm_GetCode(next_pc);
if (rv==ngen_FailedToFindBlock)
rv=rdv_CompilePC();
DynarecCodeEntryPtr rv = bm_GetCode(next_pc); // Returns exec addr
if (rv == ngen_FailedToFindBlock)
rv = (DynarecCodeEntryPtr)CC_RW2RX(rdv_CompilePC()); // Returns rw addr

return rv;
}

void* DYNACALL rdv_LinkBlock(u8* code,u32 dpc)
{
RuntimeBlockInfo* rbi=bm_GetBlock(code);
// code is the RX addr to return after, however bm_GetBlock returns RW
RuntimeBlockInfo* rbi = bm_GetBlock(code);

if (!rbi)
{
printf("Stale block ..");
rbi=bm_GetStaleBlock(code);
rbi = bm_GetStaleBlock(code);
}

verify(rbi != NULL);
Expand All @@ -365,7 +348,7 @@ void* DYNACALL rdv_LinkBlock(u8* code,u32 dpc)
next_pc=rbi->NextBlock;
}

DynarecCodeEntryPtr rv=rdv_FindOrCompile();
DynarecCodeEntryPtr rv = rdv_FindOrCompile(); // Returns rx ptr

bool do_link=bm_GetBlock(code)==rbi;

Expand Down Expand Up @@ -455,56 +438,23 @@ void recSh4_Init()
if (_nvmem_enabled()) {
verify(mem_b.data==((u8*)p_sh4rcb->sq_buffer+512+0x0C000000));
}

#if defined(_WIN64)
#ifdef _MSC_VER
for (int i = 10; i < 1300; i++) {


//align to next page ..
u8* ptr = (u8*)recSh4_Init - i * 1024 * 1024;
// Prepare some pointer to the pre-allocated code cache:
void *candidate_ptr = (void*)(((unat)SH4_TCB + 4095) & ~4095);

CodeCache = (u8*)VirtualAlloc(ptr, CODE_SIZE, MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE);//; (u8*)(((unat)SH4_TCB+4095)& ~4095);

if (CodeCache)
break;
}
#else
CodeCache = (u8*)VirtualAlloc(NULL, CODE_SIZE, MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#endif
verify(CodeCache != NULL);
#else
CodeCache = (u8*)(((unat)SH4_TCB+4095)& ~4095);
#endif

#if HOST_OS == OS_DARWIN
munmap(CodeCache, CODE_SIZE);
CodeCache = (u8*)mmap(CodeCache, CODE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_ANON, 0, 0);
#endif

#if HOST_OS == OS_WINDOWS
DWORD old;
VirtualProtect(CodeCache,CODE_SIZE,PAGE_EXECUTE_READWRITE,&old);
#elif HOST_OS == OS_LINUX || HOST_OS == OS_DARWIN

printf("\n\t CodeCache addr: %p | from: %p | addr here: %p\n", CodeCache, CodeCache, recSh4_Init);

#if FEAT_SHREC == DYNAREC_JIT
if (mprotect(CodeCache, CODE_SIZE, PROT_READ|PROT_WRITE|PROT_EXEC))
{
perror("\n\tError,Couldn’t mprotect CodeCache!");
die("Couldn’t mprotect CodeCache");
}
// Call the platform-specific magic to make the pages RWX
CodeCache = NULL;
#ifdef FEAT_NO_RWX_PAGES
verify(vmem_platform_prepare_jit_block(candidate_ptr, CODE_SIZE, (void**)&CodeCache, &cc_rx_offset));
#else
verify(vmem_platform_prepare_jit_block(candidate_ptr, CODE_SIZE, (void**)&CodeCache));
#endif
// Ensure the pointer returned is non-null
verify(CodeCache != NULL);

#if TARGET_IPHONE
memset((u8*)mmap(CodeCache, CODE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_ANON, 0, 0),0xFF,CODE_SIZE);
#else
memset(CodeCache,0xFF,CODE_SIZE);
#endif

#endif
memset(CodeCache, 0xFF, CODE_SIZE);
ngen_init();
bm_Reset();
}

void recSh4_Term()
Expand Down Expand Up @@ -532,4 +482,5 @@ void Get_Sh4Recompiler(sh4_if* rv)
rv->IsCpuRunning = recSh4_IsCpuRunning;
rv->ResetCache = recSh4_ClearCache;
}
#endif

#endif // FEAT_SHREC != DYNAREC_NONE
13 changes: 13 additions & 0 deletions core/hw/sh4/dyna/ngen.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,19 @@

#define CODE_SIZE (10*1024*1024)

// When NO_RWX is enabled there are two address spaces: one executable and
// one writable. The emitter and most of the code in rec-* will work with
// the RW pointer. However, the fpcb table and other pointers used during
// execution (i.e. exceptions) are RX pointers. These two macros convert
// between them by subtracting/adding the pointer offset. CodeCache will
// point to the RW region for simplicity.
#ifdef FEAT_NO_RWX_PAGES
extern uintptr_t cc_rx_offset;
#define CC_RW2RX(ptr) (void*)(((uintptr_t)(ptr)) + cc_rx_offset)
#define CC_RX2RW(ptr) (void*)(((uintptr_t)(ptr)) - cc_rx_offset)
#else
#define CC_RW2RX(ptr) (ptr)
#define CC_RX2RW(ptr) (ptr)
#endif

//alternative emit ptr, set to 0 to use the main buffer
extern u32* emit_ptr;
Expand Down
6 changes: 1 addition & 5 deletions core/linux/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,8 @@ void fault_handler (int sn, siginfo_t * si, void *segfault_ctx)

context_from_segfault(&ctx, segfault_ctx);

bool dyna_cde = ((unat)ctx.pc>(unat)CodeCache) && ((unat)ctx.pc<(unat)(CodeCache + CODE_SIZE));
bool dyna_cde = ((unat)CC_RX2RW(ctx.pc) > (unat)CodeCache) && ((unat)CC_RX2RW(ctx.pc) < (unat)(CodeCache + CODE_SIZE));

//ucontext_t* ctx=(ucontext_t*)ctxr;
//printf("mprot hit @ ptr 0x%08X @@ code: %08X, %d\n",si->si_addr,ctx->uc_mcontext.arm_pc,dyna_cde);


if (VramLockedWrite((u8*)si->si_addr) || BM_LockedWrite((u8*)si->si_addr))
return;
#if FEAT_SHREC == DYNAREC_JIT
Expand Down
Loading

0 comments on commit 83a6c0e

Please sign in to comment.