Allow arm64 jit when memory base is not nicely aligned #10465

Merged (8 commits, Dec 28, 2017)
1 change: 0 additions & 1 deletion Common/Arm64Emitter.cpp
@@ -3906,7 +3906,6 @@ void ARM64CodeBlock::PoisonMemory(int offset) {
// AArch64: 0xD4200000 = BRK 0
while (ptr < maxptr)
*ptr++ = 0xD4200000;
FlushIcacheSection((u8 *)ptr, (u8 *)maxptr);
}

} // namespace
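An editorial note on the deleted flush: by the time the old call ran, ptr had already advanced all the way to maxptr in the loop above, so FlushIcacheSection flushed an empty range. The PR drops the call here and instead flushes the whole cleared region in Arm64Jit::ClearCache (see that hunk below). A minimal sketch of what a correct local flush would have looked like; PoisonAndFlush is a hypothetical helper, FlushIcacheSection is the existing Common helper:

    // Hypothetical helper (not PR code): poison a range with BRK and flush it.
    static void PoisonAndFlush(u32 *start, u32 *end) {
        u32 *ptr = start;
        while (ptr < end)
            *ptr++ = 0xD4200000;  // AArch64: BRK 0
        // Flush from the saved start pointer, not the advanced cursor.
        FlushIcacheSection((u8 *)start, (u8 *)end);
    }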
4 changes: 4 additions & 0 deletions Common/CodeBlock.h
@@ -68,6 +68,10 @@ template<class T> class CodeBlock : public CodeBlockCommon, public T {
// If not WX Exclusive, no need to call ProtectMemoryPages because we never change the protection from RWX.
PoisonMemory(offset);
ResetCodePtr(offset);
if (PlatformIsWXExclusive()) {
// Need to re-protect the part we didn't clear.
ProtectMemoryPages(region, offset, MEM_PROT_READ | MEM_PROT_EXEC);
}
}

// BeginWrite/EndWrite assume that we keep appending.
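For context on the hunk above: on WX-exclusive platforms (iOS, for example) a page may be writable or executable but never both, so PoisonMemory's writes leave the region non-executable, and it must be protected back to read+execute before the JIT can run from it again. A minimal sketch of the bracketing pattern, reusing the helper names from the hunk; the flow here is illustrative only:

    // Open the region for writing (this drops execute permission on W^X platforms).
    ProtectMemoryPages(region, region_size, MEM_PROT_READ | MEM_PROT_WRITE);
    // ... overwrite code, e.g. PoisonMemory(offset) ...
    // Seal it again before anything jumps back into the region.
    ProtectMemoryPages(region, region_size, MEM_PROT_READ | MEM_PROT_EXEC);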
2 changes: 1 addition & 1 deletion Core/MIPS/ARM64/Arm64CompALU.cpp
@@ -86,7 +86,7 @@ void Arm64Jit::Comp_IType(MIPSOpcode op) {
case 8: // same as addiu?
case 9: // R(rt) = R(rs) + simm; break; //addiu
// Special-case for small adjustments of pointerified registers. Commonly for SP but happens for others.
if (rs == rt && gpr.IsMappedAsPointer(rs) && IsImmArithmetic(simm < 0 ? -simm : simm, nullptr, nullptr)) {
if (rs == rt && jo.enablePointerify && gpr.IsMappedAsPointer(rs) && IsImmArithmetic(simm < 0 ? -simm : simm, nullptr, nullptr)) {
ARM64Reg r32 = gpr.R(rs);
gpr.MarkDirty(r32);
ARM64Reg r = EncodeRegTo64(r32);
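An editorial reading of why the new jo.enablePointerify guard matters: with this PR, IsMappedAsPointer() can also return true for ML_ARMREG_AS_PTR registers, whose low 32 bits hold base+value rather than the value itself, so the fast path's assumption that the 32-bit view is the MIPS value no longer holds. A sketch of the special case itself, assuming the usual ADDI2R/SUBI2R helpers and that the add never carries out of the low 32 bits (hypothetical continuation of the code above, not PR code):

    ARM64Reg r32 = gpr.R(rs);         // 32-bit view: the MIPS value
    gpr.MarkDirty(r32);               // we're modifying it in place
    ARM64Reg r = EncodeRegTo64(r32);  // 64-bit view: the pointerified host address
    // One immediate add adjusts the MIPS value and the host pointer together,
    // with no depointerize/repointerize round trip.
    if (simm >= 0)
        ADDI2R(r, r, simm);
    else
        SUBI2R(r, r, -simm);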
15 changes: 12 additions & 3 deletions Core/MIPS/ARM64/Arm64CompFPU.cpp
@@ -111,7 +111,11 @@ void Arm64Jit::Comp_FPULS(MIPSOpcode op)
} else {
skips = SetScratch1ForSafeAddress(rs, offset, SCRATCH2);
}
MOVK(SCRATCH1_64, ((uint64_t)Memory::base) >> 32, SHIFT_32);
if (jo.enablePointerify) {
MOVK(SCRATCH1_64, ((uint64_t)Memory::base) >> 32, SHIFT_32);
} else {
ADD(SCRATCH1_64, SCRATCH1_64, MEMBASEREG);
[Review comment from Collaborator Author, on the ADD above]
Note: Would be better to just use MEMBASEREG as the last arg to LDR and not do either of these?

-[Unknown]

[Review comment from Owner @hrydgard, Dec 28, 2017]
Well, that form of LDR takes an immediate offset, not a register, but changing to the register-sum LDR would indeed probably be a win. Definitely not a loss. I don't remember: do we initialize MEMBASEREG even if pointerization is enabled? Because if we don't gain an extra register from pointerization, I'm not sure it's even really worth it... I think Dolphin moved away from it.

[Review comment from Owner @hrydgard]
Though I suppose not needing to reload registers to "depointerize" should still be quite a win.
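A rough illustration of the alternative being discussed; whether PPSSPP's ARM64FloatEmitter exposes exactly this register-offset overload is an assumption here (Dolphin's emitter has one), so treat it as a sketch, not PR code:

    // Current shape in this hunk: fold the base into the address register,
    // then load through it with an immediate offset of zero.
    ADD(SCRATCH1_64, SCRATCH1_64, MEMBASEREG);
    fp.LDR(32, INDEX_UNSIGNED, fpr.R(ft), SCRATCH1_64, 0);

    // Discussed alternative: let the load add the base itself, saving the ADD.
    // Assumes a register-offset overload; the exact signature may differ.
    fp.LDR(32, fpr.R(ft), MEMBASEREG, ArithOption(SCRATCH1_64));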

}
}
fp.LDR(32, INDEX_UNSIGNED, fpr.R(ft), SCRATCH1_64, 0);
for (auto skip : skips) {
@@ -139,7 +143,11 @@ void Arm64Jit::Comp_FPULS(MIPSOpcode op)
} else {
skips = SetScratch1ForSafeAddress(rs, offset, SCRATCH2);
}
MOVK(SCRATCH1_64, ((uint64_t)Memory::base) >> 32, SHIFT_32);
if (jo.enablePointerify) {
MOVK(SCRATCH1_64, ((uint64_t)Memory::base) >> 32, SHIFT_32);
} else {
ADD(SCRATCH1_64, SCRATCH1_64, MEMBASEREG);
}
}
fp.STR(32, INDEX_UNSIGNED, fpr.R(ft), SCRATCH1_64, 0);
for (auto skip : skips) {
@@ -349,7 +357,8 @@ void Arm64Jit::Comp_mxc1(MIPSOpcode op)

case 4: //FI(fs) = R(rt); break; //mtc1
if (gpr.IsImm(rt)) {
uint32_t ival = gpr.GetImm(rt);
// This can't be run on LO/HI.
uint32_t ival = (uint32_t)gpr.GetImm(rt);
float floatval;
memcpy(&floatval, &ival, sizeof(floatval));
uint8_t imm8;
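The core of this PR is visible in the hunks above: two ways to turn a 32-bit emulated address in SCRATCH1 into a host pointer. An annotated sketch (editorial, not PR code; assumes Memory::base fits in 48 bits so its high half fits MOVK's 16-bit immediate):

    // Fast path ("pointerify"): overwrite bits 32..47 of the register with
    // the high half of the base. The result equals Memory::base + addr only
    // when the low 32 bits of Memory::base are zero, i.e. the base sits on a
    // 4 GB boundary ("nicely aligned"):
    //   (base & 0xFFFFFFFF00000000) | addr  ==  base + addr
    MOVK(SCRATCH1_64, ((uint64_t)Memory::base) >> 32, SHIFT_32);

    // Fallback added by this PR: a plain 64-bit add against MEMBASEREG, which
    // holds Memory::base. One more instruction and a pinned register, but
    // correct for any base alignment:
    ADD(SCRATCH1_64, SCRATCH1_64, MEMBASEREG);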
24 changes: 20 additions & 4 deletions Core/MIPS/ARM64/Arm64CompVFPU.cpp
@@ -231,7 +231,11 @@ namespace MIPSComp {
skips = SetScratch1ForSafeAddress(rs, offset, SCRATCH2);
}
// Pointerify
MOVK(SCRATCH1_64, ((uint64_t)Memory::base) >> 32, SHIFT_32);
if (jo.enablePointerify) {
MOVK(SCRATCH1_64, ((uint64_t)Memory::base) >> 32, SHIFT_32);
} else {
ADD(SCRATCH1_64, SCRATCH1_64, MEMBASEREG);
}
}
fp.LDR(32, INDEX_UNSIGNED, fpr.V(vt), SCRATCH1_64, 0);
for (auto skip : skips) {
@@ -261,7 +265,11 @@ namespace MIPSComp {
} else {
skips = SetScratch1ForSafeAddress(rs, offset, SCRATCH2);
}
MOVK(SCRATCH1_64, ((uint64_t)Memory::base) >> 32, SHIFT_32);
if (jo.enablePointerify) {
MOVK(SCRATCH1_64, ((uint64_t)Memory::base) >> 32, SHIFT_32);
} else {
ADD(SCRATCH1_64, SCRATCH1_64, MEMBASEREG);
}
}
fp.STR(32, INDEX_UNSIGNED, fpr.V(vt), SCRATCH1_64, 0);
for (auto skip : skips) {
@@ -303,7 +311,11 @@ namespace MIPSComp {
} else {
skips = SetScratch1ForSafeAddress(rs, imm, SCRATCH2);
}
MOVK(SCRATCH1_64, ((uint64_t)Memory::base) >> 32, SHIFT_32);
if (jo.enablePointerify) {
MOVK(SCRATCH1_64, ((uint64_t)Memory::base) >> 32, SHIFT_32);
} else {
ADD(SCRATCH1_64, SCRATCH1_64, MEMBASEREG);
}
}

fp.LDP(32, INDEX_SIGNED, fpr.V(vregs[0]), fpr.V(vregs[1]), SCRATCH1_64, 0);
@@ -332,7 +344,11 @@ namespace MIPSComp {
} else {
skips = SetScratch1ForSafeAddress(rs, imm, SCRATCH2);
}
MOVK(SCRATCH1_64, ((uint64_t)Memory::base) >> 32, SHIFT_32);
if (jo.enablePointerify) {
MOVK(SCRATCH1_64, ((uint64_t)Memory::base) >> 32, SHIFT_32);
} else {
ADD(SCRATCH1_64, SCRATCH1_64, MEMBASEREG);
}
}

fp.STP(32, INDEX_SIGNED, fpr.V(vregs[0]), fpr.V(vregs[1]), SCRATCH1_64, 0);
1 change: 1 addition & 0 deletions Core/MIPS/ARM64/Arm64Jit.cpp
@@ -129,6 +129,7 @@ void Arm64Jit::ClearCache() {
ILOG("ARM64Jit: Clearing the cache!");
blocks.Clear();
ClearCodeSpace(jitStartOffset);
FlushIcacheSection(region + jitStartOffset, region + region_size - jitStartOffset);
}

void Arm64Jit::InvalidateCacheAt(u32 em_address, int length) {
109 changes: 95 additions & 14 deletions Core/MIPS/ARM64/Arm64RegCache.cpp
@@ -57,7 +57,7 @@ void Arm64RegCache::Start(MIPSAnalyst::AnalysisResults &stats) {
const StaticAllocation *statics = GetStaticAllocations(numStatics);
for (int i = 0; i < numStatics; i++) {
ar[statics[i].ar].mipsReg = statics[i].mr;
ar[statics[i].ar].pointerified = statics[i].pointerified;
ar[statics[i].ar].pointerified = statics[i].pointerified && jo_->enablePointerify;
mr[statics[i].mr].loc = ML_ARMREG;
mr[statics[i].mr].reg = statics[i].ar;
mr[statics[i].mr].isStatic = true;
@@ -110,7 +110,7 @@ void Arm64RegCache::EmitLoadStaticRegisters() {
for (int i = 0; i < count; i++) {
int offset = GetMipsRegOffset(allocs[i].mr);
emit_->LDR(INDEX_UNSIGNED, allocs[i].ar, CTXREG, offset);
if (allocs[i].pointerified) {
if (allocs[i].pointerified && jo_->enablePointerify) {
emit_->MOVK(EncodeRegTo64(allocs[i].ar), ((uint64_t)Memory::base) >> 32, SHIFT_32);
}
}
@@ -150,6 +150,8 @@ bool Arm64RegCache::IsMappedAsPointer(MIPSGPReg mipsReg) {
if (ar[mr[mipsReg].reg].pointerified) {
ELOG("Really shouldn't be pointerified here");
}
} else if (mr[mipsReg].loc == ML_ARMREG_AS_PTR) {
return true;
}
return false;
}
@@ -288,6 +290,17 @@ ARM64Reg Arm64RegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) {
mr[mipsReg].loc = ML_ARMREG_IMM;
ar[armReg].pointerified = false;
}
} else if (mr[mipsReg].loc == ML_ARMREG_AS_PTR) {
// Was mapped as pointer, now we want it mapped as a value, presumably to
// add or subtract stuff to it.
if ((mapFlags & MAP_NOINIT) != MAP_NOINIT) {
ARM64Reg loadReg = armReg;
if (mipsReg == MIPS_REG_LO) {
loadReg = EncodeRegTo64(loadReg);
}
emit_->LDR(INDEX_UNSIGNED, loadReg, CTXREG, GetMipsRegOffset(mipsReg));
}
mr[mipsReg].loc = ML_ARMREG;
}
// Erasing the imm on dirty (necessary since otherwise we will still think it's ML_ARMREG_IMM and return
// true for IsImm and calculate crazily wrong things). /unknown
@@ -315,6 +328,21 @@ ARM64Reg Arm64RegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) {
}

return mr[mipsReg].reg;
} else if (mr[mipsReg].loc == ML_ARMREG_AS_PTR) {
// Was mapped as pointer, now we want it mapped as a value, presumably to
// add or subtract stuff to it.
if ((mapFlags & MAP_NOINIT) != MAP_NOINIT) {
ARM64Reg loadReg = armReg;
if (mipsReg == MIPS_REG_LO) {
loadReg = EncodeRegTo64(loadReg);
}
emit_->LDR(INDEX_UNSIGNED, loadReg, CTXREG, GetMipsRegOffset(mipsReg));
}
mr[mipsReg].loc = ML_ARMREG;
if (mapFlags & MAP_DIRTY) {
ar[armReg].isDirty = true;
}
return (ARM64Reg)mr[mipsReg].reg;
}

// Okay, not mapped, so we need to allocate an ARM register.
@@ -358,6 +386,11 @@ ARM64Reg Arm64RegCache::MapReg(MIPSGPReg mipsReg, int mapFlags) {
}

Arm64Gen::ARM64Reg Arm64RegCache::MapRegAsPointer(MIPSGPReg reg) {
// Already mapped.
if (mr[reg].loc == ML_ARMREG_AS_PTR) {
return mr[reg].reg;
}

ARM64Reg retval = INVALID_REG;
if (mr[reg].loc != ML_ARMREG && mr[reg].loc != ML_ARMREG_IMM) {
retval = MapReg(reg);
@@ -368,9 +401,23 @@ Arm64Gen::ARM64Reg Arm64RegCache::MapRegAsPointer(MIPSGPReg reg) {
if (mr[reg].loc == ML_ARMREG || mr[reg].loc == ML_ARMREG_IMM) {
// If there was an imm attached, discard it.
mr[reg].loc = ML_ARMREG;
int a = DecodeReg(mr[reg].reg);
if (!ar[a].pointerified) {
emit_->MOVK(ARM64Reg(X0 + a), ((uint64_t)Memory::base) >> 32, SHIFT_32);
ARM64Reg a = DecodeReg(mr[reg].reg);
if (!jo_->enablePointerify) {
// First, flush the value.
if (ar[a].isDirty) {
ARM64Reg storeReg = ARM64RegForFlush(ar[a].mipsReg);
if (storeReg != INVALID_REG)
emit_->STR(INDEX_UNSIGNED, storeReg, CTXREG, GetMipsRegOffset(ar[a].mipsReg));
ar[a].isDirty = false;
}

// Convert to a pointer by adding the base and clearing off the top bits.
// If SP, we can probably avoid the top bit clear, let's play with that later.
emit_->ANDI2R(a, a, 0x3FFFFFFF, INVALID_REG);
emit_->ADD(ARM64Reg(X0 + (int)a), ARM64Reg(X0 + (int)a), MEMBASEREG);
mr[reg].loc = ML_ARMREG_AS_PTR;
} else if (!ar[a].pointerified) {
emit_->MOVK(ARM64Reg(X0 + (int)a), ((uint64_t)Memory::base) >> 32, SHIFT_32);
ar[a].pointerified = true;
}
} else {
@@ -450,10 +497,13 @@ void Arm64RegCache::FlushArmReg(ARM64Reg r) {
mreg.loc = ML_IMM;
mreg.reg = INVALID_REG;
} else {
// Note: may be a 64-bit reg.
ARM64Reg storeReg = ARM64RegForFlush(ar[r].mipsReg);
if (storeReg != INVALID_REG)
emit_->STR(INDEX_UNSIGNED, storeReg, CTXREG, GetMipsRegOffset(ar[r].mipsReg));
if (mreg.loc == ML_IMM || ar[r].isDirty) {
_assert_msg_(JIT, mreg.loc != ML_ARMREG_AS_PTR, "Cannot flush reg as pointer");
// Note: may be a 64-bit reg.
ARM64Reg storeReg = ARM64RegForFlush(ar[r].mipsReg);
if (storeReg != INVALID_REG)
emit_->STR(INDEX_UNSIGNED, storeReg, CTXREG, GetMipsRegOffset(ar[r].mipsReg));
}
mreg.loc = ML_MEM;
mreg.reg = INVALID_REG;
mreg.imm = 0;
@@ -465,18 +515,28 @@

void Arm64RegCache::DiscardR(MIPSGPReg mipsReg) {
if (mr[mipsReg].isStatic) {
// Simply do nothing unless it's an IMM or ARMREG_IMM, in case we just switch it over to ARMREG, losing the value.
// Simply do nothing unless it's an IMM/ARMREG_IMM/ARMREG_AS_PTR, in case we just switch it over to ARMREG, losing the value.
ARM64Reg armReg = mr[mipsReg].reg;
if (mr[mipsReg].loc == ML_ARMREG_IMM || mr[mipsReg].loc == ML_IMM) {
ARM64Reg armReg = mr[mipsReg].reg;
// Ignore the imm value, restore sanity
mr[mipsReg].loc = ML_ARMREG;
ar[armReg].pointerified = false;
ar[armReg].isDirty = false;
}
if (mr[mipsReg].loc == ML_ARMREG_AS_PTR) {
ARM64Reg loadReg = armReg;
if (mipsReg == MIPS_REG_LO) {
loadReg = EncodeRegTo64(loadReg);
}
emit_->LDR(INDEX_UNSIGNED, loadReg, CTXREG, GetMipsRegOffset(mipsReg));
mr[mipsReg].loc = ML_ARMREG;
ar[armReg].pointerified = false;
ar[armReg].isDirty = false;
}
return;
}
const RegMIPSLoc prevLoc = mr[mipsReg].loc;
if (prevLoc == ML_ARMREG || prevLoc == ML_ARMREG_IMM) {
if (prevLoc == ML_ARMREG || prevLoc == ML_ARMREG_IMM || prevLoc == ML_ARMREG_AS_PTR) {
ARM64Reg armReg = mr[mipsReg].reg;
ar[armReg].isDirty = false;
ar[armReg].mipsReg = MIPS_REG_INVALID;
@@ -532,6 +592,9 @@ ARM64Reg Arm64RegCache::ARM64RegForFlush(MIPSGPReg r) {
}
return mr[r].reg;

case ML_ARMREG_AS_PTR:
return INVALID_REG;

case ML_MEM:
return INVALID_REG;

@@ -578,6 +641,14 @@ void Arm64RegCache::FlushR(MIPSGPReg r) {
ar[mr[r].reg].pointerified = false;
break;

case ML_ARMREG_AS_PTR:
// Should never be dirty in this state.
if (ar[mr[r].reg].isDirty) {
ERROR_LOG_REPORT(JIT, "ARMREG_AS_PTR cannot be dirty (yet)");
}
ar[mr[r].reg].mipsReg = MIPS_REG_INVALID;
break;

case ML_MEM:
// Already there, nothing to do.
break;
@@ -654,6 +725,14 @@ void Arm64RegCache::FlushAll() {
ar[armReg].pointerified = false;
}
mr[i].loc = ML_ARMREG;
} else if (mr[i].loc == ML_ARMREG_AS_PTR) {
// Need to reload the register (could also subtract, TODO...)
ARM64Reg loadReg = armReg;
if (mipsReg == MIPS_REG_LO) {
loadReg = EncodeRegTo64(loadReg);
}
emit_->LDR(INDEX_UNSIGNED, loadReg, CTXREG, GetMipsRegOffset(MIPSGPReg(i)));
mr[i].loc = ML_ARMREG;
}
if (i != MIPS_REG_ZERO && mr[i].reg == INVALID_REG) {
ELOG("ARM reg of static %i is invalid", i);
@@ -667,7 +746,7 @@ void Arm64RegCache::FlushAll() {
int count = 0;
const StaticAllocation *allocs = GetStaticAllocations(count);
for (int i = 0; i < count; i++) {
if (allocs[i].pointerified && !ar[allocs[i].ar].pointerified) {
if (allocs[i].pointerified && !ar[allocs[i].ar].pointerified && jo_->enablePointerify) {
// Re-pointerify
emit_->MOVK(EncodeRegTo64(allocs[i].ar), ((uint64_t)Memory::base) >> 32, SHIFT_32);
ar[allocs[i].ar].pointerified = true;
@@ -792,7 +871,9 @@ ARM64Reg Arm64RegCache::R(MIPSGPReg mipsReg) {
}

ARM64Reg Arm64RegCache::RPtr(MIPSGPReg mipsReg) {
if (mr[mipsReg].loc == ML_ARMREG || mr[mipsReg].loc == ML_ARMREG_IMM) {
if (mr[mipsReg].loc == ML_ARMREG_AS_PTR) {
return (ARM64Reg)mr[mipsReg].reg;
} else if (mr[mipsReg].loc == ML_ARMREG || mr[mipsReg].loc == ML_ARMREG_IMM) {
int a = mr[mipsReg].reg;
if (ar[a].pointerified) {
return (ARM64Reg)mr[mipsReg].reg;
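On leaving the new ML_ARMREG_AS_PTR state: the hunks above always recover the value by reloading it from the MIPS context, which is safe because MapRegAsPointer() flushed the register to memory before converting it. The "could also subtract" TODO in FlushAll() points at a cheaper alternative. A sketch of both, with reg standing in for the mapped register (editorial, not PR code):

    // What the PR does: reload the value from the context.
    emit_->LDR(INDEX_UNSIGNED, reg, CTXREG, GetMipsRegOffset(mipsReg));

    // The TODO alternative: recompute the value in place, avoiding the memory
    // round trip. This gives back the masked (& 0x3FFFFFFF) address, which is
    // exactly what the pointer was built from.
    emit_->SUB(EncodeRegTo64(reg), EncodeRegTo64(reg), MEMBASEREG);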
2 changes: 2 additions & 0 deletions Core/MIPS/ARM64/Arm64RegCache.h
@@ -41,6 +41,8 @@ enum {
enum RegMIPSLoc {
ML_IMM,
ML_ARMREG,
// In an arm reg, but holding an adjusted pointer (base added in, not pointerified; used when Memory::base is not nicely aligned).
ML_ARMREG_AS_PTR,
// In an arm reg, but also has a known immediate value.
ML_ARMREG_IMM,
ML_MEM,
6 changes: 6 additions & 0 deletions Core/MIPS/JitCommon/JitState.cpp
@@ -51,8 +51,14 @@ namespace MIPSComp {
continueMaxInstructions = 300;

useStaticAlloc = false;
enablePointerify = false;
#if PPSSPP_ARCH(ARM64)
useStaticAlloc = true;
enablePointerify = true;
#endif
#if PPSSPP_PLATFORM(IOS)
useStaticAlloc = false;
enablePointerify = false;
#endif
}
}
1 change: 1 addition & 0 deletions Core/MIPS/JitCommon/JitState.h
@@ -198,6 +198,7 @@ namespace MIPSComp {
// ARM64 only
bool useASIMDVFPU;
bool useStaticAlloc;
bool enablePointerify;

// Common
bool enableBlocklink;