diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
index b09279d45850..d9fd7cec8165 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
@@ -957,6 +957,9 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
     IntializeSpeculativeConstants();
   }
 
+  BitSet32 previous_gpr_in_use{};
+  BitSet32 previous_fpr_in_use{};
+
   // Translate instructions
   for (u32 i = 0; i < code_block.m_num_instructions; i++)
   {
@@ -976,203 +979,215 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
       js.isLastInstruction = true;
     }
 
-    if (i != 0)
-    {
-      // Gather pipe writes using a non-immediate address are discovered by profiling.
-      const u32 prev_address = m_code_buffer[i - 1].address;
-      bool gatherPipeIntCheck = js.fifoWriteAddresses.contains(prev_address);
-
-      // Gather pipe writes using an immediate address are explicitly tracked.
-      if (jo.optimizeGatherPipe &&
-          (js.fifoBytesSinceCheck >= GPFifo::GATHER_PIPE_SIZE || js.mustCheckFifo))
-      {
-        js.fifoBytesSinceCheck = 0;
-        js.mustCheckFifo = false;
-        BitSet32 registersInUse = CallerSavedRegistersInUse();
-        ABI_PushRegistersAndAdjustStack(registersInUse, 0);
-        ABI_CallFunctionP(GPFifo::FastCheckGatherPipe, &m_system.GetGPFifo());
-        ABI_PopRegistersAndAdjustStack(registersInUse, 0);
-        gatherPipeIntCheck = true;
-      }
-
-      // Gather pipe writes can generate an exception; add an exception check.
-      // TODO: This doesn't really match hardware; the CP interrupt is
-      // asynchronous.
-      if (gatherPipeIntCheck)
-      {
-        TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT));
-        FixupBranch extException = J_CC(CC_NZ, Jump::Near);
-
-        SwitchToFarCode();
-        SetJumpTarget(extException);
-        TEST(32, PPCSTATE(msr), Imm32(0x0008000));
-        FixupBranch noExtIntEnable = J_CC(CC_Z, Jump::Near);
-        MOV(64, R(RSCRATCH), ImmPtr(&m_system.GetProcessorInterface().m_interrupt_cause));
-        TEST(32, MatR(RSCRATCH),
-             Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN |
-                   ProcessorInterface::INT_CAUSE_PE_FINISH));
-        FixupBranch noCPInt = J_CC(CC_Z, Jump::Near);
-
-        {
-          RCForkGuard gpr_guard = gpr.Fork();
-          RCForkGuard fpr_guard = fpr.Fork();
-
-          gpr.Flush();
-          fpr.Flush();
-
-          MOV(32, PPCSTATE(pc), Imm32(op.address));
-          WriteExternalExceptionExit();
-        }
-        SwitchToNearCode();
-        SetJumpTarget(noCPInt);
-        SetJumpTarget(noExtIntEnable);
-      }
-    }
-
-    if (HandleFunctionHooking(op.address))
-      break;
-
-    if (op.skip)
-    {
-      if (IsDebuggingEnabled())
-      {
-        // The only thing that currently sets op.skip is the BLR following optimization.
-        // If any non-branch instruction starts setting that too, this will need to be changed.
-        ASSERT(op.inst.hex == 0x4e800020);
-        WriteBranchWatch(op.address, op.branchTo, op.inst, RSCRATCH, RSCRATCH2,
-                         CallerSavedRegistersInUse());
-      }
-    }
-    else
-    {
-      auto& cpu = m_system.GetCPU();
-      auto& power_pc = m_system.GetPowerPC();
-      if (IsDebuggingEnabled() && power_pc.GetBreakPoints().IsAddressBreakPoint(op.address) &&
-          !cpu.IsStepping())
-      {
-        gpr.Flush();
-        fpr.Flush();
-
-        MOV(32, PPCSTATE(pc), Imm32(op.address));
-        ABI_PushRegistersAndAdjustStack({}, 0);
-        ABI_CallFunctionP(PowerPC::CheckAndHandleBreakPointsFromJIT, &power_pc);
-        ABI_PopRegistersAndAdjustStack({}, 0);
-        MOV(64, R(RSCRATCH), ImmPtr(cpu.GetStatePtr()));
-        CMP(32, MatR(RSCRATCH), Imm32(Common::ToUnderlying(CPU::State::Running)));
-        FixupBranch noBreakpoint = J_CC(CC_E);
-
-        Cleanup();
-        MOV(32, PPCSTATE(npc), Imm32(op.address));
-        SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
-        JMP(asm_routines.dispatcher_exit, Jump::Near);
-
-        SetJumpTarget(noBreakpoint);
-      }
-
-      if ((opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound)
-      {
-        // This instruction uses FPU - needs to add FP exception bailout
-        TEST(32, PPCSTATE(msr), Imm32(1 << 13));  // Test FP enabled bit
-        FixupBranch b1 = J_CC(CC_Z, Jump::Near);
-
-        SwitchToFarCode();
-        SetJumpTarget(b1);
-        {
-          RCForkGuard gpr_guard = gpr.Fork();
-          RCForkGuard fpr_guard = fpr.Fork();
-
-          gpr.Flush();
-          fpr.Flush();
-
-          // If a FPU exception occurs, the exception handler will read
-          // from PC. Update PC with the latest value in case that happens.
-          MOV(32, PPCSTATE(pc), Imm32(op.address));
-          OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
-          WriteExceptionExit();
-        }
-        SwitchToNearCode();
-
-        js.firstFPInstructionFound = true;
-      }
-
-      if (bJITRegisterCacheOff)
-      {
-        gpr.Flush();
-        fpr.Flush();
-      }
-      else
-      {
-        // If we have an input register that is going to be used again, load it pre-emptively,
-        // even if the instruction doesn't strictly need it in a register, to avoid redundant
-        // loads later. Of course, don't do this if we're already out of registers.
-        // As a bit of a heuristic, make sure we have at least one register left over for the
-        // output, which needs to be bound in the actual instruction compilation.
-        // TODO: make this smarter in the case that we're actually register-starved, i.e.
-        // prioritize the more important registers.
-        gpr.PreloadRegisters(op.regsIn & op.gprInUse & ~op.gprDiscardable);
-        fpr.PreloadRegisters(op.fregsIn & op.fprInXmm & ~op.fprDiscardable);
-      }
-
-      CompileInstruction(op);
-
-      js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst;
-
-      if (jo.memcheck && (opinfo->flags & FL_LOADSTORE))
-      {
-        // If we have a fastmem loadstore, we can omit the exception check and let fastmem handle
-        // it.
-        FixupBranch memException;
-        ASSERT_MSG(DYNA_REC, !(js.fastmemLoadStore && js.fixupExceptionHandler),
-                   "Fastmem loadstores shouldn't have exception handler fixups (PC={:x})!",
-                   op.address);
-        if (!js.fastmemLoadStore && !js.fixupExceptionHandler)
-        {
-          TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
-          memException = J_CC(CC_NZ, Jump::Near);
-        }
-
-        SwitchToFarCode();
-        if (!js.fastmemLoadStore)
-        {
-          m_exception_handler_at_loc[js.fastmemLoadStore] = nullptr;
-          SetJumpTarget(js.fixupExceptionHandler ? js.exceptionHandler : memException);
-        }
-        else
-        {
-          m_exception_handler_at_loc[js.fastmemLoadStore] = GetWritableCodePtr();
-        }
-
-        RCForkGuard gpr_guard = gpr.Fork();
-        RCForkGuard fpr_guard = fpr.Fork();
-
-        gpr.Revert();
-        fpr.Revert();
-        gpr.Flush();
-        fpr.Flush();
-
-        MOV(32, PPCSTATE(pc), Imm32(op.address));
-        WriteExceptionExit();
-        SwitchToNearCode();
-      }
-
-      gpr.Commit();
-      fpr.Commit();
-
-      // If we have a register that will never be used again, discard or flush it.
-      if (!bJITRegisterCacheOff)
-      {
-        gpr.Discard(op.gprDiscardable);
-        fpr.Discard(op.fprDiscardable);
-      }
-      gpr.Flush(~op.gprInUse & (op.regsIn | op.regsOut));
-      fpr.Flush(~op.fprInUse & (op.fregsIn | op.GetFregsOut()));
-
-      if (opinfo->flags & FL_LOADSTORE)
-        ++js.numLoadStoreInst;
-
-      if (opinfo->flags & FL_USE_FPU)
-        ++js.numFloatingPointInst;
-    }
+    if (js.skipInstructions != 0)
+    {
+      js.skipInstructions--;
+    }
+    else
+    {
+      if (i != 0)
+      {
+        // Gather pipe writes using a non-immediate address are discovered by profiling.
+        const u32 prev_address = m_code_buffer[i - 1].address;
+        bool gatherPipeIntCheck = js.fifoWriteAddresses.contains(prev_address);
+
+        // Gather pipe writes using an immediate address are explicitly tracked.
+        if (jo.optimizeGatherPipe &&
+            (js.fifoBytesSinceCheck >= GPFifo::GATHER_PIPE_SIZE || js.mustCheckFifo))
+        {
+          js.fifoBytesSinceCheck = 0;
+          js.mustCheckFifo = false;
+          BitSet32 registersInUse = CallerSavedRegistersInUse();
+          ABI_PushRegistersAndAdjustStack(registersInUse, 0);
+          ABI_CallFunctionP(GPFifo::FastCheckGatherPipe, &m_system.GetGPFifo());
+          ABI_PopRegistersAndAdjustStack(registersInUse, 0);
+          gatherPipeIntCheck = true;
+        }
+
+        // Gather pipe writes can generate an exception; add an exception check.
+        // TODO: This doesn't really match hardware; the CP interrupt is
+        // asynchronous.
+        if (gatherPipeIntCheck)
+        {
+          TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_EXTERNAL_INT));
+          FixupBranch extException = J_CC(CC_NZ, Jump::Near);
+
+          SwitchToFarCode();
+          SetJumpTarget(extException);
+          TEST(32, PPCSTATE(msr), Imm32(0x0008000));
+          FixupBranch noExtIntEnable = J_CC(CC_Z, Jump::Near);
+          MOV(64, R(RSCRATCH), ImmPtr(&m_system.GetProcessorInterface().m_interrupt_cause));
+          TEST(32, MatR(RSCRATCH),
+               Imm32(ProcessorInterface::INT_CAUSE_CP | ProcessorInterface::INT_CAUSE_PE_TOKEN |
+                     ProcessorInterface::INT_CAUSE_PE_FINISH));
+          FixupBranch noCPInt = J_CC(CC_Z, Jump::Near);
+
+          {
+            RCForkGuard gpr_guard = gpr.Fork();
+            RCForkGuard fpr_guard = fpr.Fork();
+
+            gpr.Flush();
+            fpr.Flush();
+
+            MOV(32, PPCSTATE(pc), Imm32(op.address));
+            WriteExternalExceptionExit();
+          }
+          SwitchToNearCode();
+          SetJumpTarget(noCPInt);
+          SetJumpTarget(noExtIntEnable);
+        }
+      }
+
+      if (HandleFunctionHooking(op.address))
+        break;
+
+      if (op.skip)
+      {
+        if (IsDebuggingEnabled())
+        {
+          // The only thing that currently sets op.skip is the BLR following optimization.
+          // If any non-branch instruction starts setting that too, this will need to be changed.
+          ASSERT(op.inst.hex == 0x4e800020);
+          WriteBranchWatch(op.address, op.branchTo, op.inst, RSCRATCH, RSCRATCH2,
+                           CallerSavedRegistersInUse());
+        }
+      }
+      else
+      {
+        auto& cpu = m_system.GetCPU();
+        auto& power_pc = m_system.GetPowerPC();
+        if (IsDebuggingEnabled() && power_pc.GetBreakPoints().IsAddressBreakPoint(op.address) &&
+            !cpu.IsStepping())
+        {
+          gpr.Flush();
+          fpr.Flush();
+
+          MOV(32, PPCSTATE(pc), Imm32(op.address));
+          ABI_PushRegistersAndAdjustStack({}, 0);
+          ABI_CallFunctionP(PowerPC::CheckAndHandleBreakPointsFromJIT, &power_pc);
+          ABI_PopRegistersAndAdjustStack({}, 0);
+          MOV(64, R(RSCRATCH), ImmPtr(cpu.GetStatePtr()));
+          CMP(32, MatR(RSCRATCH), Imm32(Common::ToUnderlying(CPU::State::Running)));
+          FixupBranch noBreakpoint = J_CC(CC_E);
+
+          Cleanup();
+          MOV(32, PPCSTATE(npc), Imm32(op.address));
+          SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
+          JMP(asm_routines.dispatcher_exit, Jump::Near);
+
+          SetJumpTarget(noBreakpoint);
+        }
+
+        if ((opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound)
+        {
+          // This instruction uses FPU - needs to add FP exception bailout
+          TEST(32, PPCSTATE(msr), Imm32(1 << 13));  // Test FP enabled bit
+          FixupBranch b1 = J_CC(CC_Z, Jump::Near);
+
+          SwitchToFarCode();
+          SetJumpTarget(b1);
+          {
+            RCForkGuard gpr_guard = gpr.Fork();
+            RCForkGuard fpr_guard = fpr.Fork();
+
+            gpr.Flush();
+            fpr.Flush();
+
+            // If a FPU exception occurs, the exception handler will read
+            // from PC. Update PC with the latest value in case that happens.
+            MOV(32, PPCSTATE(pc), Imm32(op.address));
+            OR(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
+            WriteExceptionExit();
+          }
+          SwitchToNearCode();
+
+          js.firstFPInstructionFound = true;
+        }
+
+        if (bJITRegisterCacheOff)
+        {
+          gpr.Flush();
+          fpr.Flush();
+        }
+        else
+        {
+          // If we have an input register that is going to be used again, load it pre-emptively,
+          // even if the instruction doesn't strictly need it in a register, to avoid redundant
+          // loads later. Of course, don't do this if we're already out of registers.
+          // As a bit of a heuristic, make sure we have at least one register left over for the
+          // output, which needs to be bound in the actual instruction compilation.
+          // TODO: make this smarter in the case that we're actually register-starved, i.e.
+          // prioritize the more important registers.
+          gpr.PreloadRegisters(op.regsIn & op.gprInUse & ~op.gprDiscardable);
+          fpr.PreloadRegisters(op.fregsIn & op.fprInXmm & ~op.fprDiscardable);
+        }
+
+        CompileInstruction(op);
+
+        js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst;
+
+        if (jo.memcheck && (opinfo->flags & FL_LOADSTORE))
+        {
+          // If we have a fastmem loadstore, we can omit the exception check and let fastmem handle
+          // it.
+          FixupBranch memException;
+          ASSERT_MSG(DYNA_REC, !(js.fastmemLoadStore && js.fixupExceptionHandler),
+                     "Fastmem loadstores shouldn't have exception handler fixups (PC={:x})!",
+                     op.address);
+          if (!js.fastmemLoadStore && !js.fixupExceptionHandler)
+          {
+            TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
+            memException = J_CC(CC_NZ, Jump::Near);
+          }
+
+          SwitchToFarCode();
+          if (!js.fastmemLoadStore)
+          {
+            m_exception_handler_at_loc[js.fastmemLoadStore] = nullptr;
+            SetJumpTarget(js.fixupExceptionHandler ? js.exceptionHandler : memException);
+          }
+          else
+          {
+            m_exception_handler_at_loc[js.fastmemLoadStore] = GetWritableCodePtr();
+          }
+
+          RCForkGuard gpr_guard = gpr.Fork();
+          RCForkGuard fpr_guard = fpr.Fork();
+
+          gpr.Revert();
+          fpr.Revert();
+          gpr.Flush();
+          fpr.Flush();
+
+          MOV(32, PPCSTATE(pc), Imm32(op.address));
+          WriteExceptionExit();
+          SwitchToNearCode();
+        }
+
+        gpr.Commit();
+        fpr.Commit();
+      }
+    }
+
+    if (opinfo->flags & FL_LOADSTORE)
+      ++js.numLoadStoreInst;
+
+    if (opinfo->flags & FL_USE_FPU)
+      ++js.numFloatingPointInst;
+
+    js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst;
+
+    // If we have a register that will never be used again, discard or flush it.
+    if (!bJITRegisterCacheOff)
+    {
+      gpr.Discard(op.gprDiscardable);
+      fpr.Discard(op.fprDiscardable);
+    }
+    gpr.Flush(~op.gprInUse & previous_gpr_in_use);
+    fpr.Flush(~op.fprInUse & previous_fpr_in_use);
+
+    previous_gpr_in_use = op.gprInUse;
+    previous_fpr_in_use = op.fprInUse;
 
 #if defined(_DEBUG) || defined(DEBUGFAST)
     if (!gpr.SanityCheck() || !fpr.SanityCheck())
     {
@@ -1181,8 +1196,6 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
       NOTICE_LOG_FMT(DYNA_REC, "Unflushed register: {}", ppc_inst);
     }
 #endif
-    i += js.skipInstructions;
-    js.skipInstructions = 0;
   }
 
   if (code_block.m_broken)
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
index 51925d0d4802..90bb81195ed7 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
@@ -1171,6 +1171,10 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
     IntializeSpeculativeConstants();
   }
 
+  BitSet32 previous_gpr_in_use{};
+  BitSet32 previous_fpr_in_use{};
+  BitSet8 previous_cr_in_use{};
+
   // Translate instructions
   for (u32 i = 0; i < code_block.m_num_instructions; i++)
   {
@@ -1184,189 +1188,197 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
     js.downcountAmount += opinfo->num_cycles;
     js.isLastInstruction = i == (code_block.m_num_instructions - 1);
 
-    // Skip calling UpdateLastUsed for lmw/stmw - it usually hurts more than it helps
-    if (op.inst.OPCD != 46 && op.inst.OPCD != 47)
-      gpr.UpdateLastUsed(op.regsIn | op.regsOut);
-
-    BitSet32 fpr_used = op.fregsIn;
-    if (op.fregOut >= 0)
-      fpr_used[op.fregOut] = true;
-    fpr.UpdateLastUsed(fpr_used);
-
-    if (i != 0)
-    {
-      // Gather pipe writes using a non-immediate address are discovered by profiling.
-      const u32 prev_address = m_code_buffer[i - 1].address;
-      bool gatherPipeIntCheck = js.fifoWriteAddresses.contains(prev_address);
-
-      if (jo.optimizeGatherPipe &&
-          (js.fifoBytesSinceCheck >= GPFifo::GATHER_PIPE_SIZE || js.mustCheckFifo))
-      {
-        js.fifoBytesSinceCheck = 0;
-        js.mustCheckFifo = false;
-
-        gpr.Lock(ARM64Reg::W30);
-        BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
-        BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
-        regs_in_use[DecodeReg(ARM64Reg::W30)] = 0;
-
-        ABI_PushRegisters(regs_in_use);
-        m_float_emit.ABI_PushRegisters(fprs_in_use, ARM64Reg::X30);
-        ABI_CallFunction(&GPFifo::FastCheckGatherPipe, &m_system.GetGPFifo());
-        m_float_emit.ABI_PopRegisters(fprs_in_use, ARM64Reg::X30);
-        ABI_PopRegisters(regs_in_use);
-
-        gpr.Unlock(ARM64Reg::W30);
-        gatherPipeIntCheck = true;
-      }
-
-      // Gather pipe writes can generate an exception; add an exception check.
-      // TODO: This doesn't really match hardware; the CP interrupt is
-      // asynchronous.
-      if (jo.optimizeGatherPipe && gatherPipeIntCheck)
-      {
-        auto WA = gpr.GetScopedReg();
-        ARM64Reg XA = EncodeRegTo64(WA);
-
-        LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
-        FixupBranch no_ext_exception = TBZ(WA, MathUtil::IntLog2(EXCEPTION_EXTERNAL_INT));
-        FixupBranch exception = B();
-        SwitchToFarCode();
-        const u8* done_here = GetCodePtr();
-        FixupBranch exit = B();
-        SetJumpTarget(exception);
-        LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr));
-        TBZ(WA, 15, done_here);  // MSR.EE
-        LDR(IndexType::Unsigned, WA, XA,
-            MOVPage2R(XA, &m_system.GetProcessorInterface().m_interrupt_cause));
-        constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP |
-                                   ProcessorInterface::INT_CAUSE_PE_TOKEN |
-                                   ProcessorInterface::INT_CAUSE_PE_FINISH;
-        TST(WA, LogicalImm(cause_mask, GPRSize::B32));
-        B(CC_EQ, done_here);
-
-        gpr.Flush(FlushMode::MaintainState, WA);
-        fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG);
-        WriteExceptionExit(js.compilerPC, true, true);
-        SwitchToNearCode();
-        SetJumpTarget(no_ext_exception);
-        SetJumpTarget(exit);
-      }
-    }
-
-    if (HandleFunctionHooking(op.address))
-      break;
-
-    if (op.skip)
-    {
-      if (IsDebuggingEnabled())
-      {
-        // The only thing that currently sets op.skip is the BLR following optimization.
-        // If any non-branch instruction starts setting that too, this will need to be changed.
-        ASSERT(op.inst.hex == 0x4e800020);
-        const auto bw_reg_a = gpr.GetScopedReg(), bw_reg_b = gpr.GetScopedReg();
-        const BitSet32 gpr_caller_save =
-            gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(bw_reg_a), DecodeReg(bw_reg_b)};
-        WriteBranchWatch(op.address, op.branchTo, op.inst, bw_reg_a, bw_reg_b,
-                         gpr_caller_save, fpr.GetCallerSavedUsed());
-      }
-    }
-    else
-    {
-      if (IsDebuggingEnabled() && !cpu.IsStepping() &&
-          m_system.GetPowerPC().GetBreakPoints().IsAddressBreakPoint(op.address))
-      {
-        FlushCarry();
-        gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
-        fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
-
-        static_assert(PPCSTATE_OFF(pc) <= 252);
-        static_assert(PPCSTATE_OFF(pc) + 4 == PPCSTATE_OFF(npc));
-
-        MOVI2R(DISPATCHER_PC, op.address);
-        STP(IndexType::Signed, DISPATCHER_PC, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
-        ABI_CallFunction(&PowerPC::CheckAndHandleBreakPointsFromJIT, &m_system.GetPowerPC());
-
-        LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0,
-            MOVPage2R(ARM64Reg::X0, cpu.GetStatePtr()));
-        static_assert(Common::ToUnderlying(CPU::State::Running) == 0);
-        FixupBranch no_breakpoint = CBZ(ARM64Reg::W0);
-
-        Cleanup();
-        if (IsProfilingEnabled())
-        {
-          ABI_CallFunction(&JitBlock::ProfileData::EndProfiling, b->profile_data.get(),
-                           js.downcountAmount);
-        }
-        DoDownCount();
-        B(dispatcher_exit);
-
-        SetJumpTarget(no_breakpoint);
-      }
-
-      if ((opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound)
-      {
-        FixupBranch b1;
-        // This instruction uses FPU - needs to add FP exception bailout
-        {
-          auto WA = gpr.GetScopedReg();
-          LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr));
-          b1 = TBNZ(WA, 13);  // Test FP enabled bit
-
-          FixupBranch far_addr = B();
-          SwitchToFarCode();
-          SetJumpTarget(far_addr);
-
-          gpr.Flush(FlushMode::MaintainState, WA);
-          fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG);
-
-          LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
-          ORR(WA, WA, LogicalImm(EXCEPTION_FPU_UNAVAILABLE, GPRSize::B32));
-          STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
-        }
-
-        WriteExceptionExit(js.compilerPC, false, true);
-
-        SwitchToNearCode();
-
-        SetJumpTarget(b1);
-
-        js.firstFPInstructionFound = true;
-      }
-
-      if (bJITRegisterCacheOff)
-      {
-        FlushCarry();
-        gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
-        fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
-      }
-
-      CompileInstruction(op);
-
-      js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst;
-
-      if (!CanMergeNextInstructions(1) || js.op[1].opinfo->type != ::OpType::Integer)
-        FlushCarry();
-
-      // If we have a register that will never be used again, discard or flush it.
-      if (!bJITRegisterCacheOff)
-      {
-        gpr.DiscardRegisters(op.gprDiscardable);
-        fpr.DiscardRegisters(op.fprDiscardable);
-        gpr.DiscardCRRegisters(op.crDiscardable);
-      }
-      gpr.StoreRegisters(~op.gprInUse & (op.regsIn | op.regsOut));
-      fpr.StoreRegisters(~op.fprInUse & (op.fregsIn | op.GetFregsOut()));
-      gpr.StoreCRRegisters(~op.crInUse & (op.crIn | op.crOut));
-
-      if (opinfo->flags & FL_LOADSTORE)
-        ++js.numLoadStoreInst;
-
-      if (opinfo->flags & FL_USE_FPU)
-        ++js.numFloatingPointInst;
-    }
-
-    i += js.skipInstructions;
-    js.skipInstructions = 0;
+    if (js.skipInstructions != 0)
+    {
+      js.skipInstructions--;
+    }
+    else
+    {
+      // Skip calling UpdateLastUsed for lmw/stmw - it usually hurts more than it helps
+      if (op.inst.OPCD != 46 && op.inst.OPCD != 47)
+        gpr.UpdateLastUsed(op.regsIn | op.regsOut);
+
+      BitSet32 fpr_used = op.fregsIn;
+      if (op.fregOut >= 0)
+        fpr_used[op.fregOut] = true;
+      fpr.UpdateLastUsed(fpr_used);
+
+      if (i != 0)
+      {
+        // Gather pipe writes using a non-immediate address are discovered by profiling.
+        const u32 prev_address = m_code_buffer[i - 1].address;
+        bool gatherPipeIntCheck = js.fifoWriteAddresses.contains(prev_address);
+
+        if (jo.optimizeGatherPipe &&
+            (js.fifoBytesSinceCheck >= GPFifo::GATHER_PIPE_SIZE || js.mustCheckFifo))
+        {
+          js.fifoBytesSinceCheck = 0;
+          js.mustCheckFifo = false;
+
+          gpr.Lock(ARM64Reg::W30);
+          BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
+          BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
+          regs_in_use[DecodeReg(ARM64Reg::W30)] = 0;
+
+          ABI_PushRegisters(regs_in_use);
+          m_float_emit.ABI_PushRegisters(fprs_in_use, ARM64Reg::X30);
+          ABI_CallFunction(&GPFifo::FastCheckGatherPipe, &m_system.GetGPFifo());
+          m_float_emit.ABI_PopRegisters(fprs_in_use, ARM64Reg::X30);
+          ABI_PopRegisters(regs_in_use);
+
+          gpr.Unlock(ARM64Reg::W30);
+          gatherPipeIntCheck = true;
+        }
+
+        // Gather pipe writes can generate an exception; add an exception check.
+        // TODO: This doesn't really match hardware; the CP interrupt is
+        // asynchronous.
+        if (jo.optimizeGatherPipe && gatherPipeIntCheck)
+        {
+          auto WA = gpr.GetScopedReg();
+          ARM64Reg XA = EncodeRegTo64(WA);
+
+          LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
+          FixupBranch no_ext_exception = TBZ(WA, MathUtil::IntLog2(EXCEPTION_EXTERNAL_INT));
+          FixupBranch exception = B();
+          SwitchToFarCode();
+          const u8* done_here = GetCodePtr();
+          FixupBranch exit = B();
+          SetJumpTarget(exception);
+          LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr));
+          TBZ(WA, 15, done_here);  // MSR.EE
+          LDR(IndexType::Unsigned, WA, XA,
+              MOVPage2R(XA, &m_system.GetProcessorInterface().m_interrupt_cause));
+          constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP |
+                                     ProcessorInterface::INT_CAUSE_PE_TOKEN |
+                                     ProcessorInterface::INT_CAUSE_PE_FINISH;
+          TST(WA, LogicalImm(cause_mask, GPRSize::B32));
+          B(CC_EQ, done_here);
+
+          gpr.Flush(FlushMode::MaintainState, WA);
+          fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG);
+          WriteExceptionExit(js.compilerPC, true, true);
+          SwitchToNearCode();
+          SetJumpTarget(no_ext_exception);
+          SetJumpTarget(exit);
+        }
+      }
+
+      if (HandleFunctionHooking(op.address))
+        break;
+
+      if (op.skip)
+      {
+        if (IsDebuggingEnabled())
+        {
+          // The only thing that currently sets op.skip is the BLR following optimization.
+          // If any non-branch instruction starts setting that too, this will need to be changed.
+          ASSERT(op.inst.hex == 0x4e800020);
+          const auto bw_reg_a = gpr.GetScopedReg(), bw_reg_b = gpr.GetScopedReg();
+          const BitSet32 gpr_caller_save =
+              gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(bw_reg_a), DecodeReg(bw_reg_b)};
+          WriteBranchWatch(op.address, op.branchTo, op.inst, bw_reg_a, bw_reg_b,
+                           gpr_caller_save, fpr.GetCallerSavedUsed());
+        }
+      }
+      else
+      {
+        if (IsDebuggingEnabled() && !cpu.IsStepping() &&
+            m_system.GetPowerPC().GetBreakPoints().IsAddressBreakPoint(op.address))
+        {
+          FlushCarry();
+          gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
+          fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
+
+          static_assert(PPCSTATE_OFF(pc) <= 252);
+          static_assert(PPCSTATE_OFF(pc) + 4 == PPCSTATE_OFF(npc));
+
+          MOVI2R(DISPATCHER_PC, op.address);
+          STP(IndexType::Signed, DISPATCHER_PC, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
+          ABI_CallFunction(&PowerPC::CheckAndHandleBreakPointsFromJIT, &m_system.GetPowerPC());
+
+          LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0,
+              MOVPage2R(ARM64Reg::X0, cpu.GetStatePtr()));
+          static_assert(Common::ToUnderlying(CPU::State::Running) == 0);
+          FixupBranch no_breakpoint = CBZ(ARM64Reg::W0);
+
+          Cleanup();
+          if (IsProfilingEnabled())
+          {
+            ABI_CallFunction(&JitBlock::ProfileData::EndProfiling, b->profile_data.get(),
+                             js.downcountAmount);
+          }
+          DoDownCount();
+          B(dispatcher_exit);
+
+          SetJumpTarget(no_breakpoint);
+        }
+
+        if ((opinfo->flags & FL_USE_FPU) && !js.firstFPInstructionFound)
+        {
+          FixupBranch b1;
+          // This instruction uses FPU - needs to add FP exception bailout
+          {
+            auto WA = gpr.GetScopedReg();
+            LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr));
+            b1 = TBNZ(WA, 13);  // Test FP enabled bit
+
+            FixupBranch far_addr = B();
+            SwitchToFarCode();
+            SetJumpTarget(far_addr);
+
+            gpr.Flush(FlushMode::MaintainState, WA);
+            fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG);
+
+            LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
+            ORR(WA, WA, LogicalImm(EXCEPTION_FPU_UNAVAILABLE, GPRSize::B32));
+            STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(Exceptions));
+          }
+
+          WriteExceptionExit(js.compilerPC, false, true);
+
+          SwitchToNearCode();
+
+          SetJumpTarget(b1);
+
+          js.firstFPInstructionFound = true;
+        }
+
+        if (bJITRegisterCacheOff)
+        {
+          FlushCarry();
+          gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
+          fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
+        }
+
+        CompileInstruction(op);
+      }
+    }
+
+    if (opinfo->flags & FL_LOADSTORE)
+      ++js.numLoadStoreInst;
+
+    if (opinfo->flags & FL_USE_FPU)
+      ++js.numFloatingPointInst;
+
+    js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst;
+
+    if (!CanMergeNextInstructions(1) || js.op[1].opinfo->type != ::OpType::Integer)
+      FlushCarry();
+
+    // If we have a register that will never be used again, discard or flush it.
+    if (!bJITRegisterCacheOff)
+    {
+      gpr.DiscardRegisters(op.gprDiscardable);
+      fpr.DiscardRegisters(op.fprDiscardable);
+      gpr.DiscardCRRegisters(op.crDiscardable);
+    }
+    gpr.StoreRegisters(~op.gprInUse & previous_gpr_in_use);
+    fpr.StoreRegisters(~op.fprInUse & previous_fpr_in_use);
+    gpr.StoreCRRegisters(~op.crInUse & previous_cr_in_use);
+
+    previous_gpr_in_use = op.gprInUse;
+    previous_fpr_in_use = op.fprInUse;
+    previous_cr_in_use = op.crInUse;
   }
 
   if (code_block.m_broken)
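
Note on the pattern both hunks share: rather than flushing or storing host registers based on what the current instruction touches (the old `~op.gprInUse & (op.regsIn | op.regsOut)` masks), each backend now remembers the in-use set left behind by the previous instruction and releases exactly the registers that dropped out of it. Because the `skipInstructions` handling is now an explicit branch at the top of the loop rather than the old `i += js.skipInstructions` at the bottom, this bookkeeping also runs for instructions that were merged into a predecessor. Below is a minimal standalone sketch of that loop shape; `BitSet32`, `Op`, `RegCache`, and `EmitBlock` here are simplified stand-ins invented for illustration (cf. `Common::BitSet32`, `PPCAnalyst::CodeOp`, and the Jit64/JitArm64 register caches), not Dolphin's real interfaces:

```cpp
#include <cstdint>
#include <vector>

// Stand-in for Common::BitSet32 (illustrative only).
struct BitSet32
{
  uint32_t m_val = 0;
  BitSet32 operator&(BitSet32 other) const { return {m_val & other.m_val}; }
  BitSet32 operator~() const { return {~m_val}; }
};

// Stand-in for the per-instruction analysis results (cf. PPCAnalyst::CodeOp).
struct Op
{
  BitSet32 gprInUse;         // guest GPRs still live after this instruction
  int skipInstructions = 0;  // following instructions merged into this one
};

// Stand-in register cache; Flush() would spill the given guest registers.
struct RegCache
{
  void Flush(BitSet32 regs) { /* spill the given registers back to ppcState */ }
};

void EmitBlock(const std::vector<Op>& ops, RegCache& gpr)
{
  BitSet32 previous_gpr_in_use{};
  int skip = 0;

  for (const Op& op : ops)
  {
    if (skip != 0)
    {
      // Instruction was merged into a predecessor: emit no code,
      // but still fall through to the flush bookkeeping below.
      skip--;
    }
    else
    {
      // ... compile op here; compilation may consume later instructions ...
      skip = op.skipInstructions;
    }

    // Runs for every instruction, skipped or not: flush exactly the registers
    // that were in use after the previous instruction but no longer are.
    gpr.Flush(~op.gprInUse & previous_gpr_in_use);
    previous_gpr_in_use = op.gprInUse;
  }
}
```

The previous-in-use mask also catches registers whose last use was the previous instruction even when the current instruction never references them, which the old `op.regsIn | op.regsOut` mask could miss; updating `previous_*` on every iteration is what keeps that guarantee intact across skipped instructions.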