From b52106d8839e5dcd6bb3f35890f1073b3d4bb873 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sun, 21 Jan 2024 10:29:55 +0100 Subject: [PATCH 01/16] Fix recursive generics for ARM --- .../DependencyAnalysis/Target_ARM/ARMEmitter.cs | 9 +++++++++ .../Target_ARM/ARMReadyToRunGenericHelperNode.cs | 12 ++++++++++++ 2 files changed, 21 insertions(+) diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_ARM/ARMEmitter.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_ARM/ARMEmitter.cs index 822a351610b9b..8562927f83878 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_ARM/ARMEmitter.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_ARM/ARMEmitter.cs @@ -216,5 +216,14 @@ public void EmitRETIfEqual() EmitBNE(4); EmitRET(); } + + // beq label(+4): ret(2) + next(2) + // bx lr + // label: ... + public void EmitRETIfNotEqual() + { + EmitBEQ(4); + EmitRET(); + } } } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM/ARMReadyToRunGenericHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM/ARMReadyToRunGenericHelperNode.cs index ab985e89a9b90..4cde5328b39c8 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM/ARMReadyToRunGenericHelperNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM/ARMReadyToRunGenericHelperNode.cs @@ -39,6 +39,18 @@ protected void EmitDictionaryLookup(NodeFactory factory, ref ARMEmitter encoder, // Load the generic dictionary cell encoder.EmitLDR(result, context, dictionarySlot * factory.Target.PointerSize); + + // If there's any invalid entries, we need to test for them + // + // Only do this in relocsOnly to make it easier to weed out bugs - the _hasInvalidEntries + // flag can change over the course of compilation and the bad slot helper dependency + // should be reported by someone else - the system should not rely on it coming from here. + if (!relocsOnly && _hasInvalidEntries) + { + encoder.EmitCMP(result, 0); + encoder.EmitRETIfNotEqual(); + encoder.EmitJMP(GetBadSlotHelper(factory)); + } } protected sealed override void EmitCode(NodeFactory factory, ref ARMEmitter encoder, bool relocsOnly) From 292717764feec59ab0a4545fa6fab379df0be582 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sun, 21 Jan 2024 10:31:29 +0100 Subject: [PATCH 02/16] Fix compilation on Debian Bookworm --- src/native/external/llvm-libunwind/src/AddressSpace.hpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/native/external/llvm-libunwind/src/AddressSpace.hpp b/src/native/external/llvm-libunwind/src/AddressSpace.hpp index 26d289068b38c..dff3249bf9bfc 100644 --- a/src/native/external/llvm-libunwind/src/AddressSpace.hpp +++ b/src/native/external/llvm-libunwind/src/AddressSpace.hpp @@ -583,11 +583,7 @@ inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr, // `_dl_find_object`. Use _LIBUNWIND_SUPPORT_DWARF_INDEX, because libunwind // support for _dl_find_object on other unwind formats is not implemented, // yet. -#if defined(DLFO_STRUCT_HAS_EH_DBASE) & defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) - // We expect `_dl_find_object` to return PT_GNU_EH_FRAME. -#if DLFO_EH_SEGMENT_TYPE != PT_GNU_EH_FRAME -#error _dl_find_object retrieves an unexpected section type -#endif +#if defined(DLFO_STRUCT_HAS_EH_DBASE) && defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) && DLFO_EH_SEGMENT_TYPE == PT_GNU_EH_FRAME // We look-up `dl_find_object` dynamically at runtime to ensure backwards // compatibility with earlier version of glibc not yet providing it. On older // systems, we gracefully fallback to `dl_iterate_phdr`. Cache the pointer From 320eea904ddfb3599fe12f0a7d7ebdbe11d30db9 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sun, 21 Jan 2024 10:33:20 +0100 Subject: [PATCH 03/16] Implement thread return address hijacking for ARM --- .../nativeaot/Runtime/StackFrameIterator.cpp | 2 + src/coreclr/nativeaot/Runtime/arm/GcProbe.S | 108 +++++++++++++++++- .../nativeaot/Runtime/unix/UnixContext.cpp | 4 + .../nativeaot/Runtime/unix/UnixContext.h | 2 + .../Runtime/unix/UnixNativeCodeManager.cpp | 19 ++- .../nativeaot/Runtime/unix/unixasmmacros.inc | 3 + .../Runtime/unix/unixasmmacrosamd64.inc | 3 - .../Runtime/unix/unixasmmacrosarm.inc | 2 + .../Runtime/unix/unixasmmacrosarm64.inc | 3 - 9 files changed, 135 insertions(+), 11 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index 1943aa2bcb6d7..457df9dbe8284 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -609,6 +609,8 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC m_RegDisplay.pR0 = (PTR_UIntNative)PTR_TO_REG(pCtx, R0); m_RegDisplay.pR1 = (PTR_UIntNative)PTR_TO_REG(pCtx, R1); + m_RegDisplay.pR2 = (PTR_UIntNative)PTR_TO_REG(pCtx, R2); + m_RegDisplay.pR3 = (PTR_UIntNative)PTR_TO_REG(pCtx, R3); m_RegDisplay.pR4 = (PTR_UIntNative)PTR_TO_REG(pCtx, R4); m_RegDisplay.pR5 = (PTR_UIntNative)PTR_TO_REG(pCtx, R5); m_RegDisplay.pR6 = (PTR_UIntNative)PTR_TO_REG(pCtx, R6); diff --git a/src/coreclr/nativeaot/Runtime/arm/GcProbe.S b/src/coreclr/nativeaot/Runtime/arm/GcProbe.S index 52fee44368288..bfeab32f95ec8 100644 --- a/src/coreclr/nativeaot/Runtime/arm/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/arm/GcProbe.S @@ -8,6 +8,103 @@ #include "AsmOffsets.inc" .global RhpGcPoll2 +.global RhpThrowHwEx + +// See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves return registers +// and accepts the register bitmask +// Call this macro first in the method (no further prolog instructions can be added after this). +// +// threadReg : register containing the Thread* (this will be preserved). +// trashReg : register that can be trashed by this macro +// BITMASK : value to initialize m_dwFlags field with (register or #constant) +.macro PUSH_PROBE_FRAME threadReg, trashReg, BITMASK + // Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving + // incoming register values into it. + PROLOG_STACK_ALLOC 4 // Space for saved APSR + PROLOG_VPUSH "{d0-d3}" // Save floating point return registers + PROLOG_PUSH "{r0-r3,lr}" // Save volatile registers + PROLOG_STACK_ALLOC 4 // Space for caller's SP + PROLOG_PUSH "{r4-r6,r8-r10}" // Save non-volatile registers + PROLOG_STACK_ALLOC 8 // Space for flags and Thread* + PROLOG_PUSH "{r7}" // Save caller's frame pointer + PROLOG_PUSH "{r11,lr}" // Save frame-chain pointer and return address + + str \threadReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_pThread] + mov \trashReg, \BITMASK + str \trashReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] + + // Compute SP value at entry to this method and save it in slot of the frame. + add \trashReg, sp, #(22 * 4) + str \trashReg, [sp, #(11 * 4)] + + // Link the frame into the Thread + str sp, [\threadReg, #OFFSETOF__Thread__m_pDeferredTransitionFrame] +.endm + +// +// Remove the frame from a previous call to PUSH_PROBE_FRAME from the top of the stack and restore preserved +// registers and return value to their values from before the probe was called (while also updating any +// object refs or byrefs). +// +.macro POP_PROBE_FRAME + EPILOG_POP "{r11,lr}" // Restore frame-chain pointer and return address + EPILOG_POP "{r7}" // Restore caller's frame pointer + EPILOG_STACK_FREE 8 // Discard flags and Thread* + EPILOG_POP "{r4-r6,r8-r10}" // Restore non-volatile registers + EPILOG_STACK_FREE 4 // Discard caller's SP + EPILOG_POP "{r0-r3,lr}" // Restore volatile registers + EPILOG_VPOP "{d0-d3}" // Restore floating point return registers + EPILOG_STACK_FREE 4 // Space for saved APSR +.endm + +// +// The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and +// clears the hijack state. +// +// Register state on entry: +// All registers correct for return to the original return address. +// +// Register state on exit: +// r2: thread pointer +// r12: trashed +// +.macro FixupHijackedCallstack + mov r12, r0 + + // r0 <- GetThread() + INLINE_GETTHREAD + + mov r2, r0 + mov r0, r12 + + // Fix the stack by restoring the original return address + ldr lr, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + + // Clear hijack state + mov r12, #0 + str r12, [r2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str r12, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] +.endm + +NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler + PUSH_PROBE_FRAME r2, r3, r12 + + ldr r0, [r2, #OFFSETOF__Thread__m_pDeferredTransitionFrame] + bl RhpWaitForGC2 + + ldr r2, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] + tst r2, #PTFF_THREAD_ABORT + bne LOCAL_LABEL(ThrowThreadAbort) + + POP_PROBE_FRAME + bx lr + +LOCAL_LABEL(ThrowThreadAbort): + POP_PROBE_FRAME + mov r0, #STATUS_REDHAWK_THREAD_ABORT + mov r1, lr // return address as exception PC + b C_FUNC(RhpThrowHwEx) +NESTED_END RhpWaitForGC LEAF_ENTRY RhpGcPoll PREPARE_EXTERNAL_VAR_INDIRECT RhpTrapThreads, r0 @@ -24,8 +121,15 @@ NESTED_ENTRY RhpGcPollRare, _TEXT, NoHandler NESTED_END RhpGcPollRare NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler - // Not implemented - EMIT_BREAKPOINT + FixupHijackedCallstack + + PREPARE_EXTERNAL_VAR_INDIRECT RhpTrapThreads, r3 + tst r3, #TrapThreadsFlags_None + bne LOCAL_LABEL(WaitForGC) + bx lr +LOCAL_LABEL(WaitForGC): + mov r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0) + b RhpWaitForGC NESTED_END RhpGcProbeHijack #ifdef FEATURE_GC_STRESS diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp index 515b6002cf059..4087e2ae6ac4f 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp @@ -256,6 +256,8 @@ #define MCREG_Lr(mc) ((mc).arm_lr) #define MCREG_R0(mc) ((mc).arm_r0) #define MCREG_R1(mc) ((mc).arm_r1) +#define MCREG_R2(mc) ((mc).arm_r2) +#define MCREG_R3(mc) ((mc).arm_r3) #define MCREG_R4(mc) ((mc).arm_r4) #define MCREG_R5(mc) ((mc).arm_r5) #define MCREG_R6(mc) ((mc).arm_r6) @@ -514,6 +516,8 @@ uint64_t GetPC(void* context) uint64_t& UNIX_CONTEXT::Lr(){ return (uint64_t&)MCREG_Lr(ctx.uc_mcontext); } uint64_t& UNIX_CONTEXT::R0(){ return (uint64_t&)MCREG_R0(ctx.uc_mcontext); } uint64_t& UNIX_CONTEXT::R1(){ return (uint64_t&)MCREG_R1(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R2(){ return (uint64_t&)MCREG_R2(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R3(){ return (uint64_t&)MCREG_R3(ctx.uc_mcontext); } uint64_t& UNIX_CONTEXT::R4(){ return (uint64_t&)MCREG_R4(ctx.uc_mcontext); } uint64_t& UNIX_CONTEXT::R5(){ return (uint64_t&)MCREG_R5(ctx.uc_mcontext); } uint64_t& UNIX_CONTEXT::R6(){ return (uint64_t&)MCREG_R6(ctx.uc_mcontext); } diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixContext.h b/src/coreclr/nativeaot/Runtime/unix/UnixContext.h index bfa99e04c60a1..e842ac60851f7 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixContext.h +++ b/src/coreclr/nativeaot/Runtime/unix/UnixContext.h @@ -126,6 +126,8 @@ struct UNIX_CONTEXT uint64_t& Lr(); uint64_t& R0(); uint64_t& R1(); + uint64_t& R2(); + uint64_t& R3(); uint64_t& R4(); uint64_t& R5(); uint64_t& R6(); diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index 2a75e83bbfdfb..ce4eaf17687ab 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -775,13 +775,25 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn // Decode the GC info for the current method to determine its return type GcInfoDecoderFlags flags = DECODE_RETURN_KIND; -#if defined(TARGET_ARM) || defined(TARGET_ARM64) + uint32_t codeOffset = 0; +#if defined(TARGET_ARM) + PTR_UInt8 gcInfo; + codeOffset = GetCodeOffset(pMethodInfo, (PTR_VOID)pRegisterSet->IP, &gcInfo); + flags = (GcInfoDecoderFlags)(flags | DECODE_HAS_TAILCALLS | DECODE_INTERRUPTIBILITY); +#elif defined(TARGET_ARM64) flags = (GcInfoDecoderFlags)(flags | DECODE_HAS_TAILCALLS); #endif // TARGET_ARM || TARGET_ARM64 - GcInfoDecoder decoder(GCInfoToken(p), flags); + GcInfoDecoder decoder(GCInfoToken(p), flags, codeOffset); *pRetValueKind = GetGcRefKind(decoder.GetReturnKind()); +#if defined(TARGET_ARM) + // FIXME: Figure out how to encode this correctly or implement TrailingEpilogueInstructionsCount + if (!decoder.IsInterruptible()) + { + return false; + } +#else int epilogueInstructions = TrailingEpilogueInstructionsCount(pMethodInfo, (PTR_VOID)pRegisterSet->IP); if (epilogueInstructions < 0) { @@ -793,6 +805,7 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn *ppvRetAddrLocation = (PTR_PTR_VOID)(pRegisterSet->GetSP() + (sizeof(TADDR) * (epilogueInstructions - 1))); return true; } +#endif #if defined(TARGET_APPLE) && defined(TARGET_ARM64) // If we are inside a prolog without a saved frame then we cannot safely unwind. @@ -822,7 +835,7 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn *ppvRetAddrLocation = (PTR_PTR_VOID)(pRegisterSet->GetSP() - sizeof(TADDR)); return true; -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_ARM) if (decoder.HasTailCalls()) { diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc index bde1d517b7e82..80f633327c830 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc @@ -3,6 +3,9 @@ #define INVALIDGCVALUE 0xCCCCCCCD +// This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT +#define STATUS_REDHAWK_THREAD_ABORT 0x43 + // Enforce subsections via symbols to workaround bugs in Xcode 15 linker. #if defined(__APPLE__) .subsections_via_symbols diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc index 11225c0ad5ed2..05786d0fe470e 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc @@ -252,9 +252,6 @@ C_FUNC(\Name): #define TSF_SuppressGcStress 0x08 #define TSF_DoNotTriggerGc 0x10 -// This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT -#define STATUS_REDHAWK_THREAD_ABORT 0x43 - // // Rename fields of nested structs // diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc index 7961fdb1dbcf4..d5b0721f3b9b2 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc @@ -17,6 +17,8 @@ #define PTFF_SAVE_ALL_PRESERVED 0x0000007F // NOTE: R11 is not included in this set! #define PTFF_SAVE_SP 0x00000100 #define PTFF_SAVE_R0 0x00000200 +#define PTFF_THREAD_ABORT 0x00100000 + #define DEFAULT_FRAME_SAVE_FLAGS (PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP) // These must match the TrapThreadsFlags enum diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm64.inc index 5bd5db64e983a..8361b06fe6975 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm64.inc @@ -372,9 +372,6 @@ PTFF_THREAD_ABORT_BIT = 36 TrapThreadsFlags_AbortInProgress_Bit = 0 TrapThreadsFlags_TrapThreads_Bit = 1 -// This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT -#define STATUS_REDHAWK_THREAD_ABORT 0x43 - // These must match the TrapThreadsFlags enum #define TrapThreadsFlags_None 0 #define TrapThreadsFlags_AbortInProgress 1 From edcacb7aaec557e78e88de41e05fe0f9f941ed0e Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sun, 21 Jan 2024 23:09:57 +0100 Subject: [PATCH 04/16] Implement TrailingEpilogueInstructionsCount for ARM --- .../Runtime/unix/UnixNativeCodeManager.cpp | 245 ++++++++++++++++-- 1 file changed, 226 insertions(+), 19 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index ce4eaf17687ab..1f022c3188f92 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -367,13 +367,18 @@ bool UnixNativeCodeManager::IsUnwindable(PTR_VOID pvAddress) { MethodInfo * pMethodInfo = NULL; +#if defined(TARGET_ARM) + // FIXME!!! + pvAddress = (PTR_VOID)((uintptr_t)pvAddress & ~1); +#endif + #if defined(TARGET_ARM64) MethodInfo methodInfo; FindMethodInfo(pvAddress, &methodInfo); pMethodInfo = &methodInfo; #endif -#if defined(TARGET_APPLE) && defined(TARGET_ARM64) +#if (defined(TARGET_APPLE) && defined(TARGET_ARM64)) || defined(TARGET_ARM) // VirtualUnwind can't unwind epilogues and some prologues. return TrailingEpilogueInstructionsCount(pMethodInfo, pvAddress) == 0 && IsInProlog(pMethodInfo, pvAddress) != 1; #else @@ -449,6 +454,148 @@ int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddre return savedFpLr && establishedFp ? 0 : 1; +#elif defined(TARGET_ARM) + +// SUB SP, SP, # +// 1011 0000 1xxx xxxx +#define SUB_SP_IMM_BITS 0xB080 +#define SUB_SP_IMM_MASK 0xFF80 + +// SUB{S}.W SP, SP, # +// 1111 0x01 101x 1101 0xxx 1101 xxxx xxxx +#define SUB_W_SP_IMM_BITS 0xF1AD0D00 +#define SUB_W_SP_IMM_MASK 0xFBEF8F00 + +// SUBW SP, SP, # +// 1111 0x10 1010 1101 0xxx 1101 xxxx xxxx +#define SUBW_SP_IMM_BITS 0xF2AD0D00 +#define SUBW_SP_IMM_MASK 0xFBFF8F00 + +// SUB SP, +// 0100 0100 1xxx x101 +#define SUB_SP_REG_BITS 0x4485 +#define SUB_SP_REG_MASK 0xFF87 + +// SUB{S}.W SP, SP, {, } +// 1110 1011 101x 1101 0xxx 1101 xxxx xxxx +#define SUB_W_SP_REG_BITS 0xEBAD0D00 +#define SUB_W_SP_REG_MASK 0xFFEF8F00 + +// PUSH +// 1011 010x xxxx xxxx +#define PUSH_BITS 0xB400 +#define PUSH_MASK 0xFE00 + +// PUSH.W +// 1110 1001 0010 1101 0x0x xxxx xxxx xxxx +#define PUSH_W_BITS_T2 0xE92D0000 +#define PUSH_W_MASK_T2 0xFFFFA000 + +// PUSH.W +// 1111 1000 0100 1101 xxxx 1101 0000 0100 +#define PUSH_W_BITS_T3 0xF84D0D04 +#define PUSH_W_MASK_T3 0xFFFF0FFF + +// VPUSH +// 1110 1101 0x10 1101 xxxx 1011 xxxx xxxx +#define VPUSH_BITS_T1 0xED2D0B00 +#define VPUSH_MASK_T1 0xFFBF0F00 + +// VPUSH +// 1110 1101 0x10 1101 xxxx 1010 xxxx xxxx +#define VPUSH_BITS_T2 0xED2D0A00 +#define VPUSH_MASK_T2 0xFFBF0F00 + +// POP +// 1011 110x xxxx xxxx +#define POP_BITS 0xBC00 +#define POP_MASK 0xFE00 + +// POP.W +// 1110 1000 1011 1101 +#define POP_W_T2 0xE8BD + +// POP.W +// 1111 1000 0101 1101 +#define POP_W_T3 0xF85D + +// BX LR +#define BX_LR_BITS 0x4770 +#define BX_LR_MASK 0xFFFF + +// MOV SP, R4 +#define MOV_SP_R4 0x46A5 + + uint16_t* pInstr = (uint16_t*)pvAddress; + uint32_t instr = *pInstr; + + if ((instr & SUB_SP_IMM_MASK) == SUB_SP_IMM_BITS || + (instr & PUSH_MASK) == PUSH_BITS) + { + return 1; + } + + instr <<= 16; + instr |= *(pInstr + 1); + + if ((instr & SUB_W_SP_IMM_MASK) == SUB_W_SP_IMM_BITS || + (instr & SUBW_SP_IMM_MASK) == SUBW_SP_IMM_BITS || + (instr & SUB_W_SP_REG_MASK) == SUB_W_SP_REG_BITS || + (instr & PUSH_W_MASK_T2) == PUSH_W_BITS_T2 || + (instr & PUSH_W_MASK_T3) == PUSH_W_BITS_T3 || + (instr & VPUSH_MASK_T1) == VPUSH_BITS_T1 || + (instr & VPUSH_MASK_T2) == VPUSH_BITS_T2) + { + return 1; + } + + // The localloc pattern generated by JIT looks like: + // + // movw r4, #frameSize + // sub r4, sp, r4 + // bl CORINFO_HELP_STACK_PROBE + // mov sp, r4 + // + // or + // + // movw r4, #frameSizeLo16 + // movt r4, #frameSizeHi16 + // sub r4, sp, r4 + // bl CORINFO_HELP_STACK_PROBE + // mov sp, r4 + // + // We can look ahead by couple of instructions and look for "mov sp, rXX". + for (int c = 5; c >= 0; --c) + { + instr = *pInstr; + if (instr == MOV_SP_R4) + { + return 1; + } + + // Bail out on any instruction that's clearly an epilog and can be + // end of the method. + if ((instr & POP_MASK) == POP_BITS || + (instr & BX_LR_MASK) == BX_LR_BITS || + instr == POP_W_T2 || instr == POP_W_T3) + { + return 0; + } + + // Skip over to next instruction + if ((instr & 0xE000) == 0xE000 && (instr & 0xF800) != 0xE000) + { + // 32-but Thumb instruction + pInstr += 2; + } + else + { + pInstr++; + } + } + + return 0; + #else return -1; @@ -733,6 +880,77 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho } } +#elif defined(TARGET_ARM) + +// ADD SP, SP, # +// 1011 0000 0xxx xxxx +#define ADD_SP_IMM_BITS 0xB000 +#define ADD_SP_IMM_MASK 0xFF80 + +// ADD{S}.W SP, SP, # +// 1111 0x01 000x 1101 0xxx 1101 xxxx xxxx +#define ADD_W_SP_IMM_BITS 0xF10D0D00 +#define ADD_W_SP_IMM_MASK 0xFBEF8F00 + +// ADDW SP, SP, # +// 1111 0x10 0000 1101 0xxx 1101 xxxx xxxx +#define ADDW_SP_IMM_BITS 0xF20D0D00 +#define ADDW_SP_IMM_MASK 0xFBFF8F00 + +// ADD SP, +// 0100 0100 1xxx x101 +#define ADD_SP_REG_BITS 0x4485 +#define ADD_SP_REG_MASK 0xFF87 + +// ADD{S}.W SP, SP, {, } +// 1110 1011 000x 1101 0xxx 1101 xxxx xxxx +#define ADD_W_SP_REG_BITS 0xEB0D0D00 +#define ADD_W_SP_REG_MASK 0xFFEF8F00 + +// POP.W +// 1110 1000 1011 1101 xx0x xxxx xxxx xxxx +#define POP_W_BITS_T2 0xE8BD0000 +#define POP_W_MASK_T2 0xFFFF2000 + +// POP.W +// 1111 1000 0101 1101 xxxx 1011 0000 0100 +#define POP_W_BITS_T3 0xF85D0B04 +#define POP_W_MASK_T3 0xFFFF0FFF + +// VPOP +// 1110 1100 1x11 1101 xxxx 1011 xxxx xxxx +#define VPOP_BITS_T1 0xECBD0B00 +#define VPOP_MASK_T1 0xFFBF0F00 + +// VPOP +// 1110 1100 1x11 1101 xxxx 1010 xxxx xxxx +#define VPOP_BITS_T2 0xECBD0A00 +#define VPOP_MASK_T2 0xFFBF0F00 + + uint32_t instr = *(uint16_t*)pvAddress; + + if ((instr & ADD_SP_IMM_MASK) == ADD_SP_IMM_BITS || + (instr & ADD_SP_REG_MASK) == ADD_SP_REG_BITS || + (instr & POP_MASK) == POP_BITS || + (instr & BX_LR_MASK) == BX_LR_BITS) + { + return -1; + } + + instr <<= 16; + instr |= *((uint16_t*)pvAddress + 1); + + if ((instr & ADD_W_SP_IMM_MASK) == ADD_W_SP_IMM_BITS || + (instr & ADDW_SP_IMM_MASK) == ADDW_SP_IMM_BITS || + (instr & ADD_W_SP_REG_MASK) == ADD_W_SP_REG_BITS || + (instr & POP_W_MASK_T2) == POP_W_BITS_T2 || + (instr & POP_W_MASK_T3) == POP_W_BITS_T3 || + (instr & VPOP_MASK_T1) == VPOP_BITS_T1 || + (instr & VPOP_MASK_T2) == VPOP_BITS_T2) + { + return -1; + } + #endif return 0; @@ -775,26 +993,16 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn // Decode the GC info for the current method to determine its return type GcInfoDecoderFlags flags = DECODE_RETURN_KIND; - uint32_t codeOffset = 0; -#if defined(TARGET_ARM) - PTR_UInt8 gcInfo; - codeOffset = GetCodeOffset(pMethodInfo, (PTR_VOID)pRegisterSet->IP, &gcInfo); - flags = (GcInfoDecoderFlags)(flags | DECODE_HAS_TAILCALLS | DECODE_INTERRUPTIBILITY); -#elif defined(TARGET_ARM64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) flags = (GcInfoDecoderFlags)(flags | DECODE_HAS_TAILCALLS); #endif // TARGET_ARM || TARGET_ARM64 - GcInfoDecoder decoder(GCInfoToken(p), flags, codeOffset); + GcInfoDecoder decoder(GCInfoToken(p), flags); *pRetValueKind = GetGcRefKind(decoder.GetReturnKind()); + // FIXME!!! + TADDR ip = PCODEToPINSTR((PCODE)pRegisterSet->IP); -#if defined(TARGET_ARM) - // FIXME: Figure out how to encode this correctly or implement TrailingEpilogueInstructionsCount - if (!decoder.IsInterruptible()) - { - return false; - } -#else - int epilogueInstructions = TrailingEpilogueInstructionsCount(pMethodInfo, (PTR_VOID)pRegisterSet->IP); + int epilogueInstructions = TrailingEpilogueInstructionsCount(pMethodInfo, (PTR_VOID)ip); if (epilogueInstructions < 0) { // can't figure, possibly a breakpoint instruction @@ -805,9 +1013,8 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn *ppvRetAddrLocation = (PTR_PTR_VOID)(pRegisterSet->GetSP() + (sizeof(TADDR) * (epilogueInstructions - 1))); return true; } -#endif -#if defined(TARGET_APPLE) && defined(TARGET_ARM64) +#if (defined(TARGET_APPLE) && defined(TARGET_ARM64)) || defined(TARGET_ARM) // If we are inside a prolog without a saved frame then we cannot safely unwind. // // Some known frame layouts use compact unwind encoding which cannot handle unwinding @@ -815,7 +1022,7 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn // recognized by IsInProlog. Any other instruction sequence, known or unknown, falls // through to the platform unwinder which should have DWARF information about the // frame. - if (IsInProlog(pMethodInfo, (PTR_VOID)pRegisterSet->IP) == 1) + if (IsInProlog(pMethodInfo, (PTR_VOID)ip) == 1) { return false; } From 497ea5baa4ad1de21cdff7534b75559d8d160297 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 22 Jan 2024 15:15:53 +0100 Subject: [PATCH 05/16] Fix comment --- .../Target_ARM/ARMReadyToRunGenericHelperNode.cs | 2 +- .../Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs | 2 +- .../Target_X64/X64ReadyToRunGenericHelperNode.cs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM/ARMReadyToRunGenericHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM/ARMReadyToRunGenericHelperNode.cs index 4cde5328b39c8..a1950d4937350 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM/ARMReadyToRunGenericHelperNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM/ARMReadyToRunGenericHelperNode.cs @@ -42,7 +42,7 @@ protected void EmitDictionaryLookup(NodeFactory factory, ref ARMEmitter encoder, // If there's any invalid entries, we need to test for them // - // Only do this in relocsOnly to make it easier to weed out bugs - the _hasInvalidEntries + // Skip this in relocsOnly to make it easier to weed out bugs - the _hasInvalidEntries // flag can change over the course of compilation and the bad slot helper dependency // should be reported by someone else - the system should not rely on it coming from here. if (!relocsOnly && _hasInvalidEntries) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs index 01f55b542daf1..767475ca336cc 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs @@ -42,7 +42,7 @@ protected void EmitDictionaryLookup(NodeFactory factory, ref ARM64Emitter encode // If there's any invalid entries, we need to test for them // - // Only do this in relocsOnly to make it easier to weed out bugs - the _hasInvalidEntries + // Skip this in relocsOnly to make it easier to weed out bugs - the _hasInvalidEntries // flag can change over the course of compilation and the bad slot helper dependency // should be reported by someone else - the system should not rely on it coming from here. if (!relocsOnly && _hasInvalidEntries) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunGenericHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunGenericHelperNode.cs index 1981ed6e1c503..89e4247a44bff 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunGenericHelperNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_X64/X64ReadyToRunGenericHelperNode.cs @@ -44,7 +44,7 @@ protected void EmitDictionaryLookup(NodeFactory factory, ref X64Emitter encoder, // If there's any invalid entries, we need to test for them // - // Only do this in relocsOnly to make it easier to weed out bugs - the _hasInvalidEntries + // Skip this in relocsOnly to make it easier to weed out bugs - the _hasInvalidEntries // flag can change over the course of compilation and the bad slot helper dependency // should be reported by someone else - the system should not rely on it coming from here. if (!relocsOnly && _hasInvalidEntries) From 1d999885697a31d3d7a8a525dc16378041c9f458 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 22 Jan 2024 15:16:31 +0100 Subject: [PATCH 06/16] Fix bugs in RhpGcProbeHijack logic --- src/coreclr/nativeaot/Runtime/arm/GcProbe.S | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/arm/GcProbe.S b/src/coreclr/nativeaot/Runtime/arm/GcProbe.S index bfeab32f95ec8..015da8d3bb628 100644 --- a/src/coreclr/nativeaot/Runtime/arm/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/arm/GcProbe.S @@ -24,9 +24,9 @@ PROLOG_VPUSH "{d0-d3}" // Save floating point return registers PROLOG_PUSH "{r0-r3,lr}" // Save volatile registers PROLOG_STACK_ALLOC 4 // Space for caller's SP - PROLOG_PUSH "{r4-r6,r8-r10}" // Save non-volatile registers + PROLOG_PUSH "{r4-r10}" // Save non-volatile registers PROLOG_STACK_ALLOC 8 // Space for flags and Thread* - PROLOG_PUSH "{r7}" // Save caller's frame pointer + PROLOG_PUSH "{r11}" // Save caller's frame pointer PROLOG_PUSH "{r11,lr}" // Save frame-chain pointer and return address str \threadReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_pThread] @@ -34,8 +34,8 @@ str \trashReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] // Compute SP value at entry to this method and save it in slot of the frame. - add \trashReg, sp, #(22 * 4) - str \trashReg, [sp, #(11 * 4)] + add \trashReg, sp, #(19 * 4 + 4 * 8) + str \trashReg, [sp, #(12 * 4)] // Link the frame into the Thread str sp, [\threadReg, #OFFSETOF__Thread__m_pDeferredTransitionFrame] @@ -48,9 +48,9 @@ // .macro POP_PROBE_FRAME EPILOG_POP "{r11,lr}" // Restore frame-chain pointer and return address - EPILOG_POP "{r7}" // Restore caller's frame pointer + EPILOG_POP "{r11}" // Restore caller's frame pointer EPILOG_STACK_FREE 8 // Discard flags and Thread* - EPILOG_POP "{r4-r6,r8-r10}" // Restore non-volatile registers + EPILOG_POP "{r4-r10}" // Restore non-volatile registers EPILOG_STACK_FREE 4 // Discard caller's SP EPILOG_POP "{r0-r3,lr}" // Restore volatile registers EPILOG_VPOP "{d0-d3}" // Restore floating point return registers @@ -124,7 +124,7 @@ NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler FixupHijackedCallstack PREPARE_EXTERNAL_VAR_INDIRECT RhpTrapThreads, r3 - tst r3, #TrapThreadsFlags_None + tst r3, #TrapThreadsFlags_TrapThreads bne LOCAL_LABEL(WaitForGC) bx lr LOCAL_LABEL(WaitForGC): From a4f2aa6a79e91b0b3fe260b9177bc41812c64f52 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 22 Jan 2024 17:16:58 +0100 Subject: [PATCH 07/16] Fix register trashing by INLINE_GETTHREAD in FixupHijackedCallstack on ARM --- src/coreclr/nativeaot/Runtime/arm/GcProbe.S | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/arm/GcProbe.S b/src/coreclr/nativeaot/Runtime/arm/GcProbe.S index 015da8d3bb628..a13b8992c3b24 100644 --- a/src/coreclr/nativeaot/Runtime/arm/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/arm/GcProbe.S @@ -66,24 +66,25 @@ // // Register state on exit: // r2: thread pointer -// r12: trashed +// r3: trashed // .macro FixupHijackedCallstack - mov r12, r0 + push {r0, r1} // r0 <- GetThread() INLINE_GETTHREAD mov r2, r0 - mov r0, r12 + pop {r0, r1} // Fix the stack by restoring the original return address ldr lr, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] // Clear hijack state - mov r12, #0 - str r12, [r2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] - str r12, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + mov r3, #0 + str r3, [r2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str r3, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + str r3, [r2, #OFFSETOF__Thread__m_uHijackedReturnValueFlags] .endm NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler From 6703b084f411c0b9cc9dc2df2249db249452bf1d Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 22 Jan 2024 17:18:18 +0100 Subject: [PATCH 08/16] Mask the Thumb bit when loading IP from probe frame --- src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index 457df9dbe8284..ca5121950834b 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -21,6 +21,7 @@ #include "threadstore.inl" #include "thread.inl" #include "stressLog.h" +#include "CommonMacros.inl" #include "shash.h" #include "RuntimeInstance.h" @@ -178,7 +179,7 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PInvokeTransitionF #if !defined(USE_PORTABLE_HELPERS) // @TODO: no portable version of regdisplay memset(&m_RegDisplay, 0, sizeof(m_RegDisplay)); - m_RegDisplay.SetIP((PCODE)pFrame->m_RIP); + m_RegDisplay.SetIP((PCODE)PCODEToPINSTR((PCODE)pFrame->m_RIP)); SetControlPC(dac_cast(m_RegDisplay.GetIP())); PTR_UIntNative pPreservedRegsCursor = (PTR_UIntNative)PTR_HOST_MEMBER(PInvokeTransitionFrame, pFrame, m_PreservedRegs); From fa9d979c5c2b3c522c5b1ca518fff44e9bec9776 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 22 Jan 2024 19:39:16 +0100 Subject: [PATCH 09/16] Disable DwarfDump on linux-arm --- src/tests/nativeaot/SmokeTests/DwarfDump/DwarfDump.csproj | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tests/nativeaot/SmokeTests/DwarfDump/DwarfDump.csproj b/src/tests/nativeaot/SmokeTests/DwarfDump/DwarfDump.csproj index 8166b640a16dd..9b234b923cd8a 100644 --- a/src/tests/nativeaot/SmokeTests/DwarfDump/DwarfDump.csproj +++ b/src/tests/nativeaot/SmokeTests/DwarfDump/DwarfDump.csproj @@ -4,6 +4,8 @@ 0 true + + true true false From 1097a6b8e605a12974b0c1a698e1047ae0630d54 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 22 Jan 2024 21:48:32 +0100 Subject: [PATCH 10/16] Cleanup --- src/coreclr/nativeaot/Runtime/arm/GcProbe.S | 10 +++------- .../Runtime/unix/UnixNativeCodeManager.cpp | 13 +++++++------ 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/arm/GcProbe.S b/src/coreclr/nativeaot/Runtime/arm/GcProbe.S index a13b8992c3b24..037d51ab01656 100644 --- a/src/coreclr/nativeaot/Runtime/arm/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/arm/GcProbe.S @@ -20,9 +20,7 @@ .macro PUSH_PROBE_FRAME threadReg, trashReg, BITMASK // Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving // incoming register values into it. - PROLOG_STACK_ALLOC 4 // Space for saved APSR - PROLOG_VPUSH "{d0-d3}" // Save floating point return registers - PROLOG_PUSH "{r0-r3,lr}" // Save volatile registers + PROLOG_PUSH "{r0-r1}" // Save return registers PROLOG_STACK_ALLOC 4 // Space for caller's SP PROLOG_PUSH "{r4-r10}" // Save non-volatile registers PROLOG_STACK_ALLOC 8 // Space for flags and Thread* @@ -34,7 +32,7 @@ str \trashReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] // Compute SP value at entry to this method and save it in slot of the frame. - add \trashReg, sp, #(19 * 4 + 4 * 8) + add \trashReg, sp, #(15 * 4) str \trashReg, [sp, #(12 * 4)] // Link the frame into the Thread @@ -52,9 +50,7 @@ EPILOG_STACK_FREE 8 // Discard flags and Thread* EPILOG_POP "{r4-r10}" // Restore non-volatile registers EPILOG_STACK_FREE 4 // Discard caller's SP - EPILOG_POP "{r0-r3,lr}" // Restore volatile registers - EPILOG_VPOP "{d0-d3}" // Restore floating point return registers - EPILOG_STACK_FREE 4 // Space for saved APSR + EPILOG_POP "{r0,r1}" // Restore return registers .endm // diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index 1f022c3188f92..7a58066fc3c2c 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -368,8 +368,7 @@ bool UnixNativeCodeManager::IsUnwindable(PTR_VOID pvAddress) MethodInfo * pMethodInfo = NULL; #if defined(TARGET_ARM) - // FIXME!!! - pvAddress = (PTR_VOID)((uintptr_t)pvAddress & ~1); + ASSERT(((uintptr_t)pvAddress & 1) == 0); #endif #if defined(TARGET_ARM64) @@ -999,10 +998,12 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn GcInfoDecoder decoder(GCInfoToken(p), flags); *pRetValueKind = GetGcRefKind(decoder.GetReturnKind()); - // FIXME!!! - TADDR ip = PCODEToPINSTR((PCODE)pRegisterSet->IP); - int epilogueInstructions = TrailingEpilogueInstructionsCount(pMethodInfo, (PTR_VOID)ip); +#if defined(TARGET_ARM) + ASSERT(((uintptr_t)pRegisterSet->IP & 1) == 0); +#endif + + int epilogueInstructions = TrailingEpilogueInstructionsCount(pMethodInfo, (PTR_VOID)pRegisterSet->IP); if (epilogueInstructions < 0) { // can't figure, possibly a breakpoint instruction @@ -1022,7 +1023,7 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn // recognized by IsInProlog. Any other instruction sequence, known or unknown, falls // through to the platform unwinder which should have DWARF information about the // frame. - if (IsInProlog(pMethodInfo, (PTR_VOID)ip) == 1) + if (IsInProlog(pMethodInfo, (PTR_VOID)pRegisterSet->IP) == 1) { return false; } From aead4dbf76daaf85c25b023793b947c1c784ad44 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 23 Jan 2024 10:46:46 +0100 Subject: [PATCH 11/16] Emit DWARF info with instruction addresses without Thumb bit (matches clang) --- src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp | 7 ++++++- .../nativeaot/Runtime/unix/UnixNativeCodeManager.cpp | 4 ++-- .../Compiler/ObjectWriter/UnixObjectWriter.cs | 11 ++++++++--- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp b/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp index c3ebf0a78077f..b02e64580a517 100644 --- a/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp +++ b/src/coreclr/nativeaot/Runtime/RuntimeInstance.cpp @@ -65,7 +65,12 @@ COOP_PINVOKE_HELPER(uint8_t *, RhGetRuntimeVersion, (int32_t* pcbLength)) COOP_PINVOKE_HELPER(uint8_t *, RhFindMethodStartAddress, (void * codeAddr)) { - return dac_cast(GetRuntimeInstance()->FindMethodStartAddress(dac_cast(codeAddr))); + uint8_t *startAddress = dac_cast(GetRuntimeInstance()->FindMethodStartAddress(dac_cast(codeAddr))); +#if TARGET_ARM + return startAddress + 1; // Set the Thumb bit +#else + return startAddress; +#endif } PTR_UInt8 RuntimeInstance::FindMethodStartAddress(PTR_VOID ControlPC) diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index 7a58066fc3c2c..f32eafd38a68d 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -87,12 +87,12 @@ bool UnixNativeCodeManager::FindMethodInfo(PTR_VOID ControlPC, unw_proc_info_t procInfo; - if (!UnwindHelpers::GetUnwindProcInfo(PINSTRToPCODE((TADDR)ControlPC), m_UnwindInfoSections, &procInfo)) + if (!UnwindHelpers::GetUnwindProcInfo((TADDR)ControlPC, m_UnwindInfoSections, &procInfo)) { return false; } - assert((procInfo.start_ip <= PINSTRToPCODE((TADDR)ControlPC)) && (PINSTRToPCODE((TADDR)ControlPC) < procInfo.end_ip)); + assert((procInfo.start_ip <= (TADDR)ControlPC) && ((TADDR)ControlPC < procInfo.end_ip)); pMethodInfo->start_ip = procInfo.start_ip; pMethodInfo->format = procInfo.format; diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/UnixObjectWriter.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/UnixObjectWriter.cs index 37d274b794649..78fe7dc30dbaa 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/UnixObjectWriter.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/UnixObjectWriter.cs @@ -75,7 +75,12 @@ private protected override void EmitUnwindInfo( if (nodeWithCodeInfo.FrameInfos is FrameInfo[] frameInfos && nodeWithCodeInfo is ISymbolDefinitionNode) { - bool useFrameNames = UseFrameNames; + // On ARM we always use frame names, even for first frame to ensure that + // DWARF data uses PC references without the Thumb bit set. Using a function + // symbol would propagate the bit. Unfortunately, the R_ARM_REL32_NOI + // relocation is not widely implemented, so using that is not an option. + bool isArm = _nodeFactory.Target.Architecture == TargetArchitecture.ARM; + bool useFrameNames = isArm || UseFrameNames; SectionWriter lsdaSectionWriter; if (ShouldShareSymbol((ObjectNode)nodeWithCodeInfo)) @@ -100,7 +105,7 @@ private protected override void EmitUnwindInfo( string framSymbolName = $"_fram{i}{currentSymbolName}"; lsdaSectionWriter.EmitSymbolDefinition(lsdaSymbolName); - if (start != 0 && useFrameNames) + if (useFrameNames && (isArm || start != 0)) { sectionWriter.EmitSymbolDefinition(framSymbolName, start); } @@ -142,7 +147,7 @@ private protected override void EmitUnwindInfo( } } - string startSymbolName = useFrameNames && start != 0 ? framSymbolName : currentSymbolName; + string startSymbolName = useFrameNames && (isArm || start != 0) ? framSymbolName : currentSymbolName; ulong length = (ulong)(end - start); if (!EmitCompactUnwinding(startSymbolName, length, lsdaSymbolName, blob)) { From 4c4b2613081d0908eac761726bc431719bbdfeaf Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 23 Jan 2024 15:01:51 +0100 Subject: [PATCH 12/16] Report R2/R3 registers in ForEachPossibleObjectRef --- src/coreclr/nativeaot/Runtime/unix/UnixContext.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixContext.h b/src/coreclr/nativeaot/Runtime/unix/UnixContext.h index e842ac60851f7..471f395bef1f5 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixContext.h +++ b/src/coreclr/nativeaot/Runtime/unix/UnixContext.h @@ -145,6 +145,8 @@ struct UNIX_CONTEXT { lambda((size_t*)&R0()); lambda((size_t*)&R1()); + lambda((size_t*)&R2()); + lambda((size_t*)&R3()); lambda((size_t*)&R4()); lambda((size_t*)&R5()); lambda((size_t*)&R6()); From 63ab45c5304cadeff13eb45cbf2e33ac9c4ab4b9 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 23 Jan 2024 16:48:59 +0100 Subject: [PATCH 13/16] Ensure that PInvokeTransitionFrame(s) on the stack are 8-byte aligned. Save FP return values on hijack. --- src/coreclr/nativeaot/Runtime/arm/GcProbe.S | 6 +++++- src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc | 6 +++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/arm/GcProbe.S b/src/coreclr/nativeaot/Runtime/arm/GcProbe.S index 037d51ab01656..4662f468e7536 100644 --- a/src/coreclr/nativeaot/Runtime/arm/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/arm/GcProbe.S @@ -20,6 +20,8 @@ .macro PUSH_PROBE_FRAME threadReg, trashReg, BITMASK // Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving // incoming register values into it. + PROLOG_VPUSH "{d0-d3}" // Save d0-d3 which can have the floating point return value + PROLOG_STACK_ALLOC 4 // Padding for 8-byte alignment PROLOG_PUSH "{r0-r1}" // Save return registers PROLOG_STACK_ALLOC 4 // Space for caller's SP PROLOG_PUSH "{r4-r10}" // Save non-volatile registers @@ -32,7 +34,7 @@ str \trashReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags] // Compute SP value at entry to this method and save it in slot of the frame. - add \trashReg, sp, #(15 * 4) + add \trashReg, sp, #(16 * 4 + 4 * 8) str \trashReg, [sp, #(12 * 4)] // Link the frame into the Thread @@ -51,6 +53,8 @@ EPILOG_POP "{r4-r10}" // Restore non-volatile registers EPILOG_STACK_FREE 4 // Discard caller's SP EPILOG_POP "{r0,r1}" // Restore return registers + EPILOG_STACK_FREE 4 // Discard padding for 8-byte alignment + EPILOG_VPOP "{d0-d3}" // Restore d0-d3 which can have the floating point return value .endm // diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc index d5b0721f3b9b2..eaf96c70609ee 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc @@ -239,14 +239,14 @@ C_FUNC(\Name): // .macro PUSH_COOP_PINVOKE_FRAME trashReg - PROLOG_STACK_ALLOC 4 // Save space for caller's SP + PROLOG_STACK_ALLOC 8 // Save space for caller's SP and 8-byte alignment padding PROLOG_PUSH "{r4-r10}" // Save preserved registers PROLOG_STACK_ALLOC 8 // Save space for flags and Thread* PROLOG_PUSH "{r11}" // Save caller's FP PROLOG_PUSH "{r11,lr}" // Save caller's frame-chain pointer and PC // Compute SP value at entry to this method and save it in the last slot of the frame (slot #12). - add \trashReg, sp, #(13 * 4) + add \trashReg, sp, #(14 * 4) str \trashReg, [sp, #(12 * 4)] // Record the bitmask of saved registers in the frame (slot #4). @@ -262,7 +262,7 @@ C_FUNC(\Name): EPILOG_POP "{r11}" // Restore caller's FP EPILOG_STACK_FREE 8 // Discard flags and Thread* EPILOG_POP "{r4-r10}" // Restore preserved registers - EPILOG_STACK_FREE 4 // Discard caller's SP + EPILOG_STACK_FREE 8 // Discard caller's SP and 8-byte alignment padding .endm // thumb with PIC version From d6f50c15ef6a7c0e4c146773b40285eac86ab1c0 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 23 Jan 2024 21:35:09 +0100 Subject: [PATCH 14/16] Tame the Thumb bit --- .../nativeaot/Runtime/StackFrameIterator.cpp | 21 ++++++++++++------- .../Runtime/unix/UnixNativeCodeManager.cpp | 9 ++++++++ 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index ca5121950834b..06cbcd9901f9a 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -76,7 +76,7 @@ GVAL_IMPL_INIT(PTR_VOID, g_RhpRethrow2Addr, PointerToRhpRethrow2); #ifdef DACCESS_COMPILE #define EQUALS_RETURN_ADDRESS(x, func_name) ((x) == g_ ## func_name ## Addr) #else -#define EQUALS_RETURN_ADDRESS(x, func_name) (((x)) == (PointerTo ## func_name)) +#define EQUALS_RETURN_ADDRESS(x, func_name) (((x)) == (PTR_VOID)PCODEToPINSTR((PCODE)PointerTo ## func_name)) #endif #ifdef DACCESS_COMPILE @@ -385,9 +385,9 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CO // // control state // - SetControlPC(dac_cast(pCtx->GetIp())); m_RegDisplay.SP = pCtx->GetSp(); - m_RegDisplay.IP = pCtx->GetIp(); + m_RegDisplay.IP = PCODEToPINSTR(pCtx->GetIp()); + SetControlPC(dac_cast(m_RegDisplay.GetIP())); #ifdef TARGET_ARM // @@ -994,7 +994,7 @@ void StackFrameIterator::UnwindFuncletInvokeThunk() #endif #if !defined(TARGET_ARM64) - m_RegDisplay.SetIP(*SP++); + m_RegDisplay.SetIP(PCODEToPINSTR(*SP++)); #endif m_RegDisplay.SetSP((uintptr_t)dac_cast(SP)); @@ -1177,7 +1177,7 @@ void StackFrameIterator::UnwindUniversalTransitionThunk() stackFrame->UnwindNonVolatileRegisters(&m_RegDisplay); PTR_UIntNative addressOfPushedCallerIP = stackFrame->get_AddressOfPushedCallerIP(); - m_RegDisplay.SetIP(*addressOfPushedCallerIP); + m_RegDisplay.SetIP(PCODEToPINSTR(*addressOfPushedCallerIP)); m_RegDisplay.SetSP((uintptr_t)dac_cast(stackFrame->get_CallerSP())); SetControlPC(dac_cast(m_RegDisplay.GetIP())); @@ -1268,9 +1268,9 @@ void StackFrameIterator::UnwindThrowSiteThunk() ASSERT_UNCONDITIONALLY("NYI for this arch"); #endif - m_RegDisplay.SetIP(pContext->IP); + m_RegDisplay.SetIP(PCODEToPINSTR(pContext->IP)); m_RegDisplay.SetSP(pContext->GetSp()); - SetControlPC(dac_cast(pContext->IP)); + SetControlPC(dac_cast(m_RegDisplay.GetIP())); // We expect the throw site to be in managed code, and since this function's notion of how to unwind // through the stub is brittle relative to the stub itself, we want to check as soon as we can. @@ -1360,7 +1360,7 @@ void StackFrameIterator::NextInternal() // if the thread is safe to walk, it better not have a hijack in place. ASSERT(!m_pThread->IsHijacked()); - SetControlPC(dac_cast(m_RegDisplay.GetIP())); + SetControlPC(dac_cast(PCODEToPINSTR(m_RegDisplay.GetIP()))); PTR_VOID collapsingTargetFrame = NULL; @@ -1720,6 +1720,11 @@ bool StackFrameIterator::GetHijackedReturnValueLocation(PTR_OBJECTREF * pLocatio void StackFrameIterator::SetControlPC(PTR_VOID controlPC) { +#if TARGET_ARM + // Ensure that PC doesn't have the Thumb bit set. This needs to be + // consistent for EQUALS_RETURN_ADDRESS to work. + ASSERT(((uintptr_t)controlPC & 1) == 0); +#endif m_OriginalControlPC = m_ControlPC = controlPC; } diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index f32eafd38a68d..ee0097b7c0dd6 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -205,6 +205,13 @@ void UnixNativeCodeManager::EnumGcRefs(MethodInfo * pMethodInfo, PTR_UInt8 gcInfo; uint32_t codeOffset = GetCodeOffset(pMethodInfo, safePointAddress, &gcInfo); +#ifdef TARGET_ARM + // Ensure that code offset doesn't have the Thumb bit set. We need + // it to be aligned to instruction start to make the !isActiveStackFrame + // branch below work. + ASSERT(((uintptr_t)codeOffset & 1) == 0); +#endif + if (!isActiveStackFrame) { // If we are not in the active method, we are currently pointing @@ -1000,6 +1007,8 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn *pRetValueKind = GetGcRefKind(decoder.GetReturnKind()); #if defined(TARGET_ARM) + // Ensure that PC doesn't have the Thumb bit set. Prolog and epilog + // checks depend on it. ASSERT(((uintptr_t)pRegisterSet->IP & 1) == 0); #endif From b794da88433281dfd1d02269247b0f7be5b58aba Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 24 Jan 2024 13:39:57 +0100 Subject: [PATCH 15/16] Fix GC hole when thread hijack happens with r0 register holding a reference (eg. boxed int) --- src/coreclr/nativeaot/Runtime/arm/GcProbe.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/arm/GcProbe.S b/src/coreclr/nativeaot/Runtime/arm/GcProbe.S index 4662f468e7536..2a41e9fb755bb 100644 --- a/src/coreclr/nativeaot/Runtime/arm/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/arm/GcProbe.S @@ -79,6 +79,7 @@ // Fix the stack by restoring the original return address ldr lr, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + ldr r12, [r2, #OFFSETOF__Thread__m_uHijackedReturnValueFlags] // Clear hijack state mov r3, #0 @@ -129,7 +130,8 @@ NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler bne LOCAL_LABEL(WaitForGC) bx lr LOCAL_LABEL(WaitForGC): - mov r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0) + mov r3, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0) + orr r12, r3 b RhpWaitForGC NESTED_END RhpGcProbeHijack From a5dc76f2e037713df02f32f675915d0cd1fd42d1 Mon Sep 17 00:00:00 2001 From: Vladimir Sadov Date: Wed, 24 Jan 2024 18:44:50 -0800 Subject: [PATCH 16/16] Update src/coreclr/nativeaot/Runtime/arm/GcProbe.S --- src/coreclr/nativeaot/Runtime/arm/GcProbe.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/arm/GcProbe.S b/src/coreclr/nativeaot/Runtime/arm/GcProbe.S index 2a41e9fb755bb..c202591d7fcd1 100644 --- a/src/coreclr/nativeaot/Runtime/arm/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/arm/GcProbe.S @@ -22,7 +22,7 @@ // incoming register values into it. PROLOG_VPUSH "{d0-d3}" // Save d0-d3 which can have the floating point return value PROLOG_STACK_ALLOC 4 // Padding for 8-byte alignment - PROLOG_PUSH "{r0-r1}" // Save return registers + PROLOG_PUSH "{r0,r1}" // Save return registers PROLOG_STACK_ALLOC 4 // Space for caller's SP PROLOG_PUSH "{r4-r10}" // Save non-volatile registers PROLOG_STACK_ALLOC 8 // Space for flags and Thread*