diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncHelpers.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncHelpers.CoreCLR.cs index 24378e6a408534..1d90c84e6c9a92 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncHelpers.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/CompilerServices/AsyncHelpers.CoreCLR.cs @@ -171,17 +171,8 @@ internal unsafe ref struct AsyncDispatcherInfo public static partial class AsyncHelpers { #if FEATURE_INTERPRETER - [LibraryImport(RuntimeHelpers.QCall, EntryPoint = "AsyncHelpers_ResumeInterpreterContinuation")] - [StackTraceHidden] - private static partial void AsyncHelpers_ResumeInterpreterContinuation(ObjectHandleOnStack cont, ref byte resultStorage); - - [StackTraceHidden] - internal static Continuation? ResumeInterpreterContinuation(Continuation cont, ref byte resultStorage) - { - ObjectHandleOnStack contHandle = ObjectHandleOnStack.Create(ref cont); - AsyncHelpers_ResumeInterpreterContinuation(contHandle, ref resultStorage); - return cont; - } + [MethodImpl(MethodImplOptions.InternalCall)] + internal static extern Continuation? ResumeInterpreterContinuation(Continuation cont, ref byte resultStorage); #endif // This is the "magic" method on which other "Await" methods are built. 
diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs index bb6c9d7c6f3dc0..3ffb3201206c2b 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs @@ -58,18 +58,8 @@ class AsmOffsets #if TARGET_64BIT public const int OFFSETOF__REGDISPLAY__m_pCurrentContext = 0x8; -#if FEATURE_INTERPRETER -#if TARGET_AMD64 && !TARGET_UNIX - public const int SIZEOF__StackFrameIterator = 0x178; - public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x170; -#else - public const int SIZEOF__StackFrameIterator = 0x170; - public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x168; -#endif -#else public const int SIZEOF__StackFrameIterator = 0x150; public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x148; -#endif public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0x132; #elif TARGET_X86 public const int OFFSETOF__REGDISPLAY__m_pCurrentContext = 0x4; @@ -78,13 +68,8 @@ class AsmOffsets public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x3cc; #else // TARGET_64BIT public const int OFFSETOF__REGDISPLAY__m_pCurrentContext = 0x4; -#if FEATURE_INTERPRETER - public const int SIZEOF__StackFrameIterator = 0xd8; - public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0xd4; -#else public const int SIZEOF__StackFrameIterator = 0xc8; public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0xc4; -#endif public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0xba; #endif // TARGET_64BIT @@ -134,18 +119,8 @@ class AsmOffsets #if TARGET_64BIT public const int OFFSETOF__REGDISPLAY__m_pCurrentContext = 0x8; -#if FEATURE_INTERPRETER -#if TARGET_UNIX - public const int SIZEOF__StackFrameIterator = 
0x168; - public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x160; -#else // TARGET_UNIX - public const int SIZEOF__StackFrameIterator = 0x170; - public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x168; -#endif // TARGET_UNIX -#else public const int SIZEOF__StackFrameIterator = 0x148; public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x140; -#endif public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0x12a; #elif TARGET_X86 public const int OFFSETOF__REGDISPLAY__m_pCurrentContext = 0x4; @@ -154,13 +129,8 @@ class AsmOffsets public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0x3c4; #else // TARGET_64BIT public const int OFFSETOF__REGDISPLAY__m_pCurrentContext = 0x4; -#if FEATURE_INTERPRETER - public const int SIZEOF__StackFrameIterator = 0xd0; - public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0xcc; -#else public const int SIZEOF__StackFrameIterator = 0xc0; public const int OFFSETOF__StackFrameIterator__m_AdjustedControlPC = 0xbc; -#endif public const int OFFSETOF__StackFrameIterator__m_isRuntimeWrappedExceptions = 0xb2; #endif // TARGET_64BIT diff --git a/src/coreclr/pal/inc/unixasmmacrosarm.inc b/src/coreclr/pal/inc/unixasmmacrosarm.inc index e2ff7252d2f521..ffa67f69440e4e 100644 --- a/src/coreclr/pal/inc/unixasmmacrosarm.inc +++ b/src/coreclr/pal/inc/unixasmmacrosarm.inc @@ -152,10 +152,35 @@ C_FUNC(\Name): // d2 // d1 // d0 <- __PWTB_FloatArgumentRegisters -.macro PROLOG_WITH_TRANSITION_BLOCK extraLocals = 0, saveFpArgs = 1, pushArgRegs = 0 +// Optional: Callee saved floating point registers (if pushCalleeSavedFloatRegs=1) +// d15 +// d14 +// d13 +// d12 +// d11 +// d10 +// d9 +// d8 <- __PWTB_FloatCalleeSavedRegisters +// +// pushCalleeSavedFloatRegs - Optional parameter. If set to 1, the macro will also save +// the callee-saved floating point registers (d8-d15) to the stack. 
+// These registers are NOT restored by the EPILOG_WITH_TRANSITION_BLOCK variants. +// +.macro PROLOG_WITH_TRANSITION_BLOCK extraLocals = 0, saveFpArgs = 1, pushArgRegs = 0, pushCalleeSavedFloatRegs = 0 - __PWTB_FloatArgumentRegisters = \extraLocals + __PWTB_FloatCalleeSavedRegisters = \extraLocals __PWTB_SaveFPArgs = \saveFpArgs + __PWTB_PushCalleeSavedFloatRegs = \pushCalleeSavedFloatRegs + + // If pushCalleeSavedFloatRegs is specified, reserve space for d8-d15 (8 registers * 8 bytes = 64 bytes) + .if (__PWTB_PushCalleeSavedFloatRegs == 1) + .if ((__PWTB_FloatCalleeSavedRegisters % 8) != 0) + __PWTB_FloatCalleeSavedRegisters = __PWTB_FloatCalleeSavedRegisters + 4 + .endif + __PWTB_FloatArgumentRegisters = __PWTB_FloatCalleeSavedRegisters + 64 + .else + __PWTB_FloatArgumentRegisters = \extraLocals + .endif .if (__PWTB_SaveFPArgs == 1) .if ((__PWTB_FloatArgumentRegisters % 8) != 0) @@ -189,6 +214,12 @@ C_FUNC(\Name): vstm r6, {d0-d7} .endif + // Save callee-saved floating point registers if requested + .if (__PWTB_PushCalleeSavedFloatRegs == 1) + add r6, sp, #(__PWTB_FloatCalleeSavedRegisters) + vstm r6, {d8-d15} + .endif + CHECK_STACK_ALIGNMENT END_PROLOGUE diff --git a/src/coreclr/pal/inc/unixasmmacrosarm64.inc b/src/coreclr/pal/inc/unixasmmacrosarm64.inc index 35e1cc7a637ab7..b374f8b2a83c07 100644 --- a/src/coreclr/pal/inc/unixasmmacrosarm64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosarm64.inc @@ -198,13 +198,35 @@ C_FUNC(\Name\()_End): // FloatRegisters::q2 // FloatRegisters::q1 // FloatRegisters::q0 -.macro PROLOG_WITH_TRANSITION_BLOCK extraLocals = 0, SaveFPArgs = 1, SaveGPArgs = 1 +// Optional: Callee saved floating point registers (if pushCalleeSavedFloatRegs=1) +// d15 +// d14 +// d13 +// d12 +// d11 +// d10 +// d9 +// d8 +// +// pushCalleeSavedFloatRegs - Optional parameter. If set to 1, the macro will also save +// the callee-saved floating point registers (d8-d15) to the stack. 
+// These registers are NOT restored by the EPILOG_WITH_TRANSITION_BLOCK variants. +// +.macro PROLOG_WITH_TRANSITION_BLOCK extraLocals = 0, SaveFPArgs = 1, SaveGPArgs = 1, pushCalleeSavedFloatRegs = 0 - __PWTB_FloatArgumentRegisters = \extraLocals + __PWTB_FloatCalleeSavedRegisters = \extraLocals __PWTB_SaveFPArgs = \SaveFPArgs + __PWTB_PushCalleeSavedFloatRegs = \pushCalleeSavedFloatRegs + + .if ((__PWTB_FloatCalleeSavedRegisters % 16) != 0) + __PWTB_FloatCalleeSavedRegisters = __PWTB_FloatCalleeSavedRegisters + 8 + .endif - .if ((__PWTB_FloatArgumentRegisters % 16) != 0) - __PWTB_FloatArgumentRegisters = __PWTB_FloatArgumentRegisters + 8 + // If pushCalleeSavedFloatRegs is specified, reserve space for d8-d15 (8 registers * 8 bytes = 64 bytes) + .if (__PWTB_PushCalleeSavedFloatRegs == 1) + __PWTB_FloatArgumentRegisters = __PWTB_FloatCalleeSavedRegisters + 64 + .else + __PWTB_FloatArgumentRegisters = __PWTB_FloatCalleeSavedRegisters .endif __PWTB_TransitionBlock = __PWTB_FloatArgumentRegisters @@ -234,7 +256,15 @@ C_FUNC(\Name\()_End): .endif .if (__PWTB_SaveFPArgs == 1) - SAVE_FLOAT_ARGUMENT_REGISTERS sp, \extraLocals + SAVE_FLOAT_ARGUMENT_REGISTERS sp, __PWTB_FloatArgumentRegisters + .endif + + // Save callee-saved floating point registers if requested + .if (__PWTB_PushCalleeSavedFloatRegs == 1) + PROLOG_SAVE_REG_PAIR d8, d9, __PWTB_FloatCalleeSavedRegisters + PROLOG_SAVE_REG_PAIR d10, d11, __PWTB_FloatCalleeSavedRegisters + 16 + PROLOG_SAVE_REG_PAIR d12, d13, __PWTB_FloatCalleeSavedRegisters + 32 + PROLOG_SAVE_REG_PAIR d14, d15, __PWTB_FloatCalleeSavedRegisters + 48 .endif .endm diff --git a/src/coreclr/pal/inc/unixasmmacrosriscv64.inc b/src/coreclr/pal/inc/unixasmmacrosriscv64.inc index d244756c304eb9..406074d6f49436 100644 --- a/src/coreclr/pal/inc/unixasmmacrosriscv64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosriscv64.inc @@ -260,14 +260,26 @@ C_FUNC(\Name): // FPR_f8 / fs0 // Extra: // -.macro PROLOG_WITH_TRANSITION_BLOCK extraParameters = 0, extraLocals = 
0, SaveFPRegs = 1 +// pushCalleeSavedFloatRegs - Optional parameter. If set to 1, the macro will also save +// the callee-saved floating point registers (fs0-fs11) to the stack. +// These registers are NOT restored by the EPILOG_WITH_TRANSITION_BLOCK variants. +// +.macro PROLOG_WITH_TRANSITION_BLOCK extraParameters = 0, extraLocals = 0, SaveFPRegs = 1, pushCalleeSavedFloatRegs = 0 __PWTB_SaveFPArgs = \SaveFPRegs + __PWTB_PushCalleeSavedFloatRegs = \pushCalleeSavedFloatRegs - __PWTB_FloatArgumentRegisters = \extraLocals + __PWTB_FloatCalleeSavedRegisters = \extraLocals // Note, stack (see __PWTB_StackAlloc variable) must be 16 byte aligned. - .if ((__PWTB_FloatArgumentRegisters % 16) != 0) - __PWTB_FloatArgumentRegisters = __PWTB_FloatArgumentRegisters + 8 + .if ((__PWTB_FloatCalleeSavedRegisters % 16) != 0) + __PWTB_FloatCalleeSavedRegisters = __PWTB_FloatCalleeSavedRegisters + 8 + .endif + + // If pushCalleeSavedFloatRegs is specified, reserve space for fs0-fs11 (12 registers * 8 bytes = 96 bytes) + .if (__PWTB_PushCalleeSavedFloatRegs == 1) + __PWTB_FloatArgumentRegisters = __PWTB_FloatCalleeSavedRegisters + 96 + .else + __PWTB_FloatArgumentRegisters = __PWTB_FloatCalleeSavedRegisters .endif __PWTB_TransitionBlock = __PWTB_FloatArgumentRegisters @@ -296,6 +308,22 @@ C_FUNC(\Name): SAVE_FLOAT_ARGUMENT_REGISTERS sp, __PWTB_FloatArgumentRegisters .endif + // Save callee-saved floating point registers if requested (fs0-fs11) + .if (__PWTB_PushCalleeSavedFloatRegs == 1) + fsd fs0, (__PWTB_FloatCalleeSavedRegisters)(sp) + fsd fs1, (__PWTB_FloatCalleeSavedRegisters + 8)(sp) + fsd fs2, (__PWTB_FloatCalleeSavedRegisters + 16)(sp) + fsd fs3, (__PWTB_FloatCalleeSavedRegisters + 24)(sp) + fsd fs4, (__PWTB_FloatCalleeSavedRegisters + 32)(sp) + fsd fs5, (__PWTB_FloatCalleeSavedRegisters + 40)(sp) + fsd fs6, (__PWTB_FloatCalleeSavedRegisters + 48)(sp) + fsd fs7, (__PWTB_FloatCalleeSavedRegisters + 56)(sp) + fsd fs8, (__PWTB_FloatCalleeSavedRegisters + 64)(sp) + fsd fs9, 
(__PWTB_FloatCalleeSavedRegisters + 72)(sp) + fsd fs10, (__PWTB_FloatCalleeSavedRegisters + 80)(sp) + fsd fs11, (__PWTB_FloatCalleeSavedRegisters + 88)(sp) + .endif + .endm .macro EPILOG_WITH_TRANSITION_BLOCK_RETURN diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index 7805d686dad781..4d7024f890cbc8 100644 --- a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -17,6 +17,7 @@ extern IL_Rethrow_Impl:proc ifdef FEATURE_INTERPRETER extern ExecuteInterpretedMethod:proc extern GetInterpThreadContextWithPossiblyMissingThreadOrCallStub:proc +extern CallInterpreterFuncletWorker:proc endif extern g_pPollGC:QWORD @@ -559,7 +560,7 @@ ifdef FEATURE_INTERPRETER NESTED_ENTRY InterpreterStub, _TEXT - PROLOG_WITH_TRANSITION_BLOCK + PROLOG_WITH_TRANSITION_BLOCK 0, __InterpreterStubArgumentRegistersOffset = __PWTB_ArgumentRegisters ; IR bytecode address @@ -1286,6 +1287,56 @@ END_PROLOGUE ret NESTED_END CallJittedMethodRetU2, _TEXT +;========================================================================== +; Create a real TransitionBlock and call CallInterpreterFuncletWorker +; to execute an interpreter funclet (catch/finally/filter handler). 
+; +; extern "C" DWORD_PTR CallInterpreterFunclet( +; OBJECTREF throwable, // rcx +; void* pHandler, // rdx +; REGDISPLAY *pRD, // r8 +; ExInfo *pExInfo, // r9 +; bool isFilter // [rsp+28h] +; ); +;========================================================================== +extern CallInterpreterFuncletWorker:proc + +NESTED_ENTRY CallInterpreterFunclet, _TEXT + + PROLOG_WITH_TRANSITION_BLOCK 16, + + ; Pass TransitionBlock* as last (6th) argument on stack + ; Worker signature: CallInterpreterFuncletWorker(throwable, pHandler, pRD, pExInfo, isFilter, TransitionBlock*) + ; Original args: rcx=throwable, rdx=pHandler, r8=pRD, r9=pExInfo, [rsp+__PWTB_ArgumentRegisters+20h]=isFilter + + ; Move isFilter to 5th param slot + mov rax, [rsp + __PWTB_ArgumentRegisters + 20h] ; isFilter (5th param from original caller) + mov [rsp + 20h], rax ; pass isFilter as 5th param on stack + + ; Put TransitionBlock* as 6th param on stack + lea rax, [rsp + __PWTB_TransitionBlock] + mov [rsp + 28h], rax ; TransitionBlock* as 6th param + + ; rcx, rdx, r8, r9 remain unchanged (throwable, pHandler, pRD, pExInfo) + + call CallInterpreterFuncletWorker + + EPILOG_WITH_TRANSITION_BLOCK_RETURN + +NESTED_END CallInterpreterFunclet, _TEXT + +extern AsyncHelpers_ResumeInterpreterContinuationWorker:proc + +NESTED_ENTRY AsyncHelpers_ResumeInterpreterContinuation, _TEXT + PROLOG_WITH_TRANSITION_BLOCK 0, + + lea r8, [rsp + __PWTB_TransitionBlock] + call AsyncHelpers_ResumeInterpreterContinuationWorker + + EPILOG_WITH_TRANSITION_BLOCK_RETURN + +NESTED_END AsyncHelpers_ResumeInterpreterContinuation, _TEXT + endif ; FEATURE_INTERPRETER ;========================================================================== diff --git a/src/coreclr/vm/amd64/AsmMacros.inc b/src/coreclr/vm/amd64/AsmMacros.inc index 07531371d6627d..abd8096f4a61db 100644 --- a/src/coreclr/vm/amd64/AsmMacros.inc +++ b/src/coreclr/vm/amd64/AsmMacros.inc @@ -355,13 +355,18 @@ RESTORE_FLOAT_ARGUMENT_REGISTERS macro ofs ; xmm2 ; xmm1 ; xmm0 <- 
__PWTB_FloatArgumentRegisters +; Optional: Callee saved floating point registers +; xmm15 +; . +; . +; xmm6 ; extra locals + padding to qword align ; callee's r9 ; callee's r8 ; callee's rdx ; callee's rcx -PROLOG_WITH_TRANSITION_BLOCK macro extraLocals := <0>, stackAllocOnEntry := <0>, stackAllocSpill1, stackAllocSpill2, stackAllocSpill3 +PROLOG_WITH_TRANSITION_BLOCK macro extraLocals := <0>, calleeSavedFloatRegs := , stackAllocOnEntry := <0>, stackAllocSpill1, stackAllocSpill2, stackAllocSpill3 __PWTB_FloatArgumentRegisters = SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES + extraLocals @@ -369,6 +374,11 @@ PROLOG_WITH_TRANSITION_BLOCK macro extraLocals := <0>, stackAllocOnEntry := <0>, __PWTB_FloatArgumentRegisters = __PWTB_FloatArgumentRegisters + 8 endif + ifidn , + __PWTB_FloatCalleeSavedRegisters = __PWTB_FloatArgumentRegisters + __PWTB_FloatArgumentRegisters = __PWTB_FloatArgumentRegisters + 10 * 16 + endif + __PWTB_StackAlloc = __PWTB_FloatArgumentRegisters + 4 * 16 + 8 __PWTB_TransitionBlock = __PWTB_StackAlloc __PWTB_ArgumentRegisters = __PWTB_StackAlloc + 9 * 8 @@ -403,6 +413,19 @@ PROLOG_WITH_TRANSITION_BLOCK macro extraLocals := <0>, stackAllocOnEntry := <0>, SAVE_ARGUMENT_REGISTERS __PWTB_ArgumentRegisters SAVE_FLOAT_ARGUMENT_REGISTERS __PWTB_FloatArgumentRegisters + ifidn , + movdqa [rsp + __PWTB_FloatCalleeSavedRegisters], xmm6 + movdqa [rsp + __PWTB_FloatCalleeSavedRegisters + 10h], xmm7 + movdqa [rsp + __PWTB_FloatCalleeSavedRegisters + 20h], xmm8 + movdqa [rsp + __PWTB_FloatCalleeSavedRegisters + 30h], xmm9 + movdqa [rsp + __PWTB_FloatCalleeSavedRegisters + 40h], xmm10 + movdqa [rsp + __PWTB_FloatCalleeSavedRegisters + 50h], xmm11 + movdqa [rsp + __PWTB_FloatCalleeSavedRegisters + 60h], xmm12 + movdqa [rsp + __PWTB_FloatCalleeSavedRegisters + 70h], xmm13 + movdqa [rsp + __PWTB_FloatCalleeSavedRegisters + 80h], xmm14 + movdqa [rsp + __PWTB_FloatCalleeSavedRegisters + 90h], xmm15 + endif + if stackAllocOnEntry ge 3*8 mov stackAllocSpill3, [rsp + 
__PWTB_StackAlloc + 28h] save_reg_postrsp r13, __PWTB_StackAlloc + 28h @@ -490,9 +513,9 @@ POP_COOP_PINVOKE_FRAME macro ; need to capture the complete register state including FP callee-saved registers. ; ; Stack layout (from high to low address after prologue): +; Outgoing argument homes (32 bytes) ; Return address (m_ReturnAddress) ; CalleeSavedRegisters (r15, r14, r13, r12, rbp, rbx, rsi, rdi - 64 bytes) <- TransitionBlock starts here -; Outgoing argument homes (32 bytes) ; FloatArgumentRegisters (xmm0-xmm3, 64 bytes) ; FP Callee-saved registers (xmm6-xmm15, 160 bytes) ; Shadow space for call (32 bytes) diff --git a/src/coreclr/vm/amd64/ExternalMethodFixupThunk.asm b/src/coreclr/vm/amd64/ExternalMethodFixupThunk.asm index 8fba12d4c6e6c1..fcdac50f939199 100644 --- a/src/coreclr/vm/amd64/ExternalMethodFixupThunk.asm +++ b/src/coreclr/vm/amd64/ExternalMethodFixupThunk.asm @@ -14,7 +14,7 @@ ifdef FEATURE_READYTORUN NESTED_ENTRY DelayLoad_MethodCall, _TEXT - PROLOG_WITH_TRANSITION_BLOCK 0, 10h, r8, r9 + PROLOG_WITH_TRANSITION_BLOCK 0, DoNotPushCalleeSavedFloatRegs, 10h, r8, r9 lea rcx, [rsp + __PWTB_TransitionBlock] ; pTransitionBlock mov rdx, rax ; pIndirection @@ -33,7 +33,7 @@ DYNAMICHELPER macro frameFlags, suffix NESTED_ENTRY DelayLoad_Helper&suffix, _TEXT - PROLOG_WITH_TRANSITION_BLOCK 8h, 10h, r8, r9 + PROLOG_WITH_TRANSITION_BLOCK 8h, DoNotPushCalleeSavedFloatRegs, 10h, r8, r9 mov qword ptr [rsp + SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES], frameFlags lea rcx, [rsp + __PWTB_TransitionBlock] ; pTransitionBlock diff --git a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm index 249c5dd91f5539..c0e10f61ed4f60 100644 --- a/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm +++ b/src/coreclr/vm/amd64/VirtualCallStubAMD64.asm @@ -24,7 +24,7 @@ INITIAL_SUCCESS_COUNT equ 100h NESTED_ENTRY ResolveWorkerAsmStub, _TEXT - PROLOG_WITH_TRANSITION_BLOCK 0, 8, r8 + PROLOG_WITH_TRANSITION_BLOCK 0, , 8, r8 ; token stored in r8 by prolog diff 
--git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 80d121da71e7b5..2d71f26409bc41 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -1996,6 +1996,46 @@ END_PROLOGUE ret NESTED_END CallJittedMethodRetDoubleDouble, _TEXT +//========================================================================== +// Create a real TransitionBlock and call CallInterpreterFuncletWorker +// to execute an interpreter funclet (catch/finally/filter handler). +// +// extern "C" DWORD_PTR CallInterpreterFunclet( +// OBJECTREF throwable, // rdi +// void* pHandler, // rsi +// REGDISPLAY *pRD, // rdx +// ExInfo *pExInfo, // rcx +// bool isFilter // r8 +// ); +//========================================================================== +NESTED_ENTRY CallInterpreterFunclet, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK + + // Pass TransitionBlock* as last (6th) argument + // Worker signature: CallInterpreterFuncletWorker(throwable, pHandler, pRD, pExInfo, isFilter, TransitionBlock*) + // Original args: rdi=throwable, rsi=pHandler, rdx=pRD, rcx=pExInfo, r8=isFilter + + lea r9, [rsp + __PWTB_TransitionBlock] // TransitionBlock* as 6th param (r9) + + // rdi, rsi, rdx, rcx, r8 remain unchanged (throwable, pHandler, pRD, pExInfo, isFilter) + + call C_FUNC(CallInterpreterFuncletWorker) + + EPILOG_WITH_TRANSITION_BLOCK_RETURN + +NESTED_END CallInterpreterFunclet, _TEXT + +NESTED_ENTRY AsyncHelpers_ResumeInterpreterContinuation, _TEXT, NoHandler + PROLOG_WITH_TRANSITION_BLOCK + + lea rdx, [rsp + __PWTB_TransitionBlock] + call C_FUNC(AsyncHelpers_ResumeInterpreterContinuationWorker) + + EPILOG_WITH_TRANSITION_BLOCK_RETURN + +NESTED_END AsyncHelpers_ResumeInterpreterContinuation, _TEXT + #endif // FEATURE_INTERPRETER // ------------------------------------------------------------------ diff --git a/src/coreclr/vm/amd64/cgenamd64.cpp b/src/coreclr/vm/amd64/cgenamd64.cpp index 977c97ed77b808..8e48902c3b529f 100644 --- 
a/src/coreclr/vm/amd64/cgenamd64.cpp +++ b/src/coreclr/vm/amd64/cgenamd64.cpp @@ -88,8 +88,6 @@ void TransitionFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFl if (updateFloats) { UpdateFloatingPointRegisters(pRD, GetSP()); - _ASSERTE(pRD->pCurrentContext->Rip == GetReturnAddress()); - _ASSERTE(pRD->pCurrentContext->Rsp == GetSP()); } #endif // DACCESS_COMPILE @@ -138,6 +136,28 @@ void ResolveHelperFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updat } #endif // FEATURE_RESOLVE_HELPER_DISPATCH +#ifdef FEATURE_INTERPRETER +#ifndef DACCESS_COMPILE +void InterpreterFrame::UpdateFloatingPointRegisters_Impl(const PREGDISPLAY pRD, TADDR) +{ + LIMITED_METHOD_CONTRACT; + +#ifndef UNIX_AMD64_ABI + // The interpreter frame saves the floating point registers in the TransitionBlock, so we need to update them in the REGDISPLAY when we update the REGDISPLAY for an interpreter frame. + // Note: Unix AMD64 ABI has no callee-saved floating point registers, so this is Windows-only. + // FP callee-saved are at TransitionBlock - 232 (8 for stack alignment + 4 * 16 for FP argument registers + 10 * 16 for callee saved floating point registers). 
+ TADDR pTransitionBlock = GetTransitionBlock(); + M128A *pCalleeSavedFloats = (M128A*)((BYTE*)pTransitionBlock - 232); + for (int i = 0; i < 10; i++) + { + (&pRD->pCurrentContext->Xmm6)[i] = pCalleeSavedFloats[i]; + (&pRD->pCurrentContextPointers->Xmm6)[i] = &pCalleeSavedFloats[i]; + } +#endif // !UNIX_AMD64_ABI +} +#endif // DACCESS_COMPILE +#endif // FEATURE_INTERPRETER + void InlinedCallFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats) { CONTRACTL @@ -161,7 +181,7 @@ void InlinedCallFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateF #ifndef DACCESS_COMPILE if (updateFloats) { - UpdateFloatingPointRegisters(pRD); + UpdateFloatingPointRegisters(pRD, dac_cast(GetCallSiteSP())); // The float updating unwinds the stack so the pRD->pCurrentContext->Rip contains correct unwound Rip // This is used for exception handling and the Rip extracted from m_pCallerReturnAddress is slightly // off, which causes problem with searching for the return address on shadow stack on x64, so diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 2af508a0df973c..58eb054f468c62 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -1254,7 +1254,7 @@ NESTED_ENTRY CallJittedMethodRetU2, _TEXT, NoHandler NESTED_END CallJittedMethodRetU2, _TEXT NESTED_ENTRY InterpreterStub, _TEXT, NoHandler - PROLOG_WITH_TRANSITION_BLOCK + PROLOG_WITH_TRANSITION_BLOCK pushCalleeSavedFloatRegs=1 // IR bytecode address mov r4, METHODDESC_REGISTER // InterpMethod @@ -1368,4 +1368,72 @@ NESTED_ENTRY InterpreterStubRetBuffR1, _TEXT, NoHandler EPILOG_POP {pc} NESTED_END InterpreterStubRetBuffR1, _TEXT +// ------------------------------------------------------------------ +// Create a real TransitionBlock and call CallInterpreterFuncletWorker +// to execute an interpreter funclet (catch/finally/filter handler). 
+// +// extern "C" DWORD_PTR CallInterpreterFunclet( +// OBJECTREF throwable, // r0 +// void* pHandler, // r1 +// REGDISPLAY *pRD, // r2 +// ExInfo *pExInfo, // r3 +// bool isFilter // [sp, #0] +// ); +// ------------------------------------------------------------------ +NESTED_ENTRY CallInterpreterFunclet, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK pushCalleeSavedFloatRegs=1 + + // Worker signature: CallInterpreterFuncletWorker(throwable, pHandler, pRD, pExInfo, isFilter, TransitionBlock*) + // Original args: r0=throwable, r1=pHandler, r2=pRD, r3=pExInfo, isFilter on stack + // For ARM32, first 4 args go in r0-r3, args 5-6 go on stack + + // First, allocate space for stack args (2 words for isFilter and TransitionBlock*) + sub sp, sp, #8 + + // Put TransitionBlock* as 6th param (2nd stack arg) + add r12, sp, #8 + __PWTB_TransitionBlock // TransitionBlock* + str r12, [sp, #4] + + // Load isFilter from original stack location and store as 5th param (1st stack arg) + // After PROLOG_WITH_TRANSITION_BLOCK, original stack args are at __PWTB_TransitionBlock offset + // The 5th param (isFilter) was pushed before our stack allocation + ldr r12, [sp, #8 + __PWTB_TransitionBlock + SIZEOF__ArgumentRegisters] + str r12, [sp, #0] + + // r0-r3 remain unchanged + + CHECK_STACK_ALIGNMENT + bl C_FUNC(CallInterpreterFuncletWorker) + + add sp, sp, #8 + + EPILOG_WITH_TRANSITION_BLOCK_RETURN + +NESTED_END CallInterpreterFunclet, _TEXT + +// ------------------------------------------------------------------ +// Resume an interpreter continuation after an async await. +// The worker function will restore callee-saved registers from the +// TransitionBlock. 
+// +// FCDECL2(ContinuationObject*, AsyncHelpers_ResumeInterpreterContinuation, ContinuationObject* cont, uint8_t* resultStorage); +// +// ------------------------------------------------------------------ +NESTED_ENTRY AsyncHelpers_ResumeInterpreterContinuation, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK pushCalleeSavedFloatRegs=1 + + // Worker signature: AsyncHelpers_ResumeInterpreterContinuationWorker(cont, resultStorage, TransitionBlock*) + // r0, r1 remain unchanged + + add r2, sp, #__PWTB_TransitionBlock // TransitionBlock* as 3rd param (r2) + + CHECK_STACK_ALIGNMENT + bl C_FUNC(AsyncHelpers_ResumeInterpreterContinuationWorker) + + EPILOG_WITH_TRANSITION_BLOCK_RETURN + +NESTED_END AsyncHelpers_ResumeInterpreterContinuation, _TEXT + #endif // FEATURE_INTERPRETER diff --git a/src/coreclr/vm/arm/stubs.cpp b/src/coreclr/vm/arm/stubs.cpp index 6609dc48f9a957..8964c627beb82f 100644 --- a/src/coreclr/vm/arm/stubs.cpp +++ b/src/coreclr/vm/arm/stubs.cpp @@ -1244,7 +1244,6 @@ void TransitionFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFl if (updateFloats) { UpdateFloatingPointRegisters(pRD, GetSP()); - _ASSERTE(pRD->pCurrentContext->Pc == GetReturnAddress()); } #endif // DACCESS_COMPILE @@ -1275,6 +1274,29 @@ void TransitionFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFl LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK TransitionFrame::UpdateRegDisplay_Impl(rip:%p, rsp:%p)\n", pRD->ControlPC, pRD->SP)); } +#ifdef FEATURE_INTERPRETER +#ifndef DACCESS_COMPILE +void InterpreterFrame::UpdateFloatingPointRegisters_Impl(const PREGDISPLAY pRD, TADDR) +{ + LIMITED_METHOD_CONTRACT; + + // The interpreter frame saves the floating point callee-saved registers (d8-d15) in the TransitionBlock, + // so we need to update them in the REGDISPLAY when we update the REGDISPLAY for an interpreter frame. 
+ // + // Stack layout when pushCalleeSavedFloatRegs is used with extraLocals = 0 and saveFpArgs = 1 (d0-d7 directly follow d8-d15): + // [d8-d15 (64 bytes)] [d0-d7 (64 bytes)] [padding (4 bytes)] [TransitionBlock] + // FP callee-saved are at TransitionBlock - 132 (64 + 64 + 4); NOTE(review): keep in sync with PROLOG_WITH_TRANSITION_BLOCK in unixasmmacrosarm.inc + TADDR pTransitionBlock = GetTransitionBlock(); + UINT64 *pCalleeSavedFloats = (UINT64*)((BYTE*)pTransitionBlock - 132); + + for (int i = 0; i < 8; i++) + { + pRD->pCurrentContext->D[8 + i] = pCalleeSavedFloats[i]; + } +} +#endif // DACCESS_COMPILE +#endif // FEATURE_INTERPRETER + void FaultingExceptionFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats) { LIMITED_METHOD_DAC_CONTRACT; @@ -1329,7 +1351,7 @@ void InlinedCallFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateF #ifndef DACCESS_COMPILE if (updateFloats) { - UpdateFloatingPointRegisters(pRD); + UpdateFloatingPointRegisters(pRD, dac_cast(GetCallSiteSP())); } #endif // DACCESS_COMPILE diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 905614cbf60251..60889ac0b9fa09 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -686,9 +686,9 @@ LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT NESTED_ENTRY InterpreterStub, _TEXT, NoHandler #ifdef TARGET_APPLE - PROLOG_WITH_TRANSITION_BLOCK extraLocals=8*16, SaveFPArgs=0,SaveGPArgs=0 + PROLOG_WITH_TRANSITION_BLOCK extraLocals=8*16, SaveFPArgs=0,SaveGPArgs=0, pushCalleeSavedFloatRegs=1 #else - PROLOG_WITH_TRANSITION_BLOCK + PROLOG_WITH_TRANSITION_BLOCK pushCalleeSavedFloatRegs=1 #endif // IR bytecode address @@ -3287,6 +3287,58 @@ LEAF_ENTRY SwiftLoweredReturnTerminator LEAF_END SwiftLoweredReturnTerminator #endif // TARGET_APPLE +// ------------------------------------------------------------------ +// Create a real TransitionBlock and call CallInterpreterFuncletWorker +// to execute an interpreter funclet (catch/finally/filter handler). 
+// +// extern "C" DWORD_PTR CallInterpreterFunclet( +// OBJECTREF throwable, // x0 +// void* pHandler, // x1 +// REGDISPLAY *pRD, // x2 +// ExInfo *pExInfo, // x3 +// bool isFilter // x4 +// ); +// ------------------------------------------------------------------ +NESTED_ENTRY CallInterpreterFunclet, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK pushCalleeSavedFloatRegs=1 + + // Pass TransitionBlock* as last (6th) argument + // Worker signature: CallInterpreterFuncletWorker(throwable, pHandler, pRD, pExInfo, isFilter, TransitionBlock*) + // Original args: x0=throwable, x1=pHandler, x2=pRD, x3=pExInfo, x4=isFilter + // x0-x4 remain unchanged + + add x5, sp, #__PWTB_TransitionBlock // TransitionBlock* as 6th param (x5) + + bl C_FUNC(CallInterpreterFuncletWorker) + + EPILOG_WITH_TRANSITION_BLOCK_RETURN + +NESTED_END CallInterpreterFunclet, _TEXT + +// ------------------------------------------------------------------ +// Resume an interpreter continuation after an async await. +// The worker function will restore callee-saved registers from the +// TransitionBlock. 
+// +// FCDECL2(ContinuationObject*, AsyncHelpers_ResumeInterpreterContinuation, ContinuationObject* cont, uint8_t* resultStorage); +// +// ------------------------------------------------------------------ +NESTED_ENTRY AsyncHelpers_ResumeInterpreterContinuation, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK pushCalleeSavedFloatRegs=1 + + // Worker signature: AsyncHelpers_ResumeInterpreterContinuationWorker(cont, resultStorage, TransitionBlock*) + // x0, x1 remain unchanged + + add x2, sp, #__PWTB_TransitionBlock // TransitionBlock* as 3rd param (x2) + + bl C_FUNC(AsyncHelpers_ResumeInterpreterContinuationWorker) + + EPILOG_WITH_TRANSITION_BLOCK_RETURN + +NESTED_END AsyncHelpers_ResumeInterpreterContinuation, _TEXT + #endif // FEATURE_INTERPRETER diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index c2ed89b4ed14c5..8bdc98ab21ada3 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -1094,7 +1094,7 @@ JIT_PollGCRarePath NESTED_ENTRY InterpreterStub - PROLOG_WITH_TRANSITION_BLOCK + PROLOG_WITH_TRANSITION_BLOCK , , PushCalleeSavedFloatRegs INLINE_GETTHREAD x20, x19 mov x19, METHODDESC_REGISTER ; x19 contains IR bytecode address @@ -3136,6 +3136,64 @@ CopyLoop EPILOG_RETURN NESTED_END CallJittedMethodRet4Vector128 +;; ------------------------------------------------------------------ +;; Create a real TransitionBlock and call CallInterpreterFuncletWorker +;; to execute an interpreter funclet (catch/finally/filter handler). 
+;; +;; extern "C" DWORD_PTR CallInterpreterFunclet( +;; OBJECTREF throwable, ; x0 +;; void* pHandler, ; x1 +;; REGDISPLAY *pRD, ; x2 +;; ExInfo *pExInfo, ; x3 +;; bool isFilter ; x4 +;; ); +;; ------------------------------------------------------------------ + IMPORT CallInterpreterFuncletWorker + + NESTED_ENTRY CallInterpreterFunclet + + PROLOG_WITH_TRANSITION_BLOCK , , PushCalleeSavedFloatRegs + + ; Pass TransitionBlock* as last (6th) argument + ; Worker signature: CallInterpreterFuncletWorker(throwable, pHandler, pRD, pExInfo, isFilter, TransitionBlock*) + ; Original args: x0=throwable, x1=pHandler, x2=pRD, x3=pExInfo, x4=isFilter + ; x0-x4 remain unchanged + + add x5, sp, #__PWTB_TransitionBlock ; TransitionBlock* as 6th param (x5) + + bl CallInterpreterFuncletWorker + + EPILOG_WITH_TRANSITION_BLOCK_RETURN + + NESTED_END CallInterpreterFunclet + +;; ------------------------------------------------------------------ +;; Resume an interpreter continuation after an async await. +;; The worker function will restore callee-saved registers from the +;; TransitionBlock. 
+;; +;; extern "C" ContinuationObject* AsyncHelpers_ResumeInterpreterContinuation( +;; ContinuationObject* cont, // x0 +;; uint8_t* resultStorage // x1 +;; ); +;; ------------------------------------------------------------------ + IMPORT AsyncHelpers_ResumeInterpreterContinuationWorker + + NESTED_ENTRY AsyncHelpers_ResumeInterpreterContinuation + + PROLOG_WITH_TRANSITION_BLOCK , , PushCalleeSavedFloatRegs + + ; Worker signature: AsyncHelpers_ResumeInterpreterContinuationWorker(cont, resultStorage, TransitionBlock*) + ; x0, x1 remain unchanged + + add x2, sp, #__PWTB_TransitionBlock ; TransitionBlock* as 3rd param (x2) + + bl AsyncHelpers_ResumeInterpreterContinuationWorker + + EPILOG_WITH_TRANSITION_BLOCK_RETURN + + NESTED_END AsyncHelpers_ResumeInterpreterContinuation + #endif // FEATURE_INTERPRETER diff --git a/src/coreclr/vm/arm64/asmmacros.h b/src/coreclr/vm/arm64/asmmacros.h index 93778d775f87c9..908678f943f65a 100644 --- a/src/coreclr/vm/arm64/asmmacros.h +++ b/src/coreclr/vm/arm64/asmmacros.h @@ -32,11 +32,16 @@ ;----------------------------------------------------------------------------- ; Define the prolog for a TransitionFrame-based method. This macro should be called first in the method and ; comprises the entire prolog (i.e. don't modify SP after calling this).The locals must be 8 byte aligned +; +; $pushCalleeSavedFloatRegs - Optional parameter. If set to PushCalleeSavedFloatRegs, the macro will also save +; the callee-saved floating point registers (d8-d15) to the stack. These registers +; are NOT restored by the EPILOG_WITH_TRANSITION_BLOCK variants. 
; MACRO - PROLOG_WITH_TRANSITION_BLOCK $extraLocals, $SaveFPArgs + PROLOG_WITH_TRANSITION_BLOCK $extraLocals, $SaveFPArgs, $pushCalleeSavedFloatRegs GBLA __PWTB_FloatArgumentRegisters + GBLA __PWTB_FloatCalleeSavedRegisters GBLA __PWTB_ArgumentRegisters GBLA __PWTB_ArgumentRegister_FirstArg ; We save the x8 register ahead of the first argument, so this ; is different from the start of the argument register save area. @@ -51,13 +56,20 @@ __PWTB_SaveFPArgs SETL {true} ENDIF IF "$extraLocals" != "" -__PWTB_FloatArgumentRegisters SETA $extraLocals +__PWTB_FloatCalleeSavedRegisters SETA $extraLocals ELSE -__PWTB_FloatArgumentRegisters SETA 0 +__PWTB_FloatCalleeSavedRegisters SETA 0 + ENDIF + + IF __PWTB_FloatCalleeSavedRegisters:MOD:16 != 0 +__PWTB_FloatCalleeSavedRegisters SETA __PWTB_FloatCalleeSavedRegisters + 8 ENDIF - IF __PWTB_FloatArgumentRegisters:MOD:16 != 0 -__PWTB_FloatArgumentRegisters SETA __PWTB_FloatArgumentRegisters + 8 + ; If PushCalleeSavedFloatRegs is specified, reserve space for d8-d15 (8 registers * 8 bytes = 64 bytes) + IF "$pushCalleeSavedFloatRegs" == "PushCalleeSavedFloatRegs" +__PWTB_FloatArgumentRegisters SETA __PWTB_FloatCalleeSavedRegisters + 64 + ELSE +__PWTB_FloatArgumentRegisters SETA __PWTB_FloatCalleeSavedRegisters ENDIF IF __PWTB_SaveFPArgs @@ -88,6 +100,14 @@ __PWTB_ArgumentRegister_FirstArg SETA __PWTB_ArgumentRegisters + 8 SAVE_FLOAT_ARGUMENT_REGISTERS sp, __PWTB_FloatArgumentRegisters ENDIF + ; Save callee-saved floating point registers if requested + IF "$pushCalleeSavedFloatRegs" == "PushCalleeSavedFloatRegs" + stp d8, d9, [sp, #__PWTB_FloatCalleeSavedRegisters] + stp d10, d11, [sp, #(__PWTB_FloatCalleeSavedRegisters + 16)] + stp d12, d13, [sp, #(__PWTB_FloatCalleeSavedRegisters + 32)] + stp d14, d15, [sp, #(__PWTB_FloatCalleeSavedRegisters + 48)] + ENDIF + MEND ;----------------------------------------------------------------------------- diff --git a/src/coreclr/vm/arm64/stubs.cpp b/src/coreclr/vm/arm64/stubs.cpp index 
ec6377012cef5a..139a821fbd20bf 100644 --- a/src/coreclr/vm/arm64/stubs.cpp +++ b/src/coreclr/vm/arm64/stubs.cpp @@ -218,7 +218,6 @@ void TransitionFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFl if (updateFloats) { UpdateFloatingPointRegisters(pRD, GetSP()); - _ASSERTE(pRD->pCurrentContext->Pc == GetReturnAddress()); } #endif // DACCESS_COMPILE @@ -278,6 +277,31 @@ void ResolveHelperFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updat } #endif // FEATURE_RESOLVE_HELPER_DISPATCH +#ifdef FEATURE_INTERPRETER +#ifndef DACCESS_COMPILE +void InterpreterFrame::UpdateFloatingPointRegisters_Impl(const PREGDISPLAY pRD, TADDR) +{ + LIMITED_METHOD_CONTRACT; + + // The interpreter frame saves the floating point callee-saved registers (d8-d15) in the TransitionBlock, + // so we need to update them in the REGDISPLAY when we update the REGDISPLAY for an interpreter frame. + // + // Stack layout when pushCalleeSavedFloatRegs is used: + // [d8-d15 (64 bytes)] [q0-q7 (128 bytes)] [TransitionBlock] + // FP callee-saved are at TransitionBlock - 192 (64 + 128) + TADDR pTransitionBlock = GetTransitionBlock(); + UINT64 *pCalleeSavedFloats = (UINT64*)((BYTE*)pTransitionBlock - 192); + + for (int i = 0; i < 8; i++) + { + pRD->pCurrentContext->V[8 + i].Low = pCalleeSavedFloats[i]; + pRD->pCurrentContext->V[8 + i].High = 0; + (&pRD->pCurrentContextPointers->D8)[i] = &pCalleeSavedFloats[i]; + } +} +#endif // DACCESS_COMPILE +#endif // FEATURE_INTERPRETER + void FaultingExceptionFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats) { LIMITED_METHOD_DAC_CONTRACT; @@ -338,7 +362,7 @@ void InlinedCallFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateF #ifndef DACCESS_COMPILE if (updateFloats) { - UpdateFloatingPointRegisters(pRD); + UpdateFloatingPointRegisters(pRD, dac_cast<TADDR>(GetCallSiteSP())); } #endif // DACCESS_COMPILE diff --git a/src/coreclr/vm/corelib.cpp b/src/coreclr/vm/corelib.cpp index 9c86d62f826c30..0861e8b748575f 100644 --- 
a/src/coreclr/vm/corelib.cpp +++ b/src/coreclr/vm/corelib.cpp @@ -74,6 +74,7 @@ #endif //FEATURE_PERFTRACING #include "tailcallhelp.h" +#include "interpexec.h" /////////////////////////////////////////////////////////////////////////////// // diff --git a/src/coreclr/vm/ecalllist.h b/src/coreclr/vm/ecalllist.h index 1776af6c065405..e5117f284fcb39 100644 --- a/src/coreclr/vm/ecalllist.h +++ b/src/coreclr/vm/ecalllist.h @@ -153,6 +153,12 @@ FCFuncStart(gRuntimeMethodHandle) FCFuncElement("GetLoaderAllocatorInternal", RuntimeMethodHandle::GetLoaderAllocatorInternal) FCFuncEnd() +#ifdef FEATURE_INTERPRETER +FCFuncStart(gAsyncHelpers) + FCFuncElement("ResumeInterpreterContinuation", AsyncHelpers_ResumeInterpreterContinuation) +FCFuncEnd() +#endif // FEATURE_INTERPRETER + FCFuncStart(gCOMFieldHandleNewFuncs) FCFuncElement("GetUtf8NameInternal", RuntimeFieldHandle::GetUtf8Name) FCFuncElement("GetAttributes", RuntimeFieldHandle::GetAttributes) @@ -386,6 +392,9 @@ FCFuncEnd() FCClassElement("Array", "System", gArrayFuncs) FCClassElement("AssemblyLoadContext", "System.Runtime.Loader", gAssemblyLoadContextFuncs) +#ifdef FEATURE_INTERPRETER +FCClassElement("AsyncHelpers", "System.Runtime.CompilerServices", gAsyncHelpers) +#endif FCClassElement("Buffer", "System", gBufferFuncs) FCClassElement("CastHelpers", "System.Runtime.CompilerServices", gCastHelpers) FCClassElement("Delegate", "System", gDelegateFuncs) diff --git a/src/coreclr/vm/eetwain.cpp b/src/coreclr/vm/eetwain.cpp index cbbc14f04f4251..553d5d49f3f8dd 100644 --- a/src/coreclr/vm/eetwain.cpp +++ b/src/coreclr/vm/eetwain.cpp @@ -1825,7 +1825,16 @@ void EECodeManager::UpdateSSP(PREGDISPLAY pRD) #endif // HOST_AMD64 && HOST_WINDOWS #ifdef FEATURE_INTERPRETER -DWORD_PTR InterpreterCodeManager::CallFunclet(OBJECTREF throwable, void* pHandler, REGDISPLAY *pRD, ExInfo *pExInfo, bool isFilter) + +#if !defined(TARGET_WASM) +// ASM helper that creates a TransitionBlock and calls CallInterpreterFuncletWorker +extern "C" DWORD_PTR 
STDCALL CallInterpreterFunclet(OBJECTREF throwable, void* pHandler, REGDISPLAY *pRD, ExInfo *pExInfo, bool isFilter); +#endif // !TARGET_WASM + +// Worker function that executes an interpreter funclet with a TransitionBlock +// On non-WASM platforms, this is called from the ASM helper CallInterpreterFunclet +// On WASM, this is called directly from InterpreterCodeManager::CallFunclet +extern "C" DWORD_PTR STDCALL CallInterpreterFuncletWorker(OBJECTREF throwable, void* pHandler, REGDISPLAY *pRD, ExInfo *pExInfo, bool isFilter, TransitionBlock *pTransitionBlock) { Thread *pThread = GetThread(); InterpThreadContext *threadContext = pThread->GetInterpThreadContext(); @@ -1843,7 +1852,7 @@ DWORD_PTR InterpreterCodeManager::CallFunclet(OBJECTREF throwable, void* pHandle { } } - frames(NULL); + frames(pTransitionBlock); // Use the InterpreterFrame address as a representation of the caller SP of the funclet // Note: this needs to match what the VirtualUnwindInterpreterCallFrame sets as the SP @@ -1882,6 +1891,19 @@ DWORD_PTR InterpreterCodeManager::CallFunclet(OBJECTREF throwable, void* pHandle } } +DWORD_PTR InterpreterCodeManager::CallFunclet(OBJECTREF throwable, void* pHandler, REGDISPLAY *pRD, ExInfo *pExInfo, bool isFilter) +{ +#if !defined(TARGET_WASM) + // Call the ASM helper which creates a real TransitionBlock + return CallInterpreterFunclet(throwable, pHandler, pRD, pExInfo, isFilter); +#else + // For WASM, create the TransitionBlock in C++ and call the worker directly + TransitionBlock transitionBlock{}; + transitionBlock.m_ReturnAddress = (TADDR)&CallInterpreterFuncletWorker; + return CallInterpreterFuncletWorker(throwable, pHandler, pRD, pExInfo, isFilter, &transitionBlock); +#endif +} + void InterpreterCodeManager::ResumeAfterCatch(CONTEXT *pContext, size_t targetSSP, bool fIntercepted) { TADDR resumeSP = GetSP(pContext); diff --git a/src/coreclr/vm/exceptionhandling.cpp b/src/coreclr/vm/exceptionhandling.cpp index 78839be6791e61..6b9d52189dac84 100644 --- 
a/src/coreclr/vm/exceptionhandling.cpp +++ b/src/coreclr/vm/exceptionhandling.cpp @@ -3114,16 +3114,6 @@ void CallCatchFunclet(OBJECTREF throwable, BYTE* pHandlerIP, REGDISPLAY* pvRegDi DWORD_PTR dwResumePC = 0; UINT_PTR callerTargetSp = 0; -#ifdef FEATURE_INTERPRETER - if (GetControlPC(pvRegDisplay) == InterpreterFrame::DummyCallerIP) - { - // This is a case when we have unwound out of an interpreted filter funclet. The "Next" moves the - // REGDISPLAY to the native code context that was there before we started to iterate over the - // interpreted frames. - exInfo->m_frameIter.Next(); - } -#endif // FEATURE_INTERPRETER - #if defined(HOST_AMD64) && defined(HOST_WINDOWS) size_t targetSSP = exInfo->m_frameIter.m_crawl.GetRegisterSet()->SSP; // Verify the SSP points to the slot that matches the ControlPC of the frame containing the catch funclet. @@ -3967,45 +3957,6 @@ CLR_BOOL SfiNextWorker(StackFrameIterator* pThis, uint* uExCollideClauseIdx, CLR isNativeTransition = (pThis->GetFrameState() == StackFrameIterator::SFITER_NATIVE_MARKER_FRAME); -#ifdef FEATURE_INTERPRETER - bool nativeTransitionFrameIsNextFrame; - nativeTransitionFrameIsNextFrame = false; - - if (isNativeTransition && - (GetIP(pThis->m_crawl.GetRegisterSet()->pCurrentContext) == InterpreterFrame::DummyCallerIP)) - { - _ASSERTE(pThis->m_crawl.GetFrame()->GetFrameIdentifier() == FrameIdentifier::InterpreterFrame); - InterpreterFrame *pInterpreterFrame = (InterpreterFrame *)pThis->m_crawl.GetFrame(); - // If the GetReturnAddress returns 0, it means the caller is InterpreterCodeManager::CallFunclet. - // We don't have any TransitionFrame to update the regdisplay from in that case. - PCODE returnAddress = pInterpreterFrame->GetReturnAddress(); - if (returnAddress != 0) - { - // The callerIP is InterpreterFrame::DummyCallerIP when we are going to unwind from the first interpreted frame belonging to an InterpreterFrame. - // That means it is at a transition where non-interpreted code called interpreted one. 
- // Move the stack frame iterator to the InterpreterFrame and extract the IP of the real caller of the interpreted code. - retVal = pThis->Next(); - _ASSERTE(retVal != SWA_FAILED); - _ASSERTE(pThis->GetFrameState() == StackFrameIterator::SFITER_FRAME_FUNCTION); - _ASSERTE(pThis->m_crawl.GetFrame()->GetFrameIdentifier() == FrameIdentifier::InterpreterFrame); - if (ExecutionManager::IsManagedCode(returnAddress)) - { - // The caller of the interpreted code is managed code. Advance the stack frame iterator to that frame. - retVal = pThis->Next(); - _ASSERTE(retVal != SWA_FAILED); - _ASSERTE(pThis->GetFrameState() == StackFrameIterator::SFITER_FRAMELESS_METHOD); - isNativeTransition = false; - } - else - { - // The caller is native code, so we can update the regdisplay to point to it. - pInterpreterFrame->UpdateRegDisplay(pThis->m_crawl.GetRegisterSet(), /* updateFloats */ true); - nativeTransitionFrameIsNextFrame = true; - } - } - } -#endif // FEATURE_INTERPRETER - // Check for reverse pinvoke or CallDescrWorkerInternal. if (isNativeTransition) { @@ -4062,14 +4013,6 @@ CLR_BOOL SfiNextWorker(StackFrameIterator* pThis, uint* uExCollideClauseIdx, CLR if (isPropagatingToNativeCode) { pFrame = pThis->m_crawl.GetFrame(); -#ifdef FEATURE_INTERPRETER - if (nativeTransitionFrameIsNextFrame) - { - // Skip the InterpreterFrame that we determined earlier is NOT the frame we consider part of the native transition. - _ASSERTE(pFrame->GetFrameIdentifier() == FrameIdentifier::InterpreterFrame); - pFrame = pFrame->PtrNextFrame(); - } -#endif // Check if there are any further managed frames on the stack or a catch for all exceptions in native code (marked by // DebuggerU2MCatchHandlerFrame with CatchesAllExceptions() returning true). 
diff --git a/src/coreclr/vm/frames.cpp b/src/coreclr/vm/frames.cpp index 5c7464439ed873..6c851c95e05989 100644 --- a/src/coreclr/vm/frames.cpp +++ b/src/coreclr/vm/frames.cpp @@ -635,9 +635,11 @@ void Frame::PopIfChained() #endif // TARGET_UNIX && !DACCESS_COMPILE #if (!defined(TARGET_X86) || defined(TARGET_UNIX)) && !defined(TARGET_WASM) -/* static */ -void Frame::UpdateFloatingPointRegisters(const PREGDISPLAY pRD, TADDR targetSP) + +void Frame::UpdateFloatingPointRegisters_Impl(const PREGDISPLAY pRD, TADDR targetSP) { + LIMITED_METHOD_CONTRACT; + // Default implementation unwinds floating point registers to target SP _ASSERTE(!ExecutionManager::IsManagedCode(::GetIP(pRD->pCurrentContext))); do @@ -653,13 +655,25 @@ void Frame::UpdateFloatingPointRegisters(const PREGDISPLAY pRD, TADDR targetSP) _ASSERTE(::GetSP(pRD->pCurrentContext) == targetSP); } -void InlinedCallFrame::UpdateFloatingPointRegisters(const PREGDISPLAY pRD) +void Frame::UpdateFloatingPointRegisters(const PREGDISPLAY pRD, TADDR targetSP) +{ + switch (GetFrameIdentifier()) + { +#define FRAME_TYPE_NAME(frameType) case FrameIdentifier::frameType: { return dac_cast<PTR_##frameType>(this)->UpdateFloatingPointRegisters_Impl(pRD, targetSP); } +#include "FrameTypes.h" + default: + FRAME_POLYMORPHIC_DISPATCH_UNREACHABLE(); + return; + } +} + +void InlinedCallFrame::UpdateFloatingPointRegisters_Impl(const PREGDISPLAY pRD, TADDR targetSP) { #ifdef FEATURE_INTERPRETER if (IsInInterpreter()) { InterpreterFrame *pInterpreterFrame = (InterpreterFrame *)m_Next; - Frame::UpdateFloatingPointRegisters(pRD, pInterpreterFrame->GetInterpExecMethodSP()); + pInterpreterFrame->UpdateFloatingPointRegisters(pRD, pInterpreterFrame->GetInterpExecMethodSP()); pInterpreterFrame->SetContextToInterpMethodContextFrame(pRD->pCurrentContext); return; } @@ -1964,6 +1978,7 @@ void InterpreterFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateF { SyncRegDisplayToCurrentContext(pRD); TransitionFrame::UpdateRegDisplay_Impl(pRD, updateFloats); + 
#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) && !defined(DACCESS_COMPILE) // Update the SSP to match the updated regdisplay size_t *targetSSP = (size_t *)GetInterpExecMethodSSP(); diff --git a/src/coreclr/vm/frames.h b/src/coreclr/vm/frames.h index 8ec28c47132c6e..6c60cc14fa94bd 100644 --- a/src/coreclr/vm/frames.h +++ b/src/coreclr/vm/frames.h @@ -596,7 +596,12 @@ class Frame #ifndef DACCESS_COMPILE #if (!defined(TARGET_X86) || defined(TARGET_UNIX)) && !defined(TARGET_WASM) - static void UpdateFloatingPointRegisters(const PREGDISPLAY pRD, TADDR targetSP); + // Pseudo-virtual method for updating floating point registers during stack walk + void UpdateFloatingPointRegisters_Impl(const PREGDISPLAY pRD, TADDR targetSP); +public: + // Public dispatch method for UpdateFloatingPointRegisters + void UpdateFloatingPointRegisters(const PREGDISPLAY pRD, TADDR targetSP); +protected: #endif // (!TARGET_X86 || TARGET_UNIX) && !TARGET_WASM #endif // DACCESS_COMPILE @@ -2233,7 +2238,7 @@ class InlinedCallFrame : public Frame #ifndef DACCESS_COMPILE #if (!defined(TARGET_X86) || defined(TARGET_UNIX)) && !defined(TARGET_WASM) - void UpdateFloatingPointRegisters(const PREGDISPLAY pRD); + void UpdateFloatingPointRegisters_Impl(const PREGDISPLAY pRD, TADDR targetSP); #endif // (!TARGET_X86 || TARGET_UNIX) && !TARGET_WASM #endif // DACCESS_COMPILE @@ -2511,6 +2516,12 @@ class InterpreterFrame : public FramedMethodFrame void ExceptionUnwind_Impl(); #endif +#ifndef DACCESS_COMPILE +#if (!defined(TARGET_X86) || defined(TARGET_UNIX)) && !defined(TARGET_WASM) + void UpdateFloatingPointRegisters_Impl(const PREGDISPLAY pRD, TADDR targetSP); +#endif // (!TARGET_X86 || TARGET_UNIX) && !TARGET_WASM +#endif // DACCESS_COMPILE + PTR_InterpMethodContextFrame GetTopInterpMethodContextFrame(); void SetContextToInterpMethodContextFrame(T_CONTEXT * pContext); diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index c6c3bba915867e..79d632a70ce988 100644 --- 
a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -993,13 +993,15 @@ void* DoGenericLookup(void* genericVarAsPtr, InterpGenericLookup* pLookup) return result; } -void AsyncHelpers_ResumeInterpreterContinuation(QCall::ObjectHandleOnStack cont, uint8_t* resultStorage) +extern "C" ContinuationObject* AsyncHelpers_ResumeInterpreterContinuationWorker(ContinuationObject* cont, uint8_t* resultStorage, TransitionBlock* pTransitionBlock) { - QCALL_CONTRACT; - - BEGIN_QCALL; - - GCX_COOP(); + CONTRACTL + { + THROWS; + GC_TRIGGERS; + MODE_COOPERATIVE; + } + CONTRACTL_END Thread *pThread = GetThread(); InterpThreadContext *threadContext = pThread->GetOrCreateInterpThreadContext(); @@ -1017,9 +1019,9 @@ void AsyncHelpers_ResumeInterpreterContinuation(QCall::ObjectHandleOnStack cont, { } } - frames(NULL); + frames(pTransitionBlock); - CONTINUATIONREF contRef = (CONTINUATIONREF)ObjectToOBJECTREF(cont.Get()); + CONTINUATIONREF contRef = (CONTINUATIONREF)ObjectToOBJECTREF(cont); NULL_CHECK(contRef); // We are working with an interpreter async continuation, move things around to get the InterpAsyncSuspendData @@ -1070,11 +1072,24 @@ void AsyncHelpers_ResumeInterpreterContinuation(QCall::ObjectHandleOnStack cont, } } - cont.Set(frames.interpreterFrame.GetContinuation()); + contRef = (CONTINUATIONREF)frames.interpreterFrame.GetContinuation(); frames.interpreterFrame.Pop(); - END_QCALL; + return (ContinuationObject*)OBJECTREFToObject(contRef); +} + +#ifdef TARGET_WASM +FCIMPL2(ContinuationObject*, AsyncHelpers_ResumeInterpreterContinuation, ContinuationObject* cont, uint8_t* resultStorage) +{ + STATIC_CONTRACT_WRAPPER; + + TransitionBlock transitionBlock{}; + transitionBlock.m_ReturnAddress = (TADDR)&AsyncHelpers_ResumeInterpreterContinuation; + + return AsyncHelpers_ResumeInterpreterContinuationWorker(cont, resultStorage, &transitionBlock); } +FCIMPLEND +#endif // TARGET_WASM static void DECLSPEC_NORETURN HandleInterpreterStackOverflow(InterpreterFrame* 
pInterpreterFrame) { diff --git a/src/coreclr/vm/interpexec.h b/src/coreclr/vm/interpexec.h index 149079b3ab04a2..3fde10f7127a7c 100644 --- a/src/coreclr/vm/interpexec.h +++ b/src/coreclr/vm/interpexec.h @@ -78,7 +78,7 @@ struct ExceptionClauseArgs }; void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFrame *pFrame, InterpThreadContext *pThreadContext, ExceptionClauseArgs *pExceptionClauseArgs = NULL); -extern "C" void AsyncHelpers_ResumeInterpreterContinuation(QCall::ObjectHandleOnStack cont, uint8_t* resultStorage); +EXTERN_C FCDECL2(ContinuationObject*, AsyncHelpers_ResumeInterpreterContinuation, ContinuationObject* cont, uint8_t* resultStorage); extern "C" void LookupMethodByName(const char* fullQualifiedTypeName, const char* methodName, MethodDesc** ppMD); extern "C" void ExecuteInterpretedMethodFromUnmanaged(MethodDesc* pMD, int8_t* args, size_t argSize, int8_t* ret, PCODE callerIp); diff --git a/src/coreclr/vm/loongarch64/stubs.cpp b/src/coreclr/vm/loongarch64/stubs.cpp index 124c0d9739aa2a..1ca17e5161081e 100644 --- a/src/coreclr/vm/loongarch64/stubs.cpp +++ b/src/coreclr/vm/loongarch64/stubs.cpp @@ -269,7 +269,6 @@ void TransitionFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFl if (updateFloats) { UpdateFloatingPointRegisters(pRD, GetSP()); - _ASSERTE(pRD->pCurrentContext->Pc == GetReturnAddress()); } #endif // DACCESS_COMPILE @@ -349,7 +348,7 @@ void InlinedCallFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateF #ifndef DACCESS_COMPILE if (updateFloats) { - UpdateFloatingPointRegisters(pRD); + UpdateFloatingPointRegisters(pRD, dac_cast<TADDR>(GetCallSiteSP())); } #endif // DACCESS_COMPILE diff --git a/src/coreclr/vm/qcallentrypoints.cpp b/src/coreclr/vm/qcallentrypoints.cpp index 0a03fbce6b82a2..efcacbbcbb93dd 100644 --- a/src/coreclr/vm/qcallentrypoints.cpp +++ b/src/coreclr/vm/qcallentrypoints.cpp @@ -540,9 +540,6 @@ static const Entry s_QCall[] = DllImportEntry(SystemJS_ScheduleTimer) 
DllImportEntry(SystemJS_ScheduleBackgroundJob) #endif // TARGET_BROWSER -#ifdef FEATURE_INTERPRETER - DllImportEntry(AsyncHelpers_ResumeInterpreterContinuation) -#endif // FEATURE_INTERPRETER }; const void* QCallResolveDllImport(const char* name) diff --git a/src/coreclr/vm/riscv64/asmhelpers.S b/src/coreclr/vm/riscv64/asmhelpers.S index e0a0af3c9b84bb..0d14aab155fc4f 100644 --- a/src/coreclr/vm/riscv64/asmhelpers.S +++ b/src/coreclr/vm/riscv64/asmhelpers.S @@ -1403,7 +1403,7 @@ NESTED_END CallJittedMethodRetIntFloat, _TEXT NESTED_ENTRY InterpreterStub, _TEXT, NoHandler - PROLOG_WITH_TRANSITION_BLOCK + PROLOG_WITH_TRANSITION_BLOCK pushCalleeSavedFloatRegs=1 // IR bytecode address mv t6, METHODDESC_REGISTER @@ -2578,4 +2578,56 @@ LEAF_ENTRY Store_FA7 EPILOG_BRANCH_REG t4 LEAF_END Store_FA7 +// ------------------------------------------------------------------ +// Create a real TransitionBlock and call CallInterpreterFuncletWorker +// to execute an interpreter funclet (catch/finally/filter handler). 
+// +// extern "C" DWORD_PTR CallInterpreterFunclet( +// OBJECTREF throwable, // a0 +// void* pHandler, // a1 +// REGDISPLAY *pRD, // a2 +// ExInfo *pExInfo, // a3 +// bool isFilter // a4 +// ); +// ------------------------------------------------------------------ +NESTED_ENTRY CallInterpreterFunclet, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK pushCalleeSavedFloatRegs=1 + + // Pass TransitionBlock* as last (6th) argument + // Worker signature: CallInterpreterFuncletWorker(throwable, pHandler, pRD, pExInfo, isFilter, TransitionBlock*) + // Original args: a0=throwable, a1=pHandler, a2=pRD, a3=pExInfo, a4=isFilter + // a0-a4 remain unchanged + + addi a5, sp, __PWTB_TransitionBlock // TransitionBlock* as 6th param (a5) + + call C_FUNC(CallInterpreterFuncletWorker) + + EPILOG_WITH_TRANSITION_BLOCK_RETURN + +NESTED_END CallInterpreterFunclet, _TEXT + +// ------------------------------------------------------------------ +// Resume an interpreter continuation after an async await. +// The worker function will restore callee-saved registers from the +// TransitionBlock. 
+// +// FCDECL2(ContinuationObject*, AsyncHelpers_ResumeInterpreterContinuation, ContinuationObject* cont, uint8_t* resultStorage); +// +// ------------------------------------------------------------------ +NESTED_ENTRY AsyncHelpers_ResumeInterpreterContinuation, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK pushCalleeSavedFloatRegs=1 + + // Worker signature: AsyncHelpers_ResumeInterpreterContinuationWorker(cont, resultStorage, TransitionBlock*) + // a0, a1 remain unchanged + + addi a2, sp, __PWTB_TransitionBlock // TransitionBlock* as 3rd param (a2) + + call C_FUNC(AsyncHelpers_ResumeInterpreterContinuationWorker) + + EPILOG_WITH_TRANSITION_BLOCK_RETURN + +NESTED_END AsyncHelpers_ResumeInterpreterContinuation, _TEXT + #endif // FEATURE_INTERPRETER diff --git a/src/coreclr/vm/riscv64/stubs.cpp b/src/coreclr/vm/riscv64/stubs.cpp index 0ef69ee0f10355..91c082f6c7c041 100644 --- a/src/coreclr/vm/riscv64/stubs.cpp +++ b/src/coreclr/vm/riscv64/stubs.cpp @@ -223,7 +223,6 @@ void TransitionFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFl if (updateFloats) { UpdateFloatingPointRegisters(pRD, GetSP()); - _ASSERTE(pRD->pCurrentContext->Pc == GetReturnAddress()); } #endif // DACCESS_COMPILE @@ -248,6 +247,36 @@ void TransitionFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFl LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK TransitionFrame::UpdateRegDisplay_Impl(pc:%p, sp:%p)\n", pRD->ControlPC, pRD->SP)); } +#ifdef FEATURE_INTERPRETER +#ifndef DACCESS_COMPILE +void InterpreterFrame::UpdateFloatingPointRegisters_Impl(const PREGDISPLAY pRD, TADDR) +{ + LIMITED_METHOD_CONTRACT; + + // The interpreter frame saves the floating point callee-saved registers (fs0-fs11) in the TransitionBlock, + // so we need to update them in the REGDISPLAY when we update the REGDISPLAY for an interpreter frame. 
+ // + // Stack layout when pushCalleeSavedFloatRegs is used: + // [fs0-fs11 (96 bytes)] [fa0-fa7 (64 bytes)] [TransitionBlock] + // FP callee-saved are at TransitionBlock - 160 (96 + 64) + // + // RISC-V FP callee-saved: fs0=f8, fs1=f9, fs2-fs11=f18-f27 + TADDR pTransitionBlock = GetTransitionBlock(); + UINT64 *pCalleeSavedFloats = (UINT64*)((BYTE*)pTransitionBlock - 160); + + // fs0 = f8, fs1 = f9 + pRD->pCurrentContext->F[8] = pCalleeSavedFloats[0]; + pRD->pCurrentContext->F[9] = pCalleeSavedFloats[1]; + + // fs2-fs11 = f18-f27 + for (int i = 0; i < 10; i++) + { + pRD->pCurrentContext->F[18 + i] = pCalleeSavedFloats[2 + i]; + } +} +#endif // DACCESS_COMPILE +#endif // FEATURE_INTERPRETER + void FaultingExceptionFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats) { LIMITED_METHOD_DAC_CONTRACT; @@ -307,7 +336,7 @@ void InlinedCallFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateF #ifndef DACCESS_COMPILE if (updateFloats) { - UpdateFloatingPointRegisters(pRD); + UpdateFloatingPointRegisters(pRD, dac_cast<TADDR>(GetCallSiteSP())); } #endif // DACCESS_COMPILE diff --git a/src/coreclr/vm/stackwalk.cpp b/src/coreclr/vm/stackwalk.cpp index ed5de14ac839f5..7afc296e7472f8 100644 --- a/src/coreclr/vm/stackwalk.cpp +++ b/src/coreclr/vm/stackwalk.cpp @@ -2155,6 +2155,14 @@ StackWalkAction StackFrameIterator::NextRaw(void) goto Cleanup; } +#ifdef FEATURE_INTERPRETER + if (GetIP(m_crawl.pRD->pCurrentContext) == InterpreterFrame::DummyCallerIP) + { + PTR_InterpreterFrame pInterpreterFrame = dac_cast<PTR_InterpreterFrame>(GetSP(m_crawl.pRD->pCurrentContext)); + pInterpreterFrame->UpdateRegDisplay(m_crawl.pRD, m_flags & UNWIND_FLOATS); + } +#endif // FEATURE_INTERPRETER + #define FAIL_IF_SPECULATIVE_WALK(condition) \ if (m_flags & PROFILER_DO_STACK_SNAPSHOT) \ { \ @@ -2208,6 +2216,14 @@ StackWalkAction StackFrameIterator::NextRaw(void) if (InlinedCallFrame::FrameHasActiveCall(m_crawl.pFrame)) { pInlinedFrame = m_crawl.pFrame; +#ifdef FEATURE_INTERPRETER + PTR_Frame pNextFrame 
= pInlinedFrame->PtrNextFrame(); + if ((pNextFrame != FRAME_TOP) && (pNextFrame->GetFrameIdentifier() == FrameIdentifier::InterpreterFrame)) + { + m_crawl.GotoNextFrame(); + goto Cleanup; + } +#endif // FEATURE_INTERPRETER } unsigned uFrameAttribs = m_crawl.pFrame->GetFrameAttribs(); @@ -2225,40 +2241,59 @@ StackWalkAction StackFrameIterator::NextRaw(void) m_crawl.isIPadjusted = false; } - PCODE adr = m_crawl.pFrame->GetReturnAddress(); - _ASSERTE(adr != (PCODE)POISONC); - - _ASSERTE(!pInlinedFrame || adr); +#ifdef FEATURE_INTERPRETER + if (m_crawl.pFrame->GetFrameIdentifier() == FrameIdentifier::InterpreterFrame) + { + LOG((LF_GCROOTS, LL_INFO10000, "STACKWALK: Switching to interpreted frames for InterpreterFrame %p\n", m_crawl.pFrame)); + ((PTR_InterpreterFrame)m_crawl.pFrame)->SetContextToInterpMethodContextFrame(m_crawl.GetRegisterSet()->pCurrentContext); + SyncRegDisplayToCurrentContext(m_crawl.GetRegisterSet()); + ProcessIp(GetControlPC(m_crawl.pRD)); - if (adr) + _ASSERTE(m_crawl.GetCodeInfo()->IsValid()); + if (m_crawl.GetRegisterSet()->pCurrentContext->ContextFlags & CONTEXT_EXCEPTION_ACTIVE) + { + m_crawl.isInterrupted = true; + m_crawl.hasFaulted = true; + } + } + else +#endif // FEATURE_INTERPRETER { - ProcessIp(adr); + PCODE adr = m_crawl.pFrame->GetReturnAddress(); + _ASSERTE(adr != (PCODE)POISONC); - _ASSERTE(m_crawl.GetCodeInfo()->IsValid() || !pInlinedFrame); + _ASSERTE(!pInlinedFrame || adr); - if (m_crawl.isFrameless) + if (adr) { - m_crawl.pFrame->UpdateRegDisplay(m_crawl.pRD, m_flags & UNWIND_FLOATS); + ProcessIp(adr); - CONSISTENCY_CHECK(NULL == m_pvResumableFrameTargetSP); + _ASSERTE(m_crawl.GetCodeInfo()->IsValid() || !pInlinedFrame); - if (m_crawl.isFirst) + if (m_crawl.isFrameless) { - if (m_flags & THREAD_IS_SUSPENDED) + m_crawl.pFrame->UpdateRegDisplay(m_crawl.pRD, m_flags & UNWIND_FLOATS); + + CONSISTENCY_CHECK(NULL == m_pvResumableFrameTargetSP); + + if (m_crawl.isFirst) { - _ASSERTE(m_crawl.isProfilerDoStackSnapshot); + if (m_flags & 
THREAD_IS_SUSPENDED) + { + _ASSERTE(m_crawl.isProfilerDoStackSnapshot); - // abort the stackwalk, we can't proceed without risking deadlock - retVal = SWA_FAILED; - goto Cleanup; - } + // abort the stackwalk, we can't proceed without risking deadlock + retVal = SWA_FAILED; + goto Cleanup; + } - // we are about to unwind, which may take a lock, so the thread - // better not be suspended. - CONSISTENCY_CHECK(!(m_flags & THREAD_IS_SUSPENDED)); + // we are about to unwind, which may take a lock, so the thread + // better not be suspended. + CONSISTENCY_CHECK(!(m_flags & THREAD_IS_SUSPENDED)); - m_crawl.GetCodeManager()->EnsureCallerContextIsValid(m_crawl.pRD, NULL, m_codeManFlags); - m_pvResumableFrameTargetSP = (LPVOID)GetSP(m_crawl.pRD->pCallerContext); + m_crawl.GetCodeManager()->EnsureCallerContextIsValid(m_crawl.pRD, NULL, m_codeManFlags); + m_pvResumableFrameTargetSP = (LPVOID)GetSP(m_crawl.pRD->pCallerContext); + } } } } @@ -2403,68 +2438,6 @@ void StackFrameIterator::ProcessCurrentFrame(void) return; } -#ifdef FEATURE_INTERPRETER - if (!m_crawl.isFrameless) - { - PREGDISPLAY pRD = m_crawl.GetRegisterSet(); - - if (m_crawl.pFrame->GetFrameIdentifier() == FrameIdentifier::InterpreterFrame) - { - if (GetIP(pRD->pCurrentContext) != (PCODE)InterpreterFrame::DummyCallerIP) - { - // We have hit the InterpreterFrame while we were not processing the interpreter frames. - // Switch to walking the underlying interpreted frames. - // Save the registers the interpreter frames walking reuses so that we can restore them - // after we are done with the interpreter frames. 
- LOG((LF_GCROOTS, LL_INFO10000, "STACKWALK: Switching to interpreted frames for InterpreterFrame %p, saving SP=%p, IP=%p\n", m_crawl.pFrame, GetIP(pRD->pCurrentContext), GetSP(pRD->pCurrentContext))); - m_interpExecMethodIP = GetIP(pRD->pCurrentContext); - m_interpExecMethodSP = GetSP(pRD->pCurrentContext); - m_interpExecMethodFP = GetFP(pRD->pCurrentContext); - m_interpExecMethodFirstArgReg = GetFirstArgReg(pRD->pCurrentContext); -#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) - m_interpExecMethodSSP = pRD->SSP; -#endif - ((PTR_InterpreterFrame)m_crawl.pFrame)->SetContextToInterpMethodContextFrame(pRD->pCurrentContext); - if (pRD->pCurrentContext->ContextFlags & CONTEXT_EXCEPTION_ACTIVE) - { - m_crawl.isInterrupted = true; - m_crawl.hasFaulted = true; - } - - SyncRegDisplayToCurrentContext(pRD); - ProcessIp(GetControlPC(pRD)); - } - else - { - // We have finished walking the interpreted frames. Process the InterpreterFrame itself. - // Restore the registers to the values they had before we started walking the interpreter frames. - LOG((LF_GCROOTS, LL_INFO10000, "STACKWALK: Completed walking of interpreted frames for InterpreterFrame %p, restoring SP=%p, IP=%p\n", m_crawl.pFrame, m_interpExecMethodSP, m_interpExecMethodIP)); - _ASSERTE(dac_cast<TADDR>(m_crawl.pFrame) == GetFirstArgReg(pRD->pCurrentContext)); - SetIP(pRD->pCurrentContext, m_interpExecMethodIP); - SetSP(pRD->pCurrentContext, m_interpExecMethodSP); - SetFP(pRD->pCurrentContext, m_interpExecMethodFP); - SetFirstArgReg(pRD->pCurrentContext, m_interpExecMethodFirstArgReg); -#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) - pRD->SSP = m_interpExecMethodSSP; -#endif - SyncRegDisplayToCurrentContext(pRD); - } - } - else if (InlinedCallFrame::FrameHasActiveCall(m_crawl.pFrame) && ((PTR_InlinedCallFrame)m_crawl.pFrame)->IsInInterpreter()) - { - // There is an active inlined call frame localed in the interpreter code. 
This is a special case where we need - // to save the current context registers that the interpreter frames walking reuses. - m_interpExecMethodIP = GetIP(pRD->pCurrentContext); - m_interpExecMethodSP = GetSP(pRD->pCurrentContext); - m_interpExecMethodFP = GetFP(pRD->pCurrentContext); - m_interpExecMethodFirstArgReg = GetFirstArgReg(pRD->pCurrentContext); -#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) - m_interpExecMethodSSP = pRD->SSP; -#endif - } - } -#endif // FEATURE_INTERPRETER - if (m_crawl.isFrameless) { //------------------------------------------------------------------------ diff --git a/src/coreclr/vm/stackwalk.h b/src/coreclr/vm/stackwalk.h index c82738a33684ce..4b2c70823a418a 100644 --- a/src/coreclr/vm/stackwalk.h +++ b/src/coreclr/vm/stackwalk.h @@ -639,18 +639,6 @@ class StackFrameIterator bool m_isRuntimeWrappedExceptions; // Indicates that the stack walk has moved past a funclet bool m_fFoundFirstFunclet; -#ifdef FEATURE_INTERPRETER - // Saved registers of the context of the InterpExecMethod. These registers are reused for interpreter frames, - // but we need to restore the original values after we are done with all the interpreted frames belonging to - // that InterpExecMethod. - TADDR m_interpExecMethodIP; - TADDR m_interpExecMethodSP; - TADDR m_interpExecMethodFP; - TADDR m_interpExecMethodFirstArgReg; -#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) - TADDR m_interpExecMethodSSP; -#endif // TARGET_AMD64 && TARGET_WINDOWS -#endif // FEATURE_INTERPRETER LPVOID m_pvResumableFrameTargetSP; ExInfo* m_pNextExInfo;