From d8d37e96fdadceeb37bf4fe5bfac04232d3cc369 Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Tue, 22 Aug 2023 18:33:18 +0200 Subject: [PATCH 1/4] Fix stack_limit handling There is a problem with computing stack_limit in the presence of inactive InlinedCallFrame. We were trying to call GetCallSiteSP on that frame to get the call site SP. But when the inlined call frame is not active (there is no call to native code in progress), that method returns random junk. But even if we checked for an active call before reading the value, it would not get correct stack limit on x86 windows when there was no active call with the inlined call frame being the topmost one. That can happen with hardware exception handling on Windows x86. On other targets, we always have other explicit frame on top of the explicit frames stack, but on windows x86, we don't use the FaultingExceptionFrame for hardware exceptions, so the inactive inlined call frame could be the topmost one when GC starts to scan stack roots. Since the stack_limit was introduced to fix a Unix specific problem, I have fixed that by disabling the stack_limit usage for x86 windows. Close #86265 --- src/coreclr/gc/gcinterface.h | 8 ++++++++ src/coreclr/nativeaot/Runtime/gcrhenv.cpp | 3 ++- src/coreclr/vm/frames.cpp | 2 ++ src/coreclr/vm/gcenv.ee.cpp | 10 +++++++--- src/coreclr/vm/siginfo.cpp | 12 +++++++----- 5 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/coreclr/gc/gcinterface.h b/src/coreclr/gc/gcinterface.h index ee6451249cad9..746cdaa1c4262 100644 --- a/src/coreclr/gc/gcinterface.h +++ b/src/coreclr/gc/gcinterface.h @@ -1066,12 +1066,18 @@ inline GC_ALLOC_FLAGS& operator&=(GC_ALLOC_FLAGS& a, GC_ALLOC_FLAGS b) #define UNCHECKED_OBJECTREF_TO_OBJECTREF(obj) (obj) #endif +#if !defined(TARGET_X86) || defined(TARGET_UNIX) +#define USE_STACK_LIMIT +#endif + struct ScanContext { Thread* thread_under_crawl; int thread_number; int thread_count; +#ifdef USE_STACK_LIMIT uintptr_t stack_limit; // Lowest point on the thread stack that the scanning logic is permitted to read +#endif // USE_STACK_LIMIT bool promotion; //TRUE: Promotion, FALSE: Relocation. bool concurrent; //TRUE: concurrent scanning void* _unused1; @@ -1089,7 +1095,9 @@ struct ScanContext thread_under_crawl = 0; thread_number = -1; thread_count = -1; +#ifdef USE_STACK_LIMIT stack_limit = 0; +#endif // USE_STACK_LIMIT promotion = false; concurrent = false; pMD = NULL; diff --git a/src/coreclr/nativeaot/Runtime/gcrhenv.cpp b/src/coreclr/nativeaot/Runtime/gcrhenv.cpp index 3ec488605c1b3..5f26da92dd83c 100644 --- a/src/coreclr/nativeaot/Runtime/gcrhenv.cpp +++ b/src/coreclr/nativeaot/Runtime/gcrhenv.cpp @@ -320,8 +320,9 @@ void RedhawkGCInterface::EnumGcRefs(ICodeManager * pCodeManager, ctx.pCallback = EnumGcRefsCallback; ctx.f = (EnumGcRefCallbackFunc *)pfnEnumCallback; ctx.sc = (EnumGcRefScanContext *)pvCallbackData; +#ifdef USE_STACK_LIMIT ctx.sc->stack_limit = pRegisterSet->GetSP(); - +#endif // USE_STACK_LIMIT pCodeManager->EnumGcRefs(pMethodInfo, safePointAddress, pRegisterSet, diff --git a/src/coreclr/vm/frames.cpp b/src/coreclr/vm/frames.cpp index 4cccc1e6dfc42..c5daf49c8b4d6 100644 --- a/src/coreclr/vm/frames.cpp +++ b/src/coreclr/vm/frames.cpp @@ -1555,8 +1555,10 @@ BOOL TransitionFrame::Protects(OBJECTREF * ppORef) { WRAPPER_NO_CONTRACT; IsObjRefProtectedScanContext sc (ppORef); +#ifdef USE_STACK_LIMIT // Set the stack limit for the scan to the SP of the managed frame above the transition frame sc.stack_limit = GetSP(); +#endif // USE_STACK_LIMIT GcScanRoots (IsObjRefProtected, &sc); return sc.oref_protected; } diff --git a/src/coreclr/vm/gcenv.ee.cpp b/src/coreclr/vm/gcenv.ee.cpp index dc28e791de0cf..52dc19a5d2a3a 100644 --- a/src/coreclr/vm/gcenv.ee.cpp +++ b/src/coreclr/vm/gcenv.ee.cpp @@ -115,16 +115,20 @@ static void ScanStackRoots(Thread * pThread, promote_func* fn, ScanContext* sc) IsGCSpecialThread() || (GetThread() == ThreadSuspend::GetSuspensionThread() && ThreadStore::HoldingThreadStore())); +#if defined(FEATURE_CONSERVATIVE_GC) || defined(USE_STACK_LIMIT) Frame* pTopFrame = pThread->GetFrame(); Object ** topStack = (Object **)pTopFrame; - if ((pTopFrame != ((Frame*)-1)) - && (pTopFrame->GetVTablePtr() == InlinedCallFrame::GetMethodFrameVPtr())) { - // It is an InlinedCallFrame. Get SP from it. + if (InlinedCallFrame::FrameHasActiveCall(pTopFrame)) + { + // It is an InlinedCallFrame with active call. Get SP from it. InlinedCallFrame* pInlinedFrame = (InlinedCallFrame*)pTopFrame; topStack = (Object **)pInlinedFrame->GetCallSiteSP(); } +#endif // FEATURE_CONSERVATIVE_GC || USE_STACK_LIMIT +#ifdef USE_STACK_LIMIT sc->stack_limit = (uintptr_t)topStack; +#endif // USE_STACK_LIMIT #ifdef FEATURE_CONSERVATIVE_GC if (g_pConfig->GetGCConservative()) diff --git a/src/coreclr/vm/siginfo.cpp b/src/coreclr/vm/siginfo.cpp index b9807b51ba4af..c226960fde87c 100644 --- a/src/coreclr/vm/siginfo.cpp +++ b/src/coreclr/vm/siginfo.cpp @@ -4962,11 +4962,11 @@ void PromoteCarefully(promote_func fn, #if !defined(DACCESS_COMPILE) +#ifdef USE_STACK_LIMIT // // Sanity check the stack scan limit // assert(sc->stack_limit != 0); - // Note that the base is at a higher address than the limit, since the stack // grows downwards. // To check whether the object is in the stack or not, we also need to check the sc->stack_limit. @@ -4974,10 +4974,12 @@ void PromoteCarefully(promote_func fn, // shrink the current reserved stack space. That causes the real limit of the stack to move up and // the range can be reused for other purposes. But the sc->stack_limit is stable during the scan. // Even on Windows, we care just about the stack above the stack_limit. - if ((sc->thread_under_crawl->IsAddressInStack(*ppObj)) && (PTR_TO_TADDR(*ppObj) >= sc->stack_limit)) - { - return; - } + if ((PTR_TO_TADDR(*ppObj) >= sc->stack_limit)) +#endif // USE_STACK_LIMIT + if (sc->thread_under_crawl->IsAddressInStack(*ppObj)) + { + return; + } if (sc->promotion) { From cfb35879a64e85b134a1a622e5e2f9e61a9b1671 Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Tue, 22 Aug 2023 20:04:38 +0200 Subject: [PATCH 2/4] Add back the stack_limit field to keep contract unchanged --- src/coreclr/gc/gcinterface.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/gc/gcinterface.h b/src/coreclr/gc/gcinterface.h index 746cdaa1c4262..56ebd120ada86 100644 --- a/src/coreclr/gc/gcinterface.h +++ b/src/coreclr/gc/gcinterface.h @@ -1077,6 +1077,8 @@ struct ScanContext int thread_count; #ifdef USE_STACK_LIMIT uintptr_t stack_limit; // Lowest point on the thread stack that the scanning logic is permitted to read +#else // USE_STACK_LIMIT + uintptr_t _unused4; #endif // USE_STACK_LIMIT bool promotion; //TRUE: Promotion, FALSE: Relocation. bool concurrent; //TRUE: concurrent scanning From 8b59eaaf73b294e5de0cf7d002dbf7aa8066603c Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Tue, 22 Aug 2023 23:15:41 +0200 Subject: [PATCH 3/4] Reflect PR feedback - Minimalize the number of ifdefs --- src/coreclr/gc/gcinterface.h | 10 ---------- src/coreclr/nativeaot/Runtime/gcrhenv.cpp | 3 +-- src/coreclr/vm/frames.cpp | 2 -- src/coreclr/vm/gcenv.ee.cpp | 6 ++++++ src/coreclr/vm/siginfo.cpp | 15 ++++----------- 5 files changed, 11 insertions(+), 25 deletions(-) diff --git a/src/coreclr/gc/gcinterface.h b/src/coreclr/gc/gcinterface.h index 56ebd120ada86..ee6451249cad9 100644 --- a/src/coreclr/gc/gcinterface.h +++ b/src/coreclr/gc/gcinterface.h @@ -1066,20 +1066,12 @@ inline GC_ALLOC_FLAGS& operator&=(GC_ALLOC_FLAGS& a, GC_ALLOC_FLAGS b) #define UNCHECKED_OBJECTREF_TO_OBJECTREF(obj) (obj) #endif -#if !defined(TARGET_X86) || defined(TARGET_UNIX) -#define USE_STACK_LIMIT -#endif - struct ScanContext { Thread* thread_under_crawl; int thread_number; int thread_count; -#ifdef USE_STACK_LIMIT uintptr_t stack_limit; // Lowest point on the thread stack that the scanning logic is permitted to read -#else // USE_STACK_LIMIT - uintptr_t _unused4; -#endif // USE_STACK_LIMIT bool promotion; //TRUE: Promotion, FALSE: Relocation. bool concurrent; //TRUE: concurrent scanning void* _unused1; @@ -1097,9 +1089,7 @@ struct ScanContext thread_under_crawl = 0; thread_number = -1; thread_count = -1; -#ifdef USE_STACK_LIMIT stack_limit = 0; -#endif // USE_STACK_LIMIT promotion = false; concurrent = false; pMD = NULL; diff --git a/src/coreclr/nativeaot/Runtime/gcrhenv.cpp b/src/coreclr/nativeaot/Runtime/gcrhenv.cpp index 5f26da92dd83c..3ec488605c1b3 100644 --- a/src/coreclr/nativeaot/Runtime/gcrhenv.cpp +++ b/src/coreclr/nativeaot/Runtime/gcrhenv.cpp @@ -320,9 +320,8 @@ void RedhawkGCInterface::EnumGcRefs(ICodeManager * pCodeManager, ctx.pCallback = EnumGcRefsCallback; ctx.f = (EnumGcRefCallbackFunc *)pfnEnumCallback; ctx.sc = (EnumGcRefScanContext *)pvCallbackData; -#ifdef USE_STACK_LIMIT ctx.sc->stack_limit = pRegisterSet->GetSP(); -#endif // USE_STACK_LIMIT + pCodeManager->EnumGcRefs(pMethodInfo, safePointAddress, pRegisterSet, diff --git a/src/coreclr/vm/frames.cpp b/src/coreclr/vm/frames.cpp index c5daf49c8b4d6..4cccc1e6dfc42 100644 --- a/src/coreclr/vm/frames.cpp +++ b/src/coreclr/vm/frames.cpp @@ -1555,10 +1555,8 @@ BOOL TransitionFrame::Protects(OBJECTREF * ppORef) { WRAPPER_NO_CONTRACT; IsObjRefProtectedScanContext sc (ppORef); -#ifdef USE_STACK_LIMIT // Set the stack limit for the scan to the SP of the managed frame above the transition frame sc.stack_limit = GetSP(); -#endif // USE_STACK_LIMIT GcScanRoots (IsObjRefProtected, &sc); return sc.oref_protected; } diff --git a/src/coreclr/vm/gcenv.ee.cpp b/src/coreclr/vm/gcenv.ee.cpp index 52dc19a5d2a3a..d0584c4d2a4e7 100644 --- a/src/coreclr/vm/gcenv.ee.cpp +++ b/src/coreclr/vm/gcenv.ee.cpp @@ -91,6 +91,10 @@ VOID GCToEEInterface::AfterGcScanRoots (int condemned, int max_gen, Interop::OnAfterGCScanRoots(sc->concurrent); } +#if !defined(TARGET_X86) || defined(TARGET_UNIX) +#define USE_STACK_LIMIT +#endif + /* * Scan all stack roots */ @@ -128,6 +132,8 @@ static void ScanStackRoots(Thread * pThread, promote_func* fn, ScanContext* sc) #ifdef USE_STACK_LIMIT sc->stack_limit = (uintptr_t)topStack; +#else // USE_STACK_LIMIT + sc->stack_limit = 0; #endif // USE_STACK_LIMIT #ifdef FEATURE_CONSERVATIVE_GC diff --git a/src/coreclr/vm/siginfo.cpp b/src/coreclr/vm/siginfo.cpp index c226960fde87c..eb35f3a372538 100644 --- a/src/coreclr/vm/siginfo.cpp +++ b/src/coreclr/vm/siginfo.cpp @@ -4962,11 +4962,6 @@ void PromoteCarefully(promote_func fn, #if !defined(DACCESS_COMPILE) -#ifdef USE_STACK_LIMIT - // - // Sanity check the stack scan limit - // - assert(sc->stack_limit != 0); // Note that the base is at a higher address than the limit, since the stack // grows downwards. // To check whether the object is in the stack or not, we also need to check the sc->stack_limit. @@ -4974,12 +4969,10 @@ void PromoteCarefully(promote_func fn, // shrink the current reserved stack space. That causes the real limit of the stack to move up and // the range can be reused for other purposes. But the sc->stack_limit is stable during the scan. // Even on Windows, we care just about the stack above the stack_limit. - if ((PTR_TO_TADDR(*ppObj) >= sc->stack_limit)) -#endif // USE_STACK_LIMIT - if (sc->thread_under_crawl->IsAddressInStack(*ppObj)) - { - return; - } + if ((sc->thread_under_crawl->IsAddressInStack(*ppObj)) && (PTR_TO_TADDR(*ppObj) >= sc->stack_limit)) + { + return; + } if (sc->promotion) { From 6d5d4d9734ee48ea5be231d62eb16905c84df6ae Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Wed, 23 Aug 2023 15:15:41 +0200 Subject: [PATCH 4/4] Reflect PR feedback --- src/coreclr/vm/gcenv.ee.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/coreclr/vm/gcenv.ee.cpp b/src/coreclr/vm/gcenv.ee.cpp index d0584c4d2a4e7..8d9f676d967a8 100644 --- a/src/coreclr/vm/gcenv.ee.cpp +++ b/src/coreclr/vm/gcenv.ee.cpp @@ -91,10 +91,6 @@ VOID GCToEEInterface::AfterGcScanRoots (int condemned, int max_gen, Interop::OnAfterGCScanRoots(sc->concurrent); } -#if !defined(TARGET_X86) || defined(TARGET_UNIX) -#define USE_STACK_LIMIT -#endif - /* * Scan all stack roots */ @@ -119,7 +115,7 @@ static void ScanStackRoots(Thread * pThread, promote_func* fn, ScanContext* sc) IsGCSpecialThread() || (GetThread() == ThreadSuspend::GetSuspensionThread() && ThreadStore::HoldingThreadStore())); -#if defined(FEATURE_CONSERVATIVE_GC) || defined(USE_STACK_LIMIT) +#if defined(FEATURE_CONSERVATIVE_GC) || defined(USE_FEF) Frame* pTopFrame = pThread->GetFrame(); Object ** topStack = (Object **)pTopFrame; if (InlinedCallFrame::FrameHasActiveCall(pTopFrame)) @@ -128,13 +124,19 @@ static void ScanStackRoots(Thread * pThread, promote_func* fn, ScanContext* sc) InlinedCallFrame* pInlinedFrame = (InlinedCallFrame*)pTopFrame; topStack = (Object **)pInlinedFrame->GetCallSiteSP(); } -#endif // FEATURE_CONSERVATIVE_GC || USE_STACK_LIMIT +#endif // FEATURE_CONSERVATIVE_GC || USE_FEF -#ifdef USE_STACK_LIMIT +#ifdef USE_FEF + // We only set the stack_limit when FEF (FaultingExceptionFrame) is enabled, because without the + // FEF, the code above would have to check if hardware exception is being handled and get the limit + // from the exception frame. Since the stack_limit is strictly necessary only on Unix and FEF is + // not enabled on Window x86 only, it is sufficient to keep the stack_limit set to 0 in this case. + // See the comment on the stack_limit usage in the PromoteCarefully function for more details. sc->stack_limit = (uintptr_t)topStack; -#else // USE_STACK_LIMIT - sc->stack_limit = 0; -#endif // USE_STACK_LIMIT +#else // USE_FEF + // It should be set to 0 in the ScanContext constructor + _ASSERTE(sc->stack_limit == 0); +#endif // USE_FEF #ifdef FEATURE_CONSERVATIVE_GC if (g_pConfig->GetGCConservative())