From abddacb807a27bbb4f846a755f0f409b67114b14 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Fri, 26 Apr 2024 18:25:39 -0700 Subject: [PATCH 01/26] port suspension algo from NativeAOT --- src/coreclr/vm/threads.cpp | 3 - src/coreclr/vm/threadsuspend.cpp | 453 ++++++++++++------------------- src/coreclr/vm/threadsuspend.h | 4 - 3 files changed, 176 insertions(+), 284 deletions(-) diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp index e9e2409dde0c7b..c8ca8486cdc9bf 100644 --- a/src/coreclr/vm/threads.cpp +++ b/src/coreclr/vm/threads.cpp @@ -5184,9 +5184,6 @@ void ThreadStore::InitThreadStore() g_pThinLockThreadIdDispenser = new IdDispenser(); - ThreadSuspend::g_pGCSuspendEvent = new CLREvent(); - ThreadSuspend::g_pGCSuspendEvent->CreateManualEvent(FALSE); - s_pWaitForStackCrawlEvent = new CLREvent(); s_pWaitForStackCrawlEvent->CreateManualEvent(FALSE); diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index f79193888480d8..820551644877cb 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -21,8 +21,6 @@ bool ThreadSuspend::s_fSuspendRuntimeInProgress = false; bool ThreadSuspend::s_fSuspended = false; -CLREvent* ThreadSuspend::g_pGCSuspendEvent = NULL; - ThreadSuspend::SUSPEND_REASON ThreadSuspend::m_suspendReason; #if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) @@ -2378,11 +2376,6 @@ void Thread::RareEnablePreemptiveGC() UnhijackThread(); #endif // FEATURE_HIJACK - // EnablePreemptiveGC already set us to preemptive mode before triggering the Rare path. - // the Rare path implies that someone else is observing us (e.g. SuspendRuntime). - // we have changed to preemptive mode, so signal that there was a suspension progress. - ThreadSuspend::g_pGCSuspendEvent->Set(); - // for GC, the fact that we are leaving the EE means that it no longer needs to // suspend us. But if we are doing a non-GC suspend, we need to block now. // Give the debugger precedence over user suspensions: @@ -3209,6 +3202,44 @@ COR_PRF_SUSPEND_REASON GCSuspendReasonToProfSuspendReason(ThreadSuspend::SUSPEND } #endif // PROFILING_SUPPORTED +int64_t QueryPerformanceCounter() +{ + LARGE_INTEGER ts; + QueryPerformanceCounter(&ts); + return ts.QuadPart; +} + +int64_t QueryPerformanceFrequency() +{ + LARGE_INTEGER ts; + QueryPerformanceFrequency(&ts); + return ts.QuadPart; +} + +// exponential spinwait with an approximate time limit for waiting in microsecond range. +// when iteration == -1, only usecLimit is used +void SpinWait(int iteration, int usecLimit) +{ + int64_t startTicks = QueryPerformanceCounter(); + int64_t ticksPerSecond = QueryPerformanceFrequency(); + int64_t endTicks = startTicks + (usecLimit * ticksPerSecond) / 1000000; + + int l = iteration >= 0 ? min(iteration, 30): 30; + for (int i = 0; i < l; i++) + { + for (int j = 0; j < (1 << i); j++) + { + System_YieldProcessor(); + } + + int64_t currentTicks = QueryPerformanceCounter(); + if (currentTicks > endTicks) + { + break; + } + } +} + //************************************************************************************ // // SuspendRuntime is responsible for ensuring that all managed threads reach a @@ -3329,329 +3360,201 @@ void ThreadSuspend::SuspendRuntime(ThreadSuspend::SUSPEND_REASON reason) // 1) we count the number of threads that are observed to be in cooperative mode. // 2) for threads currently running managed code, we try to redirect/jihack them. 
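The SpinWait helper added above replaces the old event-based wait on g_pGCSuspendEvent. As a rough illustration of the wait policy (taken from the call sites that appear later in this patch; this sketch is not part of the diff itself, and `retries` is the loop-local counter declared below):

    // short, purely time-bounded pause (~5 usec) while only observing whether
    // threads have already left cooperative mode on their own
    SpinWait(-1, 5);

    // exponentially growing, iteration-capped backoff (up to ~100 usec per wait)
    // between active hijack/redirect passes
    SpinWait(retries++, 100);

    // every 128 retries, yield the CPU so spinning does not starve other threads
    if ((retries & 127) == 0)
        SwitchToThread();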
- // counts of cooperative threads - int previousCount = 0; - int countThreads = 0; - - // we will iterate over threads and check which are left in coop mode - // while checking, we also will try suspending and hijacking - // unless we have just done that, then we can observeOnly and see if situation improves - // we do not on uniprocessor though (spin-checking is pointless on uniprocessor) + int retries = 0; + int prevRemaining = 0; + int remaining = 0; bool observeOnly = false; _ASSERTE(!pCurThread || !pCurThread->HasThreadState(Thread::TS_GCSuspendFlags)); -#ifdef _DEBUG - DWORD dbgStartTimeout = GetTickCount(); -#endif - while (true) + while(true) { - Thread* thread = NULL; - while ((thread = ThreadStore::GetThreadList(thread)) != NULL) - { - if (thread == pCurThread) - continue; - - // on the first iteration check m_fPreemptiveGCDisabled unconditionally and - // mark interesting threads as TS_GCSuspendPending - if (previousCount == 0) - { - STRESS_LOG3(LF_SYNC, LL_INFO10000, " Inspecting thread 0x%x ID 0x%x coop mode = %d\n", - thread, thread->GetThreadId(), thread->m_fPreemptiveGCDisabled.LoadWithoutBarrier()); - - - // ::FlushProcessWriteBuffers above guarantees that the state that we see here - // is after the trap flag is visible to the other thread. - // - // In other words: any threads seen in preemptive mode are no longer interesting to us. - // if they try switch to cooperative, they would see the flag set. -#ifdef FEATURE_PERFTRACING - // Mark that the thread is currently in managed code. - thread->SaveGCModeOnSuspension(); -#endif // FEATURE_PERFTRACING - if (!thread->m_fPreemptiveGCDisabled.LoadWithoutBarrier()) - { - _ASSERTE(!thread->HasThreadState(Thread::TS_GCSuspendFlags)); - continue; - } - else - { - countThreads++; - thread->SetThreadState(Thread::TS_GCSuspendPending); - } - } - - if (!thread->HasThreadStateOpportunistic(Thread::TS_GCSuspendPending)) - { - continue; - } + prevRemaining = remaining; + remaining = 0; - if (!thread->m_fPreemptiveGCDisabled.LoadWithoutBarrier()) - { - STRESS_LOG1(LF_SYNC, LL_INFO1000, " Thread %x went preemptive it is at a GC safe point\n", thread); - countThreads--; - thread->ResetThreadState(Thread::TS_GCSuspendFlags); + Thread* pTargetThread = NULL; + while ((pTargetThread = ThreadStore::GetThreadList(pTargetThread)) != NULL) + { + if (pTargetThread == pCurThread) continue; - } - if (observeOnly) - { + // GC threads can not be forced to run preemptively, so we will not try. + if (pTargetThread->IsGCSpecial()) continue; - } - if (thread->IsGCSpecial()) + if (pTargetThread->m_fPreemptiveGCDisabled.LoadWithoutBarrier()) { - // GC threads can not be forced to run preemptively, so we will not try. - continue; - } - - // this is an interesting thread in cooperative mode, let's guide it to preemptive + // TODO: VS rethink this. + pTargetThread->SetThreadState(Thread::TS_GCSuspendPending); + remaining++; + if (!observeOnly) + { + // TODO: VS pTargetThread->Hijack(); - if (!Thread::UseContextBasedThreadRedirection()) - { - // On platforms that do not support safe thread suspension, we do one of the following: - // - // - If we're on a Unix platform where hijacking is enabled, we attempt - // to inject an activation which will try to redirect or hijack the - // thread to get it to a safe point. - // - // - Similarly to above, if we're on a Windows platform where the special - // user-mode APC is available, that is used if redirection is necessary. - // - // - Otherwise, we rely on the GCPOLL mechanism enabled by - // TrapReturningThreads. 
+ if (!Thread::UseContextBasedThreadRedirection()) + { + // On platforms that do not support safe thread suspension, we do one of the following: + // + // - If we're on a Unix platform where hijacking is enabled, we attempt + // to inject an activation which will try to redirect or hijack the + // thread to get it to a safe point. + // + // - Similarly to above, if we're on a Windows platform where the special + // user-mode APC is available, that is used if redirection is necessary. + // + // - Otherwise, we rely on the GCPOLL mechanism enabled by + // TrapReturningThreads. #ifdef FEATURE_THREAD_ACTIVATION - bool success = thread->InjectActivation(Thread::ActivationReason::SuspendForGC); - if (!success) - { - STRESS_LOG1(LF_SYNC, LL_INFO1000, "Thread::SuspendRuntime() - Failed to inject an activation for thread %p.\n", thread); - } + bool success = pTargetThread->InjectActivation(Thread::ActivationReason::SuspendForGC); + if (!success) + { + STRESS_LOG1(LF_SYNC, LL_INFO1000, "Thread::SuspendRuntime() - Failed to inject an activation for thread %p.\n", pCurThread); + } #endif // FEATURE_THREAD_ACTIVATION - continue; - } + continue; + } -#ifndef DISABLE_THREADSUSPEND + #ifndef DISABLE_THREADSUSPEND - if (thread->HasThreadStateOpportunistic(Thread::TS_GCSuspendRedirected)) - { - // We have seen this thead before and have redirected it. - // No point in suspending it again. It will not run hijackable code until it parks itself. - continue; - } + if (pTargetThread->HasThreadStateOpportunistic(Thread::TS_GCSuspendRedirected)) + { + // We have seen this thead before and have redirected it. + // No point in suspending it again. It will not run hijackable code until it parks itself. + continue; + } #ifdef TIME_SUSPEND - DWORD startSuspend = g_SuspendStatistics.GetTime(); + DWORD startSuspend = g_SuspendStatistics.GetTime(); #endif - // - // Suspend the native thread. - // + // + // Suspend the native thread. + // - // We can not allocate memory after we suspend a thread. - // Otherwise, we may deadlock the process, because the thread we just suspended - // might hold locks we would need to acquire while allocating. - ThreadStore::AllocateOSContext(); - Thread::SuspendThreadResult str = thread->SuspendThread(/*fOneTryOnly*/ TRUE); + // We can not allocate memory after we suspend a thread. + // Otherwise, we may deadlock the process, because the thread we just suspended + // might hold locks we would need to acquire while allocating. + ThreadStore::AllocateOSContext(); + Thread::SuspendThreadResult str = pTargetThread->SuspendThread(/*fOneTryOnly*/ TRUE); - // We should just always build with this TIME_SUSPEND stuff, and report the results via ETW. + // We should just always build with this TIME_SUSPEND stuff, and report the results via ETW. #ifdef TIME_SUSPEND - g_SuspendStatistics.osSuspend.Accumulate( - SuspendStatistics::GetElapsed(startSuspend, - g_SuspendStatistics.GetTime())); + g_SuspendStatistics.osSuspend.Accumulate( + SuspendStatistics::GetElapsed(startSuspend, + g_SuspendStatistics.GetTime())); - if (str == Thread::STR_Success) - g_SuspendStatistics.cntOSSuspendResume++; - else - g_SuspendStatistics.cntFailedSuspends++; + if (str == Thread::STR_Success) + g_SuspendStatistics.cntOSSuspendResume++; + else + g_SuspendStatistics.cntFailedSuspends++; #endif - switch (str) - { - case Thread::STR_Success: - // let's check the state of this one. - break; + switch (str) + { + case Thread::STR_Success: + // let's check the state of this one. 
+ break; - case Thread::STR_Forbidden: - STRESS_LOG1(LF_SYNC, LL_INFO1000, " Suspending thread 0x%x forbidden\n", thread); - continue; + case Thread::STR_Forbidden: + STRESS_LOG1(LF_SYNC, LL_INFO1000, " Suspending thread 0x%x forbidden\n", pCurThread); + continue; - case Thread::STR_NoStressLog: - STRESS_LOG2(LF_SYNC, LL_ERROR, " ERROR: Could not suspend thread 0x%x, result = %d\n", thread, str); - continue; + case Thread::STR_NoStressLog: + STRESS_LOG2(LF_SYNC, LL_ERROR, " ERROR: Could not suspend thread 0x%x, result = %d\n", pCurThread, str); + continue; - case Thread::STR_UnstartedOrDead: - case Thread::STR_Failure: - STRESS_LOG3(LF_SYNC, LL_ERROR, " ERROR: Could not suspend thread 0x%x, result = %d, lastError = 0x%x\n", thread, str, GetLastError()); - continue; - } + case Thread::STR_UnstartedOrDead: + case Thread::STR_Failure: + STRESS_LOG3(LF_SYNC, LL_ERROR, " ERROR: Could not suspend thread 0x%x, result = %d, lastError = 0x%x\n", pCurThread, str, GetLastError()); + continue; + } - // the thread is suspended here, we can hijack, if platform supports. + // the thread is suspended here, we can hijack, if platform supports. - if (!thread->m_fPreemptiveGCDisabled.LoadWithoutBarrier()) - { - // actually, we are done with this one - STRESS_LOG1(LF_SYNC, LL_INFO1000, " Thread %x went preemptive while suspending it is at a GC safe point\n", thread); - countThreads--; - thread->ResetThreadState(Thread::TS_GCSuspendFlags); - thread->ResumeThread(); - continue; - } + if (!pTargetThread->m_fPreemptiveGCDisabled.LoadWithoutBarrier()) + { + // actually, we are done with this one + STRESS_LOG1(LF_SYNC, LL_INFO1000, " Thread %x went preemptive while suspending it is at a GC safe point\n", pCurThread); + pTargetThread->ResetThreadState(Thread::TS_GCSuspendFlags); + pTargetThread->ResumeThread(); + continue; + } - // We now know for sure that the thread is still in cooperative mode. If it's in JIT'd code, here - // is where we try to hijack/redirect the thread. If it's in VM code, we have to just let the VM - // finish what it's doing. + // We now know for sure that the thread is still in cooperative mode. If it's in JIT'd code, here + // is where we try to hijack/redirect the thread. If it's in VM code, we have to just let the VM + // finish what it's doing. #if defined(FEATURE_HIJACK) && !defined(TARGET_UNIX) - { - Thread::WorkingOnThreadContextHolder workingOnThreadContext(thread); - - // Note that thread->HandledJITCase is not a simple predicate - it actually will hijack the thread if that's possible. - // So HandledJITCase can do one of these: - // - Return TRUE, in which case it's our responsibility to redirect the thread - // - Return FALSE after hijacking the thread - we shouldn't try to redirect - // - Return FALSE but not hijack the thread - there's nothing we can do either - if (workingOnThreadContext.Acquired() && thread->HandledJITCase()) - { - // Thread is in cooperative state and stopped in interruptible code. - // Redirect thread so we can capture a good thread context - // (GetThreadContext is not sufficient, due to an OS bug). - if (!thread->CheckForAndDoRedirectForGC()) { + Thread::WorkingOnThreadContextHolder workingOnThreadContext(pCurThread); + + // Note that pTargetThread->HandledJITCase is not a simple predicate - it actually will hijack the thread if that's possible. 
+ // So HandledJITCase can do one of these: + // - Return TRUE, in which case it's our responsibility to redirect the thread + // - Return FALSE after hijacking the thread - we shouldn't try to redirect + // - Return FALSE but not hijack the thread - there's nothing we can do either + if (workingOnThreadContext.Acquired() && pTargetThread->HandledJITCase()) + { + // Thread is in cooperative state and stopped in interruptible code. + // Redirect thread so we can capture a good thread context + // (GetThreadContext is not sufficient, due to an OS bug). + if (!pTargetThread->CheckForAndDoRedirectForGC()) + { #ifdef TIME_SUSPEND - g_SuspendStatistics.cntFailedRedirections++; + g_SuspendStatistics.cntFailedRedirections++; #endif - STRESS_LOG1(LF_SYNC, LL_INFO1000, "Failed to CheckForAndDoRedirectForGC(). Thread %p\n", thread); - } - else - { + STRESS_LOG1(LF_SYNC, LL_INFO1000, "Failed to CheckForAndDoRedirectForGC(). Thread %p\n", pTargetThread); + } + else + { #ifdef TIME_SUSPEND - g_SuspendStatistics.cntRedirections++; + g_SuspendStatistics.cntRedirections++; #endif - thread->SetThreadState(Thread::TS_GCSuspendRedirected); - STRESS_LOG1(LF_SYNC, LL_INFO1000, "Thread::SuspendRuntime() - Thread %p redirected().\n", thread); + pTargetThread->SetThreadState(Thread::TS_GCSuspendRedirected); + STRESS_LOG1(LF_SYNC, LL_INFO1000, "Thread::SuspendRuntime() - Thread %p redirected().\n", pTargetThread); + } + } } - } - } #endif // FEATURE_HIJACK && !TARGET_UNIX - thread->ResumeThread(); - STRESS_LOG1(LF_SYNC, LL_INFO1000, " Thread 0x%x is in cooperative needs to rendezvous\n", thread); + pTargetThread->ResumeThread(); + STRESS_LOG1(LF_SYNC, LL_INFO1000, " Thread 0x%x is in cooperative needs to rendezvous\n", pTargetThread); #endif // !DISABLE_THREADSUSPEND + } + } + else + { + // TODO: VS rethink this. + pTargetThread->ResetThreadState(Thread::TS_GCSuspendFlags); + } } - if (countThreads == 0) - { - // SUCCESS!! + if (!remaining) break; - } - bool hasProgress = previousCount != countThreads; - previousCount = countThreads; - - // If we have just updated hijacks/redirects, then do a pass while only observing. - // Repeat observing only as long as we see progress. Most threads react to hijack/redirect very fast and - // typically we can avoid waiting on an event. (except on uniprocessor where we do not spin) - // - // Otherwise redo hijacks, but check g_pGCSuspendEvent event on the way. - // Re-hijacking unconditionally is likely to execute exactly the same hijacks, - // while not letting the other threads to run much. - // Thus we require either PING_JIT_TIMEOUT or some progress between active suspension attempts. - if (g_SystemInfo.dwNumberOfProcessors > 1 && (hasProgress || !observeOnly)) + // if we see progress or have just done a hijacking pass + // do not hijack in the next iteration + if (remaining < prevRemaining || !observeOnly) { - // small pause - YieldProcessorNormalized(); - - STRESS_LOG1(LF_SYNC, LL_INFO1000, "Spinning, %d threads remaining\n", countThreads); + // 5 usec delay, then check for more progress + SpinWait(-1, 5); observeOnly = true; - continue; } - -#ifdef TIME_SUSPEND - DWORD startWait = g_SuspendStatistics.GetTime(); -#endif - - // Wait for at least one thread to tell us it's left cooperative mode. - // we do this by waiting on g_pGCSuspendEvent. 
We cannot simply wait forever, because we - // might have done return-address hijacking on a thread, and that thread might not - // return from the method we hijacked (maybe it calls into some other managed code that - // executes a long loop, for example). We wait with a timeout, and retry hijacking/redirection. - // - // This is unfortunate, because it means that in some cases we wait for PING_JIT_TIMEOUT - // milliseconds, causing long GC pause times. - - STRESS_LOG1(LF_SYNC, LL_INFO1000, "Waiting for suspend event %d threads remaining\n", countThreads); - DWORD res = g_pGCSuspendEvent->Wait(PING_JIT_TIMEOUT, FALSE); - -#ifdef TIME_SUSPEND - g_SuspendStatistics.wait.Accumulate( - SuspendStatistics::GetElapsed(startWait, - g_SuspendStatistics.GetTime())); - - g_SuspendStatistics.cntWaits++; - if (res == WAIT_TIMEOUT) - g_SuspendStatistics.cntWaitTimeouts++; -#endif - - if (res == WAIT_TIMEOUT || res == WAIT_IO_COMPLETION) + else { - STRESS_LOG1(LF_SYNC, LL_INFO1000, " Timed out waiting for rendezvous event %d threads remaining\n", countThreads); -#ifdef _DEBUG - DWORD dbgEndTimeout = GetTickCount(); + SpinWait(retries++, 100); + observeOnly = false; - if ((dbgEndTimeout > dbgStartTimeout) && - (dbgEndTimeout - dbgStartTimeout > g_pConfig->SuspendDeadlockTimeout())) + // make sure our spining is not starving other threads, but not too often, + // this can cause a 1-15 msec delay, depending on OS, and that is a lot while + // very rarely needed, since threads are supposed to be releasing their CPUs + if ((retries & 127) == 0) { - // Do not change this to _ASSERTE. - // We want to catch the state of the machine at the - // time when we can not suspend some threads. - // It takes too long for _ASSERTE to stop the process. - DebugBreak(); - _ASSERTE(!"Timed out trying to suspend EE due to thread"); - char message[256]; - - Thread* thread = NULL; - while ((thread = ThreadStore::GetThreadList(thread)) != NULL) - { - if (thread == pCurThread) - continue; - - if ((thread->m_State & Thread::TS_GCSuspendPending) == 0) - continue; - - if (thread->m_fPreemptiveGCDisabled) - { - DWORD id = (DWORD) thread->m_OSThreadId; - if (id == 0xbaadf00d) - { - sprintf_s (message, ARRAY_SIZE(message), "Thread CLR ID=%x cannot be suspended", - thread->GetThreadId()); - } - else - { - sprintf_s (message, ARRAY_SIZE(message), "Thread OS ID=%x cannot be suspended", - id); - } - DbgAssertDialog(__FILE__, __LINE__, message); - } - } - // if we continue from the assert we'll reset the time - dbgStartTimeout = GetTickCount(); + SwitchToThread(); } -#endif - } - else if (res == WAIT_OBJECT_0) - { } - else - { - // No WAIT_FAILED, WAIT_ABANDONED, etc. - _ASSERTE(!"unexpected wait termination during gc suspension"); - } - - observeOnly = false; - g_pGCSuspendEvent->Reset(); } #ifdef PROFILING_SUPPORTED @@ -3675,9 +3578,6 @@ void ThreadSuspend::SuspendRuntime(ThreadSuspend::SUSPEND_REASON reason) } #endif - // We know all threads are in preemptive mode, so go ahead and reset the event. 
- g_pGCSuspendEvent->Reset(); - #ifdef HAVE_GCCOVER // // Now that the EE has been suspended, let's see if any oustanding @@ -6284,7 +6184,6 @@ void SuspendStatistics::DisplayAndUpdate() releaseTSL.DisplayAndUpdate(logFile, "Unlock ", &g_LastSuspendStatistics.releaseTSL, cntSuspends, g_LastSuspendStatistics.cntSuspends); osSuspend.DisplayAndUpdate (logFile, "OS Susp", &g_LastSuspendStatistics.osSuspend, cntOSSuspendResume, g_LastSuspendStatistics.cntOSSuspendResume); crawl.DisplayAndUpdate (logFile, "Crawl", &g_LastSuspendStatistics.crawl, cntHijackCrawl, g_LastSuspendStatistics.cntHijackCrawl); - wait.DisplayAndUpdate (logFile, "Wait", &g_LastSuspendStatistics.wait, cntWaits, g_LastSuspendStatistics.cntWaits); fprintf(logFile, "OS Suspend Failures %d (%d), Wait Timeouts %d (%d), Hijack traps %d (%d)\n", cntFailedSuspends - g_LastSuspendStatistics.cntFailedSuspends, cntFailedSuspends, diff --git a/src/coreclr/vm/threadsuspend.h b/src/coreclr/vm/threadsuspend.h index 6d034b91a1997a..3dcf3cff6cd4d7 100644 --- a/src/coreclr/vm/threadsuspend.h +++ b/src/coreclr/vm/threadsuspend.h @@ -108,9 +108,6 @@ struct SuspendStatistics /////////////////////////////////////////////////////////////////////////////////////////////// // There are some interesting events that are worth counting, because they show where the time is going: - // number of times we waited on g_pGCSuspendEvent while trying to suspend the EE - int cntWaits; - // and the number of times those Waits timed out rather than being signalled by a cooperating thread int cntWaitTimeouts; @@ -192,7 +189,6 @@ class ThreadSuspend static void Initialize(); private: - static CLREvent * g_pGCSuspendEvent; #if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) static void* g_returnAddressHijackTarget; From b5091cc62d13d5557143fe9900fcf31baaf18312 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Fri, 26 Apr 2024 19:40:01 -0700 Subject: [PATCH 02/26] PING_JIT_TIMEOUT gone --- src/coreclr/vm/threadsuspend.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 820551644877cb..fcd63c2a44d0ac 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -56,11 +56,6 @@ extern "C" void RedirectedHandledJITCaseForGCStress_Stub(void); #endif // HAVE_GCCOVER && USE_REDIRECT_FOR_GCSTRESS #endif // TARGET_AMD64 || TARGET_ARM -// Every PING_JIT_TIMEOUT ms, check to see if a thread in JITted code has wandered -// into some fully interruptible code (or should have a different hijack to improve -// our chances of snagging it at a safe spot). -#define PING_JIT_TIMEOUT 1 - // When we find a thread in a spot that's not safe to abort -- how long to wait before // we try again. 
#define ABORT_POLL_TIMEOUT 10 From afa8fb6506a1abce979fbd3a5bf07563303720a3 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sat, 27 Apr 2024 10:29:26 -0700 Subject: [PATCH 03/26] CatchAtSafePoint is always opportunistic --- src/coreclr/nativeaot/Runtime/thread.cpp | 8 -------- src/coreclr/nativeaot/Runtime/thread.h | 1 - src/coreclr/vm/threads.cpp | 2 +- src/coreclr/vm/threads.h | 11 ++++------- src/coreclr/vm/threadsuspend.cpp | 3 ++- 5 files changed, 7 insertions(+), 18 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index 1ae78d37d56fc4..c3a72d1846d959 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -328,14 +328,6 @@ bool Thread::IsGCSpecial() return IsStateSet(TSF_IsGcSpecialThread); } -bool Thread::CatchAtSafePoint() -{ - // This is only called by the GC on a background GC worker thread that's explicitly interested in letting - // a foreground GC proceed at that point. So it's always safe to return true. - ASSERT(IsGCSpecial()); - return true; -} - uint64_t Thread::GetPalThreadIdForLogging() { return m_threadId; diff --git a/src/coreclr/nativeaot/Runtime/thread.h b/src/coreclr/nativeaot/Runtime/thread.h index 3c8073faa2ac70..abe095b994a398 100644 --- a/src/coreclr/nativeaot/Runtime/thread.h +++ b/src/coreclr/nativeaot/Runtime/thread.h @@ -297,7 +297,6 @@ class Thread : private ThreadBuffer // void SetGCSpecial(); bool IsGCSpecial(); - bool CatchAtSafePoint(); // // Managed/unmanaged interop transitions support APIs diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp index c8ca8486cdc9bf..1c9d27a6e2aab4 100644 --- a/src/coreclr/vm/threads.cpp +++ b/src/coreclr/vm/threads.cpp @@ -7078,7 +7078,7 @@ void CommonTripThread() Thread *thread = GetThread(); thread->HandleThreadAbort (); - if (thread->CatchAtSafePoint()) + if (thread->CatchAtSafePointOpportunistic()) { _ASSERTE(!ThreadStore::HoldingThreadStore(thread)); #ifdef FEATURE_HIJACK diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index 67c4b6b83c975b..2e1b979ed1ed13 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -936,15 +936,10 @@ class Thread void DoExtraWorkForFinalizer(); #ifndef DACCESS_COMPILE - DWORD CatchAtSafePoint() - { - LIMITED_METHOD_CONTRACT; - return (m_State & TS_CatchAtSafePoint); - } - DWORD CatchAtSafePointOpportunistic() { LIMITED_METHOD_CONTRACT; + return HasThreadStateOpportunistic(TS_CatchAtSafePoint); } #endif // DACCESS_COMPILE @@ -1423,7 +1418,9 @@ class Thread m_ulEnablePreemptiveGCCount ++; #endif // _DEBUG - if (CatchAtSafePoint()) + // TODO: VS remove this. no polling on going to preemptive + // Delete RareEnablePreemptiveGC as well. 
+ if (CatchAtSafePointOpportunistic()) RareEnablePreemptiveGC(); #endif } diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index fcd63c2a44d0ac..206f06c555f7f9 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -2413,7 +2413,8 @@ void Thread::PulseGCMode() _ASSERTE(this == GetThread()); - if (PreemptiveGCDisabled() && CatchAtSafePoint()) + // TODO: VS no need to check, but assert coop + if (PreemptiveGCDisabled() && CatchAtSafePointOpportunistic()) { EnablePreemptiveGC(); DisablePreemptiveGC(); From 08a90e8772c6ad5406fecd2c513905e906187a2b Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sat, 27 Apr 2024 13:44:53 -0700 Subject: [PATCH 04/26] current --- src/coreclr/vm/threadsuspend.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 206f06c555f7f9..62cf6bd4deb861 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -5699,9 +5699,10 @@ void ThreadSuspend::SuspendEE(SUSPEND_REASON reason) LOG((LF_GCROOTS | LF_GC | LF_CORDB, LL_INFO10, "The EE is free now...\n")); - // If someone's trying to suspent *this* thread, this is a good opportunity. + // If someone's trying to suspend *this* thread, this is a good opportunity. if (pCurThread && pCurThread->CatchAtSafePointOpportunistic()) { + pCurThread->PulseGCMode(); // Go suspend myself. } else @@ -5807,7 +5808,8 @@ void HandleSuspensionForInterruptedThread(CONTEXT *interruptedContext) frame.Push(pThread); - pThread->PulseGCMode(); + pThread->EnablePreemptiveGC(); + pThread->DisablePreemptiveGC(); INSTALL_MANAGED_EXCEPTION_DISPATCHER; INSTALL_UNWIND_AND_CONTINUE_HANDLER; From bb4be451da043d731a63c973ebc091db7a09eb91 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sat, 27 Apr 2024 15:28:36 -0700 Subject: [PATCH 05/26] removed RareEnablePreemptiveGC --- src/coreclr/debug/ee/debugger.h | 2 +- src/coreclr/vm/eedbginterfaceimpl.cpp | 2 +- src/coreclr/vm/i386/asmhelpers.S | 25 ------ src/coreclr/vm/i386/asmhelpers.asm | 24 ------ src/coreclr/vm/i386/cgenx86.cpp | 12 --- src/coreclr/vm/i386/stublinkerx86.cpp | 104 +++-------------------- src/coreclr/vm/i386/stublinkerx86.h | 3 - src/coreclr/vm/threads.cpp | 7 -- src/coreclr/vm/threads.h | 19 ----- src/coreclr/vm/threadsuspend.cpp | 115 ++++++++++---------------- 10 files changed, 57 insertions(+), 256 deletions(-) diff --git a/src/coreclr/debug/ee/debugger.h b/src/coreclr/debug/ee/debugger.h index d66307fb146422..5b95f0ae757a6a 100644 --- a/src/coreclr/debug/ee/debugger.h +++ b/src/coreclr/debug/ee/debugger.h @@ -3807,7 +3807,7 @@ HANDLE OpenWin32EventOrThrow( // debugger may not be expecting it. Instead, just leave the lock and retry. // When we leave, we'll enter coop mode first and get suspended if a suspension is in progress. // Afterwards, we'll transition back into preemptive mode, and we'll block because this thread -// has been suspended by the debugger (see code:Thread::RareEnablePreemptiveGC). +// has been suspended by the debugger (see code:Thread::RareDisablePreemptiveGC). 
#define SENDIPCEVENT_BEGIN_EX(pDebugger, thread, gcxStmt) \ { \ FireEtwDebugIPCEventStart(); \ diff --git a/src/coreclr/vm/eedbginterfaceimpl.cpp b/src/coreclr/vm/eedbginterfaceimpl.cpp index 4634a3a3cc5a3e..822a46a536e397 100644 --- a/src/coreclr/vm/eedbginterfaceimpl.cpp +++ b/src/coreclr/vm/eedbginterfaceimpl.cpp @@ -652,7 +652,7 @@ void EEDbgInterfaceImpl::EnablePreemptiveGC(void) CONTRACTL { NOTHROW; - DISABLED(GC_TRIGGERS); // Disabled because disabled in RareEnablePreemptiveGC() + GC_NOTRIGGER; } CONTRACTL_END; diff --git a/src/coreclr/vm/i386/asmhelpers.S b/src/coreclr/vm/i386/asmhelpers.S index ff777550bfab58..54f58fd75bb620 100644 --- a/src/coreclr/vm/i386/asmhelpers.S +++ b/src/coreclr/vm/i386/asmhelpers.S @@ -126,31 +126,6 @@ LEAF_ENTRY RestoreFPUContext, _TEXT ret 4 LEAF_END RestoreFPUContext, _TEXT -// ----------------------------------------------------------------------- -// The out-of-line portion of the code to enable preemptive GC. -// After the work is done, the code jumps back to the "pRejoinPoint" -// which should be emitted right after the inline part is generated. -// -// Assumptions: -// ebx = Thread -// Preserves -// all registers except ecx. -// -// ----------------------------------------------------------------------- -NESTED_ENTRY StubRareEnable, _TEXT, NoHandler - push eax - push edx - - push ebx - - CHECK_STACK_ALIGNMENT - call C_FUNC(StubRareEnableWorker) - - pop edx - pop eax - ret -NESTED_END StubRareEnable, _TEXT - NESTED_ENTRY StubRareDisableTHROW, _TEXT, NoHandler push eax push edx diff --git a/src/coreclr/vm/i386/asmhelpers.asm b/src/coreclr/vm/i386/asmhelpers.asm index 1d02fc48f8d8b8..5f03683f588fac 100644 --- a/src/coreclr/vm/i386/asmhelpers.asm +++ b/src/coreclr/vm/i386/asmhelpers.asm @@ -24,7 +24,6 @@ EXTERN __imp__RtlUnwind@16:DWORD ifdef _DEBUG EXTERN _HelperMethodFrameConfirmState@20:PROC endif -EXTERN _StubRareEnableWorker@4:PROC ifdef FEATURE_COMINTEROP EXTERN _StubRareDisableHRWorker@4:PROC endif ; FEATURE_COMINTEROP @@ -394,29 +393,6 @@ endif retn 8 _CallJitEHFinallyHelper@8 ENDP -;----------------------------------------------------------------------- -; The out-of-line portion of the code to enable preemptive GC. -; After the work is done, the code jumps back to the "pRejoinPoint" -; which should be emitted right after the inline part is generated. -; -; Assumptions: -; ebx = Thread -; Preserves -; all registers except ecx. 
-; -;----------------------------------------------------------------------- -_StubRareEnable proc public - push eax - push edx - - push ebx - call _StubRareEnableWorker@4 - - pop edx - pop eax - retn -_StubRareEnable ENDP - ifdef FEATURE_COMINTEROP _StubRareDisableHR proc public push edx diff --git a/src/coreclr/vm/i386/cgenx86.cpp b/src/coreclr/vm/i386/cgenx86.cpp index 108bc66a99b153..d9dc22d148193b 100644 --- a/src/coreclr/vm/i386/cgenx86.cpp +++ b/src/coreclr/vm/i386/cgenx86.cpp @@ -911,18 +911,6 @@ Stub *GenerateInitPInvokeFrameHelper() RETURN psl->Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()); } - -extern "C" VOID STDCALL StubRareEnableWorker(Thread *pThread) -{ - WRAPPER_NO_CONTRACT; - - //printf("RareEnable\n"); - pThread->RareEnablePreemptiveGC(); -} - - - - // Disable when calling into managed code from a place that fails via Exceptions extern "C" VOID STDCALL StubRareDisableTHROWWorker(Thread *pThread) { diff --git a/src/coreclr/vm/i386/stublinkerx86.cpp b/src/coreclr/vm/i386/stublinkerx86.cpp index 76d888c0c52756..7dde57bf8ebd55 100644 --- a/src/coreclr/vm/i386/stublinkerx86.cpp +++ b/src/coreclr/vm/i386/stublinkerx86.cpp @@ -45,8 +45,6 @@ #ifndef DACCESS_COMPILE - -extern "C" VOID __cdecl StubRareEnable(Thread *pThread); #ifdef FEATURE_COMINTEROP extern "C" HRESULT __cdecl StubRareDisableHR(Thread *pThread); #endif // FEATURE_COMINTEROP @@ -2615,6 +2613,8 @@ void StubLinkerCPU::EmitComMethodStubEpilog(TADDR pFrameVptr, PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL && rgRareLabels[2] != NULL); PRECONDITION(rgRejoinLabels != NULL); PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL && rgRejoinLabels[2] != NULL); + PRECONDITION(4 == sizeof( ((Thread*)0)->m_State )); + PRECONDITION(4 == sizeof( ((Thread*)0)->m_fPreemptiveGCDisabled )); } CONTRACTL_END; @@ -2623,11 +2623,9 @@ void StubLinkerCPU::EmitComMethodStubEpilog(TADDR pFrameVptr, // mov [ebx + Thread.GetFrame()], edi ;; restore previous frame X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kEDI); - //----------------------------------------------------------------------- - // Generate the inline part of disabling preemptive GC - //----------------------------------------------------------------------- - EmitEnable(rgRareLabels[2]); // rare gc - EmitLabel(rgRejoinLabels[2]); // rejoin for rare gc + // move byte ptr [ebx + Thread.m_fPreemptiveGCDisabled],0 + X86EmitOffsetModRM(0xc6, (X86Reg)0, kEBX, Thread::GetOffsetOfGCFlag()); + Emit8(0); // add esp, popstack X86EmitAddEsp(sizeof(GSCookie) + UnmanagedToManagedFrame::GetOffsetOfCalleeSavedRegisters()); @@ -2651,12 +2649,6 @@ void StubLinkerCPU::EmitComMethodStubEpilog(TADDR pFrameVptr, // keeps on going past the previous "jmp eax". 
X86EmitReturn(0); - //----------------------------------------------------------------------- - // The out-of-line portion of enabling preemptive GC - rarely executed - //----------------------------------------------------------------------- - EmitLabel(rgRareLabels[2]); // label for rare enable gc - EmitRareEnable(rgRejoinLabels[2]); // emit rare enable gc - //----------------------------------------------------------------------- // The out-of-line portion of disabling preemptive GC - rarely executed //----------------------------------------------------------------------- @@ -2736,6 +2728,8 @@ void StubLinkerCPU::EmitSharedComMethodStubEpilog(TADDR pFrameVptr, PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL && rgRareLabels[2] != NULL); PRECONDITION(rgRejoinLabels != NULL); PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL && rgRejoinLabels[2] != NULL); + PRECONDITION(4 == sizeof( ((Thread*)0)->m_State )); + PRECONDITION(4 == sizeof( ((Thread*)0)->m_fPreemptiveGCDisabled )); } CONTRACTL_END; @@ -2748,12 +2742,13 @@ void StubLinkerCPU::EmitSharedComMethodStubEpilog(TADDR pFrameVptr, X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kEDI); //----------------------------------------------------------------------- - // Generate the inline part of enabling preemptive GC + // Generate enabling preemptive GC //----------------------------------------------------------------------- EmitLabel(NoEntryLabel); // need to enable preemp mode even when we fail the disable as rare disable will return in coop mode - EmitEnable(rgRareLabels[2]); // rare enable gc - EmitLabel(rgRejoinLabels[2]); // rejoin for rare enable gc + // move byte ptr [ebx + Thread.m_fPreemptiveGCDisabled],0 + X86EmitOffsetModRM(0xc6, (X86Reg)0, kEBX, Thread::GetOffsetOfGCFlag()); + Emit8(0); #ifdef PROFILING_SUPPORTED // If profiling is active, emit code to notify profiler of transition @@ -2800,12 +2795,6 @@ void StubLinkerCPU::EmitSharedComMethodStubEpilog(TADDR pFrameVptr, // keeps on going past the previous "jmp ecx". X86EmitReturn(0); - //----------------------------------------------------------------------- - // The out-of-line portion of enabling preemptive GC - rarely executed - //----------------------------------------------------------------------- - EmitLabel(rgRareLabels[2]); // label for rare enable gc - EmitRareEnable(rgRejoinLabels[2]); // emit rare enable gc - //----------------------------------------------------------------------- // The out-of-line portion of disabling preemptive GC - rarely executed //----------------------------------------------------------------------- @@ -3335,77 +3324,6 @@ VOID StubLinkerCPU::EmitUnwindInfoCheckSubfunction() #if defined(FEATURE_COMINTEROP) && defined(TARGET_X86) -//----------------------------------------------------------------------- -// Generates the inline portion of the code to enable preemptive GC. Hopefully, -// the inline code is all that will execute most of the time. If this code -// path is entered at certain times, however, it will need to jump out to -// a separate out-of-line path which is more expensive. The "pForwardRef" -// label indicates the start of the out-of-line path. -// -// Assumptions: -// ebx = Thread -// Preserves -// all registers except ecx. 
-// -//----------------------------------------------------------------------- -VOID StubLinkerCPU::EmitEnable(CodeLabel *pForwardRef) -{ - CONTRACTL - { - STANDARD_VM_CHECK; - - PRECONDITION(4 == sizeof( ((Thread*)0)->m_State )); - PRECONDITION(4 == sizeof( ((Thread*)0)->m_fPreemptiveGCDisabled )); - } - CONTRACTL_END; - - // move byte ptr [ebx + Thread.m_fPreemptiveGCDisabled],0 - X86EmitOffsetModRM(0xc6, (X86Reg)0, kEBX, Thread::GetOffsetOfGCFlag()); - Emit8(0); - - _ASSERTE(FitsInI1(Thread::TS_CatchAtSafePoint)); - - // test byte ptr [ebx + Thread.m_State], TS_CatchAtSafePoint - X86EmitOffsetModRM(0xf6, (X86Reg)0, kEBX, Thread::GetOffsetOfState()); - Emit8(Thread::TS_CatchAtSafePoint); - - // jnz RarePath - X86EmitCondJump(pForwardRef, X86CondCode::kJNZ); - -#ifdef _DEBUG - X86EmitDebugTrashReg(kECX); -#endif - -} - - -//----------------------------------------------------------------------- -// Generates the out-of-line portion of the code to enable preemptive GC. -// After the work is done, the code jumps back to the "pRejoinPoint" -// which should be emitted right after the inline part is generated. -// -// Assumptions: -// ebx = Thread -// Preserves -// all registers except ecx. -// -//----------------------------------------------------------------------- -VOID StubLinkerCPU::EmitRareEnable(CodeLabel *pRejoinPoint) -{ - STANDARD_VM_CONTRACT; - - X86EmitCall(NewExternalCodeLabel((LPVOID) StubRareEnable), 0); -#ifdef _DEBUG - X86EmitDebugTrashReg(kECX); -#endif - if (pRejoinPoint) - { - X86EmitNearJump(pRejoinPoint); - } - -} - - //----------------------------------------------------------------------- // Generates the inline portion of the code to disable preemptive GC. Hopefully, // the inline code is all that will execute most of the time. 
If this code diff --git a/src/coreclr/vm/i386/stublinkerx86.h b/src/coreclr/vm/i386/stublinkerx86.h index 922babee24a2fb..7afb4f6a5ffe8c 100644 --- a/src/coreclr/vm/i386/stublinkerx86.h +++ b/src/coreclr/vm/i386/stublinkerx86.h @@ -323,9 +323,6 @@ class StubLinkerCPU : public StubLinker } #if defined(FEATURE_COMINTEROP) && defined(TARGET_X86) - VOID EmitEnable(CodeLabel *pForwardRef); - VOID EmitRareEnable(CodeLabel *pRejoinPoint); - VOID EmitDisable(CodeLabel *pForwardRef, BOOL fCallIn, X86Reg ThreadReg); VOID EmitRareDisable(CodeLabel *pRejoinPoint); VOID EmitRareDisableHRESULT(CodeLabel *pRejoinPoint, CodeLabel *pExitPoint); diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp index 1c9d27a6e2aab4..34b90e0647e91a 100644 --- a/src/coreclr/vm/threads.cpp +++ b/src/coreclr/vm/threads.cpp @@ -1399,13 +1399,6 @@ Thread::Thread() #ifdef _DEBUG m_ulForbidTypeLoad = 0; m_GCOnTransitionsOK = TRUE; -#endif - -#ifdef ENABLE_CONTRACTS - m_ulEnablePreemptiveGCCount = 0; -#endif - -#ifdef _DEBUG dbg_m_cSuspendedThreads = 0; dbg_m_cSuspendedThreadsWithoutOSLock = 0; m_Creator.Clear(); diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index 2e1b979ed1ed13..cbd795ff99886d 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -1397,11 +1397,6 @@ class Thread // holding a spin lock in coop mode and transit to preemp mode will cause deadlock on GC _ASSERTE ((m_StateNC & Thread::TSNC_OwnsSpinLock) == 0); -#ifdef ENABLE_CONTRACTS_IMPL - _ASSERTE(!GCForbidden()); - TriggersGC(this); -#endif - // ------------------------------------------------------------------------ // ** WARNING ** WARNING ** WARNING ** WARNING ** WARNING ** WARNING ** | // ------------------------------------------------------------------------ @@ -1414,21 +1409,12 @@ class Thread // ------------------------------------------------------------------------ m_fPreemptiveGCDisabled.StoreWithoutBarrier(0); -#ifdef ENABLE_CONTRACTS - m_ulEnablePreemptiveGCCount ++; -#endif // _DEBUG - - // TODO: VS remove this. no polling on going to preemptive - // Delete RareEnablePreemptiveGC as well. - if (CatchAtSafePointOpportunistic()) - RareEnablePreemptiveGC(); #endif } #if defined(STRESS_HEAP) && defined(_DEBUG) void PerformPreemptiveGC(); #endif - void RareEnablePreemptiveGC(); void PulseGCMode(); //-------------------------------------------------------------- @@ -2804,11 +2790,6 @@ class Thread #endif // TRACK_SYNC -private: -#ifdef ENABLE_CONTRACTS_DATA - ULONG m_ulEnablePreemptiveGCCount; -#endif // _DEBUG - private: // For suspends: CLREvent m_DebugSuspendEvent; diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 62cf6bd4deb861..70b89fcdecdf41 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -2108,6 +2108,48 @@ void Thread::RareDisablePreemptiveGC() goto Exit; } +#if defined(STRESS_HEAP) && defined(_DEBUG) + if (!IsDetached()) + { + PerformPreemptiveGC(); + } +#endif + + // TODO: VS can we actually hold TSL here? + if (m_State & TS_DebugSuspendPending) + { + if (!ThreadStore::HoldingThreadStore(this)) + { + #ifdef FEATURE_HIJACK + // TODO: VS can we have hijacks here? why do we remove them? + // Remove any hijacks we might have. + UnhijackThread(); + #endif // FEATURE_HIJACK + + // If GC suspend is in progress we will block while switching to coop mode. + // But if we are doing a non-GC suspend, we need to block now. 
+ // Give the debugger precedence over user suspensions: + while ((m_State & TS_DebugSuspendPending) && !IsInForbidSuspendForDebuggerRegion()) + { + #ifdef DEBUGGING_SUPPORTED + // We don't notify the debugger that this thread is now suspended. We'll just + // let the debugger's helper thread sweep and pick it up. + // We also never take the TSL in here either. + // Life's much simpler this way... + #endif // DEBUGGING_SUPPORTED + + #ifdef LOGGING + { + LOG((LF_CORDB, LL_INFO1000, "[0x%x] SUSPEND: debug suspended while switching to coop mode.\n", GetThreadId())); + } + #endif + // unsets TS_DebugSuspendPending | TS_SyncSuspended + WaitSuspendEvents(); + } + } + } + + // Note IsGCInProgress is also true for say Pause (anywhere SuspendEE happens) and GCThread is the // thread that did the Pause. While in Pause if another thread attempts Rev/Pinvoke it should get inside the following and // block until resume @@ -2186,6 +2228,7 @@ void Thread::RareDisablePreemptiveGC() break; } + // TODO: VS why is this? Have we not just pulsed through GC? __SwitchToThread(0, ++dwSwitchCount); } STRESS_LOG0(LF_SYNC, LL_INFO1000, "RareDisablePreemptiveGC: leaving\n"); @@ -2271,7 +2314,7 @@ void Thread::PreWorkForThreadAbort() #if defined(STRESS_HEAP) && defined(_DEBUG) -// This function is for GC stress testing. Before we enable preemptive GC, let us do a GC +// This function is for GC stress testing. Before we disable preemptive GC, let us do a GC // because GC may happen while the thread is in preemptive GC mode. void Thread::PerformPreemptiveGC() { @@ -2329,76 +2372,6 @@ void Thread::PerformPreemptiveGC() } #endif // STRESS_HEAP && DEBUG -// To leave cooperative mode and enter preemptive mode, if a GC is in progress, we -// no longer care to suspend this thread. But if we are trying to suspend the thread -// for other reasons (e.g. Thread.Suspend()), now is a good time. -// -// Note that it is possible for an N/Direct call to leave the EE without explicitly -// enabling preemptive GC. -void Thread::RareEnablePreemptiveGC() -{ - CONTRACTL { - NOTHROW; - DISABLED(GC_TRIGGERS); // I think this is actually wrong: prevents a p->c->p mode switch inside a NOTRIGGER region. - } - CONTRACTL_END; - - // @todo - Needs a hard SO probe - CONTRACT_VIOLATION(GCViolation|FaultViolation); - - // If we have already received our PROCESS_DETACH during shutdown, there is only one thread in the - // process and no coordination is necessary. - if (IsAtProcessExit()) - return; - - _ASSERTE (!m_fPreemptiveGCDisabled); - - // holding a spin lock in coop mode and transit to preemp mode will cause deadlock on GC - _ASSERTE ((m_StateNC & Thread::TSNC_OwnsSpinLock) == 0); - - _ASSERTE(!MethodDescBackpatchInfoTracker::IsLockOwnedByCurrentThread() || IsInForbidSuspendForDebuggerRegion()); - -#if defined(STRESS_HEAP) && defined(_DEBUG) - if (!IsDetached()) - PerformPreemptiveGC(); -#endif - - STRESS_LOG1(LF_SYNC, LL_INFO100000, "RareEnablePreemptiveGC: entering. Thread state = %x\n", m_State.Load()); - if (!ThreadStore::HoldingThreadStore(this)) - { -#ifdef FEATURE_HIJACK - // Remove any hijacks we might have. - UnhijackThread(); -#endif // FEATURE_HIJACK - - // for GC, the fact that we are leaving the EE means that it no longer needs to - // suspend us. But if we are doing a non-GC suspend, we need to block now. 
- // Give the debugger precedence over user suspensions: - while ((m_State & TS_DebugSuspendPending) && !IsInForbidSuspendForDebuggerRegion()) - { - -#ifdef DEBUGGING_SUPPORTED - // We don't notify the debugger that this thread is now suspended. We'll just - // let the debugger's helper thread sweep and pick it up. - // We also never take the TSL in here either. - // Life's much simpler this way... - - -#endif // DEBUGGING_SUPPORTED - -#ifdef LOGGING - { - LOG((LF_CORDB, LL_INFO1000, "[0x%x] SUSPEND: suspended while enabling gc.\n", GetThreadId())); - } -#endif - - WaitSuspendEvents(); // sets bits, too - - } - } - STRESS_LOG0(LF_SYNC, LL_INFO100000, "RareEnablePreemptiveGC: leaving.\n"); -} - // Called when we are passing through a safe point in CommonTripThread or // HandleSuspensionForInterruptedThread. Do the right thing with this thread, // which can either mean waiting for the GC to complete, or performing a From ec11b1809968204ca31bf170e50971215253d8f6 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sat, 27 Apr 2024 19:27:12 -0700 Subject: [PATCH 06/26] cleanup RareDisablePreemptiveGC --- src/coreclr/vm/threads.h | 1 + src/coreclr/vm/threadsuspend.cpp | 115 ++++++++++++------------------- 2 files changed, 46 insertions(+), 70 deletions(-) diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index cbd795ff99886d..528b53b1b479c4 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -4281,6 +4281,7 @@ class ThreadStore // this means that no thread sharing the same scheduler with T2 can run. If T1 needs a lock which // is owned by one thread on the scheduler, T1 will wait forever. // Our solution is to move T2 to a safe point, resume it, and then do stack crawl. + // TODO: VS verify if above is true static CLREvent *s_pWaitForStackCrawlEvent; public: static void WaitForStackCrawlEvent() diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 70b89fcdecdf41..4177df9b5350a8 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -2046,7 +2046,7 @@ extern void WaitForEndOfShutdown(); //---------------------------------------------------------------------------- // A note on SUSPENSIONS. -// +// TODO: VS comment needs updating? // We must not suspend a thread while it is holding the ThreadStore lock, or // the lock on the thread. Why? Because we need those locks to resume the // thread (and to perform a GC, use the debugger, spawn or kill threads, etc.) @@ -2108,6 +2108,8 @@ void Thread::RareDisablePreemptiveGC() goto Exit; } + STRESS_LOG1(LF_SYNC, LL_INFO1000, "RareDisablePreemptiveGC: entering. Thread state = %x\n", m_State.Load()); + #if defined(STRESS_HEAP) && defined(_DEBUG) if (!IsDetached()) { @@ -2115,21 +2117,13 @@ void Thread::RareDisablePreemptiveGC() } #endif - // TODO: VS can we actually hold TSL here? - if (m_State & TS_DebugSuspendPending) + while (true) { + // TODO: VS can we actually hold TSL here? if (!ThreadStore::HoldingThreadStore(this)) { - #ifdef FEATURE_HIJACK - // TODO: VS can we have hijacks here? why do we remove them? - // Remove any hijacks we might have. - UnhijackThread(); - #endif // FEATURE_HIJACK - - // If GC suspend is in progress we will block while switching to coop mode. - // But if we are doing a non-GC suspend, we need to block now. 
- // Give the debugger precedence over user suspensions: - while ((m_State & TS_DebugSuspendPending) && !IsInForbidSuspendForDebuggerRegion()) + // If debugger wants the thread to suspend, give the debugger precedence. + if ((m_State & TS_DebugSuspendPending) && !IsInForbidSuspendForDebuggerRegion()) { #ifdef DEBUGGING_SUPPORTED // We don't notify the debugger that this thread is now suspended. We'll just @@ -2138,6 +2132,12 @@ void Thread::RareDisablePreemptiveGC() // Life's much simpler this way... #endif // DEBUGGING_SUPPORTED + #ifdef FEATURE_HIJACK + // TODO: VS can we have hijacks here? why do we remove them? + // Remove any hijacks we might have. + UnhijackThread(); + #endif // FEATURE_HIJACK + #ifdef LOGGING { LOG((LF_CORDB, LL_INFO1000, "[0x%x] SUSPEND: debug suspended while switching to coop mode.\n", GetThreadId())); @@ -2145,35 +2145,15 @@ void Thread::RareDisablePreemptiveGC() #endif // unsets TS_DebugSuspendPending | TS_SyncSuspended WaitSuspendEvents(); - } - } - } - - - // Note IsGCInProgress is also true for say Pause (anywhere SuspendEE happens) and GCThread is the - // thread that did the Pause. While in Pause if another thread attempts Rev/Pinvoke it should get inside the following and - // block until resume - if ((GCHeapUtilities::IsGCInProgress() && (this != ThreadSuspend::GetSuspensionThread())) || - ((m_State & TS_DebugSuspendPending) && !IsInForbidSuspendForDebuggerRegion()) || - (m_State & TS_StackCrawlNeeded)) - { - STRESS_LOG1(LF_SYNC, LL_INFO1000, "RareDisablePreemptiveGC: entering. Thread state = %x\n", m_State.Load()); - - DWORD dwSwitchCount = 0; - while (true) - { - EnablePreemptiveGC(); - - // Cannot use GCX_PREEMP_NO_DTOR here because we're inside of the thread - // PREEMP->COOP switch mechanism and GCX_PREEMP's assert's will fire. - // Instead we use BEGIN_GCX_ASSERT_PREEMP to inform Scan of the mode - // change here. - BEGIN_GCX_ASSERT_PREEMP; + // check again if we have something to do + continue; + } - // just wait until the GC is over. - if (this != ThreadSuspend::GetSuspensionThread()) + if (GCHeapUtilities::IsGCInProgress()) { + EnablePreemptiveGC(); + #ifdef PROFILING_SUPPORTED // If profiler desires GC events, notify it that this thread is waiting until the GC is over // Do not send suspend notifications for debugger suspensions @@ -2193,14 +2173,6 @@ void Thread::RareDisablePreemptiveGC() EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("Waiting for GC completion failed")); } - if (!GCHeapUtilities::IsGCInProgress()) - { - if (HasThreadState(TS_StackCrawlNeeded)) - { - ThreadStore::WaitForStackCrawlEvent(); - } - } - #ifdef PROFILING_SUPPORTED // Let the profiler know that this thread is resuming { @@ -2209,31 +2181,29 @@ void Thread::RareDisablePreemptiveGC() END_PROFILER_CALLBACK(); } #endif // PROFILING_SUPPORTED - } - - END_GCX_ASSERT_PREEMP; - // disable preemptive gc. - InterlockedOr((LONG*)&m_fPreemptiveGCDisabled, 1); + // disable preemptive gc. + m_fPreemptiveGCDisabled.StoreWithoutBarrier(1); - // The fact that we check whether 'this' is the GC thread may seem - // strange. After all, we determined this before entering the method. - // However, it is possible for the current thread to become the GC - // thread while in this loop. This happens if you use the COM+ - // debugger to suspend this thread and then release it. - if (! 
((GCHeapUtilities::IsGCInProgress() && (this != ThreadSuspend::GetSuspensionThread())) || - ((m_State & TS_DebugSuspendPending) && !IsInForbidSuspendForDebuggerRegion()) || - (m_State & TS_StackCrawlNeeded)) ) - { - break; + // check again if we have something to do + continue; } + } - // TODO: VS why is this? Have we not just pulsed through GC? - __SwitchToThread(0, ++dwSwitchCount); + if (HasThreadState(TS_StackCrawlNeeded)) + { + ThreadStore::WaitForStackCrawlEvent(); + + // check again if we have something to do + continue; } - STRESS_LOG0(LF_SYNC, LL_INFO1000, "RareDisablePreemptiveGC: leaving\n"); + + // nothing else to do + break; } + STRESS_LOG0(LF_SYNC, LL_INFO1000, "RareDisablePreemptiveGC: leaving\n"); + Exit: ; END_PRESERVE_LAST_ERROR; } @@ -3353,12 +3323,15 @@ void ThreadSuspend::SuspendRuntime(ThreadSuspend::SUSPEND_REASON reason) if (pTargetThread->m_fPreemptiveGCDisabled.LoadWithoutBarrier()) { - // TODO: VS rethink this. - pTargetThread->SetThreadState(Thread::TS_GCSuspendPending); + if (!pTargetThread->HasThreadStateOpportunistic(Thread::TS_GCSuspendPending)) + { + pTargetThread->SetThreadState(Thread::TS_GCSuspendPending); + } + remaining++; if (!observeOnly) { - // TODO: VS pTargetThread->Hijack(); + // TODO: VS factor out; if (!Thread::UseContextBasedThreadRedirection()) { @@ -3495,8 +3468,10 @@ void ThreadSuspend::SuspendRuntime(ThreadSuspend::SUSPEND_REASON reason) } else { - // TODO: VS rethink this. - pTargetThread->ResetThreadState(Thread::TS_GCSuspendFlags); + if (pTargetThread->HasThreadStateOpportunistic(Thread::TS_GCSuspendPending)) + { + pTargetThread->ResetThreadState(Thread::TS_GCSuspendFlags); + } } } From 17a83c6b357d1b23515e51fe1708e6f313a78b20 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sun, 28 Apr 2024 00:36:47 -0700 Subject: [PATCH 07/26] fix for x86 --- src/coreclr/vm/comtoclrcall.cpp | 2 -- src/coreclr/vm/i386/stublinkerx86.cpp | 12 ++++++------ src/coreclr/vm/threadsuspend.cpp | 2 +- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/coreclr/vm/comtoclrcall.cpp b/src/coreclr/vm/comtoclrcall.cpp index 215b6c61bb5812..9a65a3ebc8501c 100644 --- a/src/coreclr/vm/comtoclrcall.cpp +++ b/src/coreclr/vm/comtoclrcall.cpp @@ -1339,14 +1339,12 @@ Stub* ComCall::CreateGenericComCallStub(BOOL isFieldAccess) // ends up throwing an OOM exception. 
CodeLabel* rgRareLabels[] = { - psl->NewCodeLabel(), psl->NewCodeLabel(), psl->NewCodeLabel() }; CodeLabel* rgRejoinLabels[] = { - psl->NewCodeLabel(), psl->NewCodeLabel(), psl->NewCodeLabel() }; diff --git a/src/coreclr/vm/i386/stublinkerx86.cpp b/src/coreclr/vm/i386/stublinkerx86.cpp index 7dde57bf8ebd55..7a449213e767f4 100644 --- a/src/coreclr/vm/i386/stublinkerx86.cpp +++ b/src/coreclr/vm/i386/stublinkerx86.cpp @@ -2504,9 +2504,9 @@ void StubLinkerCPU::EmitComMethodStubProlog(TADDR pFrameVptr, STANDARD_VM_CHECK; PRECONDITION(rgRareLabels != NULL); - PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL && rgRareLabels[2] != NULL); + PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL); PRECONDITION(rgRejoinLabels != NULL); - PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL && rgRejoinLabels[2] != NULL); + PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL); } CONTRACTL_END; @@ -2610,9 +2610,9 @@ void StubLinkerCPU::EmitComMethodStubEpilog(TADDR pFrameVptr, STANDARD_VM_CHECK; PRECONDITION(rgRareLabels != NULL); - PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL && rgRareLabels[2] != NULL); + PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL); PRECONDITION(rgRejoinLabels != NULL); - PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL && rgRejoinLabels[2] != NULL); + PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL); PRECONDITION(4 == sizeof( ((Thread*)0)->m_State )); PRECONDITION(4 == sizeof( ((Thread*)0)->m_fPreemptiveGCDisabled )); } @@ -2725,9 +2725,9 @@ void StubLinkerCPU::EmitSharedComMethodStubEpilog(TADDR pFrameVptr, STANDARD_VM_CHECK; PRECONDITION(rgRareLabels != NULL); - PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL && rgRareLabels[2] != NULL); + PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL); PRECONDITION(rgRejoinLabels != NULL); - PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL && rgRejoinLabels[2] != NULL); + PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL); PRECONDITION(4 == sizeof( ((Thread*)0)->m_State )); PRECONDITION(4 == sizeof( ((Thread*)0)->m_fPreemptiveGCDisabled )); } diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 4177df9b5350a8..da239b948758d7 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -2201,7 +2201,7 @@ void Thread::RareDisablePreemptiveGC() // nothing else to do break; } - + STRESS_LOG0(LF_SYNC, LL_INFO1000, "RareDisablePreemptiveGC: leaving\n"); Exit: ; From e9be64b8d22eb219e1df0af6202cf3f561dc9e38 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sun, 28 Apr 2024 08:45:44 -0700 Subject: [PATCH 08/26] factored out Thread::Hijack --- src/coreclr/vm/threads.h | 3 + src/coreclr/vm/threadsuspend.cpp | 341 ++++++++++++++++--------------- 2 files changed, 182 insertions(+), 162 deletions(-) diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index 528b53b1b479c4..96db26958f6a68 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -548,6 +548,7 @@ class Thread friend void STDCALL OnHijackWorker(HijackArgs * pArgs); #ifdef FEATURE_THREAD_ACTIVATION friend void HandleSuspensionForInterruptedThread(CONTEXT *interruptedContext); + friend BOOL CheckActivationSafePoint(SIZE_T ip, BOOL checkingCurrentThread); #endif // FEATURE_THREAD_ACTIVATION #endif // FEATURE_HIJACK @@ -1801,7 +1802,9 @@ class Thread ThreadAbort, }; + void 
Hijack(); bool InjectActivation(ActivationReason reason); + #endif // FEATURE_THREAD_ACTIVATION #ifndef DISABLE_THREADSUSPEND diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index da239b948758d7..4f445a36090eb6 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -2125,24 +2125,24 @@ void Thread::RareDisablePreemptiveGC() // If debugger wants the thread to suspend, give the debugger precedence. if ((m_State & TS_DebugSuspendPending) && !IsInForbidSuspendForDebuggerRegion()) { - #ifdef DEBUGGING_SUPPORTED +#ifdef DEBUGGING_SUPPORTED // We don't notify the debugger that this thread is now suspended. We'll just // let the debugger's helper thread sweep and pick it up. // We also never take the TSL in here either. // Life's much simpler this way... - #endif // DEBUGGING_SUPPORTED +#endif // DEBUGGING_SUPPORTED - #ifdef FEATURE_HIJACK +#ifdef FEATURE_HIJACK // TODO: VS can we have hijacks here? why do we remove them? // Remove any hijacks we might have. UnhijackThread(); - #endif // FEATURE_HIJACK +#endif // FEATURE_HIJACK - #ifdef LOGGING +#ifdef LOGGING { LOG((LF_CORDB, LL_INFO1000, "[0x%x] SUSPEND: debug suspended while switching to coop mode.\n", GetThreadId())); } - #endif +#endif // unsets TS_DebugSuspendPending | TS_SyncSuspended WaitSuspendEvents(); @@ -2167,6 +2167,10 @@ void Thread::RareDisablePreemptiveGC() } #endif // PROFILING_SUPPORTED +#if defined(FEATURE_HIJACK) && !defined(TARGET_UNIX) + ResetThreadState(Thread::TS_GCSuspendRedirected); +#endif + DWORD status = GCHeapUtilities::GetGCHeap()->WaitUntilGCComplete(); if (status != S_OK) { @@ -3331,146 +3335,14 @@ void ThreadSuspend::SuspendRuntime(ThreadSuspend::SUSPEND_REASON reason) remaining++; if (!observeOnly) { - // TODO: VS factor out; - - if (!Thread::UseContextBasedThreadRedirection()) - { - // On platforms that do not support safe thread suspension, we do one of the following: - // - // - If we're on a Unix platform where hijacking is enabled, we attempt - // to inject an activation which will try to redirect or hijack the - // thread to get it to a safe point. - // - // - Similarly to above, if we're on a Windows platform where the special - // user-mode APC is available, that is used if redirection is necessary. - // - // - Otherwise, we rely on the GCPOLL mechanism enabled by - // TrapReturningThreads. - -#ifdef FEATURE_THREAD_ACTIVATION - bool success = pTargetThread->InjectActivation(Thread::ActivationReason::SuspendForGC); - if (!success) - { - STRESS_LOG1(LF_SYNC, LL_INFO1000, "Thread::SuspendRuntime() - Failed to inject an activation for thread %p.\n", pCurThread); - } -#endif // FEATURE_THREAD_ACTIVATION - - continue; - } - - #ifndef DISABLE_THREADSUSPEND - - if (pTargetThread->HasThreadStateOpportunistic(Thread::TS_GCSuspendRedirected)) - { - // We have seen this thead before and have redirected it. - // No point in suspending it again. It will not run hijackable code until it parks itself. - continue; - } - -#ifdef TIME_SUSPEND - DWORD startSuspend = g_SuspendStatistics.GetTime(); -#endif - - // - // Suspend the native thread. - // - - // We can not allocate memory after we suspend a thread. - // Otherwise, we may deadlock the process, because the thread we just suspended - // might hold locks we would need to acquire while allocating. 
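
The comment just above is the rule that puts ThreadStore::AllocateOSContext() ahead of the OS-level suspend: anything the suspender still needs to allocate or lock must be acquired before the target stops, because a stopped thread can never release what it already holds. A minimal sketch of that rule; os_suspend/os_resume and ContextCache are hypothetical stand-ins, not runtime APIs.

    // Sketch only. Invariant: no allocation and no lock acquisition between
    // os_suspend(target) and os_resume(target) - the target may own the very
    // lock we would block on, and it cannot release it while stopped.
    void InspectStoppedThread(Thread* target, ContextCache& cache)
    {
        Context* ctx = cache.Acquire();  // may allocate / take locks - do it now
        os_suspend(target);              // target makes no progress past this point
        CaptureContext(target, ctx);     // strictly lock- and allocation-free
        os_resume(target);
    }
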
- ThreadStore::AllocateOSContext(); - Thread::SuspendThreadResult str = pTargetThread->SuspendThread(/*fOneTryOnly*/ TRUE); - - // We should just always build with this TIME_SUSPEND stuff, and report the results via ETW. -#ifdef TIME_SUSPEND - g_SuspendStatistics.osSuspend.Accumulate( - SuspendStatistics::GetElapsed(startSuspend, - g_SuspendStatistics.GetTime())); - - if (str == Thread::STR_Success) - g_SuspendStatistics.cntOSSuspendResume++; - else - g_SuspendStatistics.cntFailedSuspends++; -#endif - - switch (str) - { - case Thread::STR_Success: - // let's check the state of this one. - break; - - case Thread::STR_Forbidden: - STRESS_LOG1(LF_SYNC, LL_INFO1000, " Suspending thread 0x%x forbidden\n", pCurThread); - continue; - - case Thread::STR_NoStressLog: - STRESS_LOG2(LF_SYNC, LL_ERROR, " ERROR: Could not suspend thread 0x%x, result = %d\n", pCurThread, str); - continue; - - case Thread::STR_UnstartedOrDead: - case Thread::STR_Failure: - STRESS_LOG3(LF_SYNC, LL_ERROR, " ERROR: Could not suspend thread 0x%x, result = %d, lastError = 0x%x\n", pCurThread, str, GetLastError()); - continue; - } - - // the thread is suspended here, we can hijack, if platform supports. - - if (!pTargetThread->m_fPreemptiveGCDisabled.LoadWithoutBarrier()) - { - // actually, we are done with this one - STRESS_LOG1(LF_SYNC, LL_INFO1000, " Thread %x went preemptive while suspending it is at a GC safe point\n", pCurThread); - pTargetThread->ResetThreadState(Thread::TS_GCSuspendFlags); - pTargetThread->ResumeThread(); - continue; - } - - // We now know for sure that the thread is still in cooperative mode. If it's in JIT'd code, here - // is where we try to hijack/redirect the thread. If it's in VM code, we have to just let the VM - // finish what it's doing. - -#if defined(FEATURE_HIJACK) && !defined(TARGET_UNIX) - { - Thread::WorkingOnThreadContextHolder workingOnThreadContext(pCurThread); - - // Note that pTargetThread->HandledJITCase is not a simple predicate - it actually will hijack the thread if that's possible. - // So HandledJITCase can do one of these: - // - Return TRUE, in which case it's our responsibility to redirect the thread - // - Return FALSE after hijacking the thread - we shouldn't try to redirect - // - Return FALSE but not hijack the thread - there's nothing we can do either - if (workingOnThreadContext.Acquired() && pTargetThread->HandledJITCase()) - { - // Thread is in cooperative state and stopped in interruptible code. - // Redirect thread so we can capture a good thread context - // (GetThreadContext is not sufficient, due to an OS bug). - if (!pTargetThread->CheckForAndDoRedirectForGC()) - { -#ifdef TIME_SUSPEND - g_SuspendStatistics.cntFailedRedirections++; -#endif - STRESS_LOG1(LF_SYNC, LL_INFO1000, "Failed to CheckForAndDoRedirectForGC(). 
Thread %p\n", pTargetThread); - } - else - { -#ifdef TIME_SUSPEND - g_SuspendStatistics.cntRedirections++; -#endif - pTargetThread->SetThreadState(Thread::TS_GCSuspendRedirected); - STRESS_LOG1(LF_SYNC, LL_INFO1000, "Thread::SuspendRuntime() - Thread %p redirected().\n", pTargetThread); - } - } - } -#endif // FEATURE_HIJACK && !TARGET_UNIX - - pTargetThread->ResumeThread(); - STRESS_LOG1(LF_SYNC, LL_INFO1000, " Thread 0x%x is in cooperative needs to rendezvous\n", pTargetThread); -#endif // !DISABLE_THREADSUSPEND + pTargetThread->Hijack(); } } else { if (pTargetThread->HasThreadStateOpportunistic(Thread::TS_GCSuspendPending)) { - pTargetThread->ResetThreadState(Thread::TS_GCSuspendFlags); + pTargetThread->ResetThreadState(Thread::TS_GCSuspendPending); } } } @@ -3504,9 +3376,9 @@ void ThreadSuspend::SuspendRuntime(ThreadSuspend::SUSPEND_REASON reason) #ifdef PROFILING_SUPPORTED // If a profiler is keeping track of GC events, notify it { - BEGIN_PROFILER_CALLBACK(CORProfilerTrackSuspends()); - (&g_profControlBlock)->RuntimeSuspendFinished(); - END_PROFILER_CALLBACK(); + BEGIN_PROFILER_CALLBACK(CORProfilerTrackSuspends()); + (&g_profControlBlock)->RuntimeSuspendFinished(); + END_PROFILER_CALLBACK(); } #endif // PROFILING_SUPPORTED @@ -3538,6 +3410,137 @@ void ThreadSuspend::SuspendRuntime(ThreadSuspend::SUSPEND_REASON reason) STRESS_LOG0(LF_SYNC, LL_INFO1000, "Thread::SuspendRuntime() - Success\n"); } +void Thread::Hijack() +{ + if (!Thread::UseContextBasedThreadRedirection()) + { + // On platforms that have signal-like API, we do one of the following: + // - If we're on a Unix platform where hijacking is enabled, we attempt + // to inject an activation which will try to redirect or hijack the + // thread to get it to a safe point. + // + // - Similarly to above, if we're on a Windows platform where the special + // user-mode APC is available, that is used if redirection is necessary. + // + // - Otherwise, we rely on the GCPOLL mechanism enabled by + // TrapReturningThreads. +#ifdef FEATURE_THREAD_ACTIVATION + bool success = InjectActivation(Thread::ActivationReason::SuspendForGC); + if (!success) + { + STRESS_LOG1(LF_SYNC, LL_INFO1000, "Thread::SuspendRuntime() - Failed to inject an activation for thread %p.\n", this); + } +#endif // FEATURE_THREAD_ACTIVATION + return; + } + +#ifndef DISABLE_THREADSUSPEND + if (HasThreadStateOpportunistic(Thread::TS_GCSuspendRedirected)) + { + // We have seen this thead before and have redirected it. + // No point in suspending it again. It will not run hijackable code until it blocks on GC event. + return; + } + +#ifdef TIME_SUSPEND + DWORD startSuspend = g_SuspendStatistics.GetTime(); +#endif + + // + // Suspend the native thread. + // + + // We can not allocate memory after we suspend a thread. + // Otherwise, we may deadlock the process, because the thread we just suspended + // might hold locks we would need to acquire while allocating. + ThreadStore::AllocateOSContext(); + Thread::SuspendThreadResult str = SuspendThread(/*fOneTryOnly*/ TRUE); + + // We should just always build with this TIME_SUSPEND stuff, and report the results via ETW. +#ifdef TIME_SUSPEND + g_SuspendStatistics.osSuspend.Accumulate( + SuspendStatistics::GetElapsed(startSuspend, + g_SuspendStatistics.GetTime())); + + if (str == Thread::STR_Success) + g_SuspendStatistics.cntOSSuspendResume++; + else + g_SuspendStatistics.cntFailedSuspends++; +#endif + + switch (str) + { + case Thread::STR_Success: + // let's check the state of this one. 
+ break; + + case Thread::STR_Forbidden: + STRESS_LOG1(LF_SYNC, LL_INFO1000, " Suspending thread 0x%x forbidden\n", this); + return; + + case Thread::STR_NoStressLog: + STRESS_LOG2(LF_SYNC, LL_ERROR, " ERROR: Could not suspend thread 0x%x, result = %d\n", this, str); + return; + + case Thread::STR_UnstartedOrDead: + case Thread::STR_Failure: + STRESS_LOG3(LF_SYNC, LL_ERROR, " ERROR: Could not suspend thread 0x%x, result = %d, lastError = 0x%x\n", this, str, GetLastError()); + return; + } + + // the thread is suspended here, we can hijack, if it is stopped in hijackable location + + if (!m_fPreemptiveGCDisabled.LoadWithoutBarrier()) + { + // actually, we are done with this one + STRESS_LOG1(LF_SYNC, LL_INFO1000, " Thread %x went preemptive while suspending it is at a GC safe point\n", this); + ResetThreadState(Thread::TS_GCSuspendFlags); + ResumeThread(); + return; + } + + // We now know for sure that the thread is still in cooperative mode. If it's in JIT'd code, here + // is where we try to hijack/redirect the thread. If it's in VM code, we have to just let the VM + // finish what it's doing. + +#if defined(FEATURE_HIJACK) && !defined(TARGET_UNIX) + { + Thread::WorkingOnThreadContextHolder workingOnThreadContext(this); + + // Note that pTargetThread->HandledJITCase is not a simple predicate - it actually will hijack the thread if that's possible. + // So HandledJITCase can do one of these: + // - Return TRUE, in which case it's our responsibility to redirect the thread + // - Return FALSE after hijacking the thread - we shouldn't try to redirect + // - Return FALSE but not hijack the thread - there's nothing we can do either + if (workingOnThreadContext.Acquired() && HandledJITCase()) + { + // Thread is in cooperative state and stopped in interruptible code. + // Redirect thread so we can capture a good thread context + // (GetThreadContext is not sufficient, due to an OS bug). + if (!CheckForAndDoRedirectForGC()) + { +#ifdef TIME_SUSPEND + g_SuspendStatistics.cntFailedRedirections++; +#endif + STRESS_LOG1(LF_SYNC, LL_INFO1000, "Failed to CheckForAndDoRedirectForGC(). Thread %p\n", this); + } + else + { +#ifdef TIME_SUSPEND + g_SuspendStatistics.cntRedirections++; +#endif + SetThreadState(Thread::TS_GCSuspendRedirected); + STRESS_LOG1(LF_SYNC, LL_INFO1000, "Thread::SuspendRuntime() - Thread %p redirected().\n", this); + } + } + } +#endif // FEATURE_HIJACK && !TARGET_UNIX + + ResumeThread(); + STRESS_LOG1(LF_SYNC, LL_INFO1000, " Thread 0x%x is in cooperative needs to rendezvous\n", this); +#endif // !DISABLE_THREADSUSPEND +} + #ifdef HAVE_GCCOVER void Thread::CommitGCStressInstructionUpdate() @@ -5689,6 +5692,8 @@ void ThreadSuspend::SuspendEE(SUSPEND_REASON reason) // is in a function where we can safely handle an activation. BOOL CheckActivationSafePoint(SIZE_T ip, BOOL checkingCurrentThread) { + // TODO: VS isn't it always for checkingCurrentThread? + Thread *pThread = GetThreadNULLOk(); // It is safe to call the ExecutionManager::IsManagedCode only if we are making the check for // a thread different from the current one or if the current thread is in the cooperative mode. @@ -5696,7 +5701,13 @@ BOOL CheckActivationSafePoint(SIZE_T ip, BOOL checkingCurrentThread) // thread was holding the ExecutionManager's writer lock. // When the thread is in preemptive mode, we know for sure that it is not executing managed code. 
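
The deadlock argument in the removed comment above is what the current-thread-only version below keeps: the code-manager lookup is only safe from cooperative mode. A compact sketch of the predicate, where InCoopMode() and IsManagedIp() are stand-ins for PreemptiveGCDisabled() and ExecutionManager::IsManagedCode(); this is an illustration, not the CoreCLR code itself.

    // Sketch of the safe-point predicate described above.
    bool IsActivationSafePoint(uintptr_t ip, RuntimeThread* self)
    {
        // Consult the code manager only from cooperative mode: the interrupt
        // may have landed while this very thread held the code manager's
        // writer lock in preemptive mode, and re-entering the lookup here
        // could self-deadlock.
        return self != nullptr && self->InCoopMode() && IsManagedIp(ip);
    }
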
BOOL checkForManagedCode = !checkingCurrentThread || (pThread != NULL && pThread->PreemptiveGCDisabled()); - return checkForManagedCode && ExecutionManager::IsManagedCode(ip); + BOOL isSafePoint = checkForManagedCode && ExecutionManager::IsManagedCode(ip); + if (!isSafePoint) + { + pThread->m_hasPendingActivation = false; + } + + return isSafePoint; } // This function is called when thread suspension is pending. It tries to ensure that the current @@ -5716,6 +5727,14 @@ BOOL CheckActivationSafePoint(SIZE_T ip, BOOL checkingCurrentThread) // void HandleSuspensionForInterruptedThread(CONTEXT *interruptedContext) { + struct AutoClearPendingThreadActivation + { + ~AutoClearPendingThreadActivation() + { + GetThread()->m_hasPendingActivation = false; + } + } autoClearPendingThreadActivation; + Thread *pThread = GetThread(); if (pThread->PreemptiveGCDisabled() != TRUE) @@ -5756,6 +5775,7 @@ void HandleSuspensionForInterruptedThread(CONTEXT *interruptedContext) frame.Push(pThread); + // TODO: VS pThread->PulseGCMode(); pThread->EnablePreemptiveGC(); pThread->DisablePreemptiveGC(); @@ -5830,14 +5850,6 @@ void Thread::ApcActivationCallback(ULONG_PTR Parameter) ActivationReason reason = (ActivationReason)pData->Parameter; PCONTEXT pContext = pData->ContextRecord; - struct AutoClearPendingThreadActivation - { - ~AutoClearPendingThreadActivation() - { - GetThread()->m_hasPendingActivation = false; - } - } autoClearPendingThreadActivation; - if (!CheckActivationSafePoint(GetIP(pContext), true /* checkingCurrentThread */)) { return; @@ -5859,15 +5871,17 @@ void Thread::ApcActivationCallback(ULONG_PTR Parameter) bool Thread::InjectActivation(ActivationReason reason) { -#ifdef FEATURE_SPECIAL_USER_MODE_APC - _ASSERTE(UseSpecialUserModeApc()); - + // Try to avoid sending signals/APCs when another one is in progress, + // as they may interrupt one another or queue up. + // Just one at a time is enough. More is not better here. 
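
The "just one at a time" policy above is easy to state in isolation. The sketch below shows the general shape with a hypothetical std::atomic flag and a stand-in DeliverSignalOrApc(); in the patch the flag is the plain m_hasPendingActivation field, set before the PAL_InjectActivation / special user-mode APC delivery and cleared again on the target side once the activation is examined.

    #include <atomic>

    std::atomic<bool> g_pendingActivation{false};

    bool DeliverSignalOrApc();  // stand-in for the platform-specific delivery

    bool TryInjectActivation()
    {
        bool expected = false;
        if (!g_pendingActivation.compare_exchange_strong(expected, true))
            return true;                       // one is already in flight; don't pile up

        if (!DeliverSignalOrApc())
        {
            g_pendingActivation.store(false);  // delivery failed; allow a later retry
            return false;
        }
        return true;                           // cleared by the interrupted thread
    }
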
if (m_hasPendingActivation) { - // Try to avoid nesting special user-mode APCs, as they can interrupt one another return true; } +#ifdef FEATURE_SPECIAL_USER_MODE_APC + _ASSERTE(UseSpecialUserModeApc()); + HANDLE hThread = GetThreadHandle(); if (hThread == INVALID_HANDLE_VALUE) { @@ -5887,12 +5901,15 @@ bool Thread::InjectActivation(ActivationReason reason) _ASSERTE((reason == ActivationReason::SuspendForGC) || (reason == ActivationReason::ThreadAbort) || (reason == ActivationReason::SuspendForDebugger)); static ConfigDWORD injectionEnabled; - if (injectionEnabled.val(CLRConfig::INTERNAL_ThreadSuspendInjection) == 0) - return false; - - HANDLE hThread = GetThreadHandle(); - if (hThread != INVALID_HANDLE_VALUE) - return ::PAL_InjectActivation(hThread); + if (injectionEnabled.val(CLRConfig::INTERNAL_ThreadSuspendInjection) != 0) + { + HANDLE hThread = GetThreadHandle(); + if (hThread != INVALID_HANDLE_VALUE) + { + m_hasPendingActivation = true; + return ::PAL_InjectActivation(hThread); + } + } return false; #else From 4ca217d2af5b8016a6e422e6e456e0402ac82db7 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sun, 28 Apr 2024 10:39:00 -0700 Subject: [PATCH 09/26] fix build for non-x64 windows --- src/coreclr/clrdefinitions.cmake | 1 + src/coreclr/vm/threads.h | 4 +- src/coreclr/vm/threadsuspend.cpp | 95 ++++++++++++++++---------------- 3 files changed, 50 insertions(+), 50 deletions(-) diff --git a/src/coreclr/clrdefinitions.cmake b/src/coreclr/clrdefinitions.cmake index 1970b6c33c7544..79805d5bd669fc 100644 --- a/src/coreclr/clrdefinitions.cmake +++ b/src/coreclr/clrdefinitions.cmake @@ -223,6 +223,7 @@ if (NOT CLR_CMAKE_TARGET_ARCH_I386 OR NOT CLR_CMAKE_TARGET_WIN32) add_compile_definitions($<$>>:FEATURE_EH_FUNCLETS>) endif (NOT CLR_CMAKE_TARGET_ARCH_I386 OR NOT CLR_CMAKE_TARGET_WIN32) +# TODO: VS enable on arm64 if (CLR_CMAKE_TARGET_WIN32 AND CLR_CMAKE_TARGET_ARCH_AMD64) add_definitions(-DFEATURE_SPECIAL_USER_MODE_APC) endif() diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index 96db26958f6a68..3cf845d331a655 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -1794,6 +1794,8 @@ class Thread STR_NoStressLog, }; + void Hijack(); + #ifdef FEATURE_THREAD_ACTIVATION enum class ActivationReason { @@ -1802,9 +1804,7 @@ class Thread ThreadAbort, }; - void Hijack(); bool InjectActivation(ActivationReason reason); - #endif // FEATURE_THREAD_ACTIVATION #ifndef DISABLE_THREADSUSPEND diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 4f445a36090eb6..edc56f1a1949b9 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -2105,6 +2105,9 @@ void Thread::RareDisablePreemptiveGC() if (ThreadStore::HoldingThreadStore(this)) { + // A thread performing GC may swithch modes around locks inside GC. We will ignore that. + // Otherwise noone should be holding TSL and try switching to coop mode. + _ASSERTE(ThreadSuspend::GetSuspensionThread() == this); goto Exit; } @@ -2119,79 +2122,75 @@ void Thread::RareDisablePreemptiveGC() while (true) { - // TODO: VS can we actually hold TSL here? - if (!ThreadStore::HoldingThreadStore(this)) + // If debugger wants the thread to suspend, give the debugger precedence. + if ((m_State & TS_DebugSuspendPending) && !IsInForbidSuspendForDebuggerRegion()) { - // If debugger wants the thread to suspend, give the debugger precedence. 
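
Reduced to its skeleton, the loop being reshaped in this and the later patches follows one pattern: test each blocking condition, and if one holds, drop to preemptive mode, wait, come back to cooperative mode, and re-test everything from the top. The sketch below is that skeleton only; every name in it is a hypothetical stand-in (the real waits are WaitSuspendEvents, WaitUntilGCComplete and WaitForStackCrawlEvent), and the mode is flipped directly rather than through the real helpers.

    void CoopEntryRarePath(Thread* self)
    {
        for (;;)
        {
            if (DebuggerWantsSuspend(self))
            {
                self->SetMode(Mode::Preemptive);   // safe to suspend/scan while we wait
                WaitForDebuggerResume(self);
                self->SetMode(Mode::Cooperative);
                continue;                          // conditions may have changed; re-test all
            }
            if (GcIsInProgress())
            {
                self->SetMode(Mode::Preemptive);
                WaitForGcToComplete();
                self->SetMode(Mode::Cooperative);
                continue;
            }
            if (StackCrawlRequested(self))
            {
                self->SetMode(Mode::Preemptive);
                WaitForStackCrawl();
                self->SetMode(Mode::Cooperative);
                continue;
            }
            break;  // nothing left to wait for; stay in cooperative mode
        }
    }
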
- if ((m_State & TS_DebugSuspendPending) && !IsInForbidSuspendForDebuggerRegion()) - { #ifdef DEBUGGING_SUPPORTED - // We don't notify the debugger that this thread is now suspended. We'll just - // let the debugger's helper thread sweep and pick it up. - // We also never take the TSL in here either. - // Life's much simpler this way... + // We don't notify the debugger that this thread is now suspended. We'll just + // let the debugger's helper thread sweep and pick it up. + // We also never take the TSL in here either. + // Life's much simpler this way... #endif // DEBUGGING_SUPPORTED #ifdef FEATURE_HIJACK - // TODO: VS can we have hijacks here? why do we remove them? - // Remove any hijacks we might have. - UnhijackThread(); + // TODO: VS can we have hijacks here? why do we remove them? + // Remove any hijacks we might have. + UnhijackThread(); #endif // FEATURE_HIJACK #ifdef LOGGING - { - LOG((LF_CORDB, LL_INFO1000, "[0x%x] SUSPEND: debug suspended while switching to coop mode.\n", GetThreadId())); - } + { + LOG((LF_CORDB, LL_INFO1000, "[0x%x] SUSPEND: debug suspended while switching to coop mode.\n", GetThreadId())); + } #endif - // unsets TS_DebugSuspendPending | TS_SyncSuspended - WaitSuspendEvents(); + // unsets TS_DebugSuspendPending | TS_SyncSuspended + WaitSuspendEvents(); - // check again if we have something to do - continue; - } + // check again if we have something to do + continue; + } - if (GCHeapUtilities::IsGCInProgress()) - { - EnablePreemptiveGC(); + if (GCHeapUtilities::IsGCInProgress()) + { + EnablePreemptiveGC(); #ifdef PROFILING_SUPPORTED - // If profiler desires GC events, notify it that this thread is waiting until the GC is over - // Do not send suspend notifications for debugger suspensions + // If profiler desires GC events, notify it that this thread is waiting until the GC is over + // Do not send suspend notifications for debugger suspensions + { + BEGIN_PROFILER_CALLBACK(CORProfilerTrackSuspends()); + if (!(m_State & TS_DebugSuspendPending)) { - BEGIN_PROFILER_CALLBACK(CORProfilerTrackSuspends()); - if (!(m_State & TS_DebugSuspendPending)) - { - (&g_profControlBlock)->RuntimeThreadSuspended((ThreadID)this); - } - END_PROFILER_CALLBACK(); + (&g_profControlBlock)->RuntimeThreadSuspended((ThreadID)this); } + END_PROFILER_CALLBACK(); + } #endif // PROFILING_SUPPORTED #if defined(FEATURE_HIJACK) && !defined(TARGET_UNIX) - ResetThreadState(Thread::TS_GCSuspendRedirected); + ResetThreadState(Thread::TS_GCSuspendRedirected); #endif - DWORD status = GCHeapUtilities::GetGCHeap()->WaitUntilGCComplete(); - if (status != S_OK) - { - EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("Waiting for GC completion failed")); - } + DWORD status = GCHeapUtilities::GetGCHeap()->WaitUntilGCComplete(); + if (status != S_OK) + { + EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("Waiting for GC completion failed")); + } #ifdef PROFILING_SUPPORTED - // Let the profiler know that this thread is resuming - { - BEGIN_PROFILER_CALLBACK(CORProfilerTrackSuspends()); - (&g_profControlBlock)->RuntimeThreadResumed((ThreadID)this); - END_PROFILER_CALLBACK(); - } + // Let the profiler know that this thread is resuming + { + BEGIN_PROFILER_CALLBACK(CORProfilerTrackSuspends()); + (&g_profControlBlock)->RuntimeThreadResumed((ThreadID)this); + END_PROFILER_CALLBACK(); + } #endif // PROFILING_SUPPORTED - // disable preemptive gc. - m_fPreemptiveGCDisabled.StoreWithoutBarrier(1); + // disable preemptive gc. 
+ m_fPreemptiveGCDisabled.StoreWithoutBarrier(1); - // check again if we have something to do - continue; - } + // check again if we have something to do + continue; } if (HasThreadState(TS_StackCrawlNeeded)) From fb2c654cb90358aacf169f6761167a781cb50bce Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sun, 28 Apr 2024 11:45:52 -0700 Subject: [PATCH 10/26] assert noone holds TSL into coop mode --- src/coreclr/vm/threadsuspend.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index edc56f1a1949b9..01d18aaaf94150 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -2103,14 +2103,16 @@ void Thread::RareDisablePreemptiveGC() goto Exit; } - if (ThreadStore::HoldingThreadStore(this)) + // A thread performing GC may try swithch modes around locks inside GC. We will ignore that. + // (NativeAOT analog scenario would have the thread in DoNotTriggerGc mode) + if (ThreadSuspend::GetSuspensionThread() == this) { - // A thread performing GC may swithch modes around locks inside GC. We will ignore that. - // Otherwise noone should be holding TSL and try switching to coop mode. - _ASSERTE(ThreadSuspend::GetSuspensionThread() == this); goto Exit; } + // Noone else should be holding TSL and try switching to coop mode. + _ASSERTE(!ThreadStore::HoldingThreadStore(this)); + STRESS_LOG1(LF_SYNC, LL_INFO1000, "RareDisablePreemptiveGC: entering. Thread state = %x\n", m_State.Load()); #if defined(STRESS_HEAP) && defined(_DEBUG) From 1fb68ea78bdc900cf714887ad0bf658836cc3fed Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sun, 28 Apr 2024 12:07:46 -0700 Subject: [PATCH 11/26] activation safety check is always for the current thread --- src/coreclr/pal/inc/pal.h | 2 +- src/coreclr/pal/src/exception/signal.cpp | 2 +- src/coreclr/vm/threads.h | 2 +- src/coreclr/vm/threadsuspend.cpp | 30 +++++++++++++----------- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index 70b934a8ccd88e..340e4933bcd31a 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -3772,7 +3772,7 @@ PALAPI FlushProcessWriteBuffers(); typedef void (*PAL_ActivationFunction)(CONTEXT *context); -typedef BOOL (*PAL_SafeActivationCheckFunction)(SIZE_T ip, BOOL checkingCurrentThread); +typedef BOOL (*PAL_SafeActivationCheckFunction)(SIZE_T ip); PALIMPORT VOID diff --git a/src/coreclr/pal/src/exception/signal.cpp b/src/coreclr/pal/src/exception/signal.cpp index 0da7f05001b905..c9112432f8d930 100644 --- a/src/coreclr/pal/src/exception/signal.cpp +++ b/src/coreclr/pal/src/exception/signal.cpp @@ -833,7 +833,7 @@ static void inject_activation_handler(int code, siginfo_t *siginfo, void *contex &winContext, contextFlags); - if (g_safeActivationCheckFunction(CONTEXTGetPC(&winContext), /* checkingCurrentThread */ TRUE)) + if (g_safeActivationCheckFunction(CONTEXTGetPC(&winContext))) { g_inject_activation_context_locvar_offset = (int)((char*)&winContext - (char*)__builtin_frame_address(0)); int savedErrNo = errno; // Make sure that errno is not modified diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index 3cf845d331a655..8b9d1251a1a0ba 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -548,7 +548,7 @@ class Thread friend void STDCALL OnHijackWorker(HijackArgs * pArgs); #ifdef FEATURE_THREAD_ACTIVATION friend void 
HandleSuspensionForInterruptedThread(CONTEXT *interruptedContext); - friend BOOL CheckActivationSafePoint(SIZE_T ip, BOOL checkingCurrentThread); + friend BOOL CheckActivationSafePoint(SIZE_T ip); #endif // FEATURE_THREAD_ACTIVATION #endif // FEATURE_HIJACK diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 01d18aaaf94150..00f2a13578da82 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -2152,6 +2152,7 @@ void Thread::RareDisablePreemptiveGC() continue; } + // TODO: VS should check if suspension is requested. if (GCHeapUtilities::IsGCInProgress()) { EnablePreemptiveGC(); @@ -5691,24 +5692,25 @@ void ThreadSuspend::SuspendEE(SUSPEND_REASON reason) // This function is called by a thread activation to check if the specified instruction pointer // is in a function where we can safely handle an activation. -BOOL CheckActivationSafePoint(SIZE_T ip, BOOL checkingCurrentThread) +// WARNING: This method is called by suspension while one thread is interrupted +// in a random location, possibly holding random locks. +// It is unsafe to use blocking APIs or allocate in this method. +BOOL CheckActivationSafePoint(SIZE_T ip) { - // TODO: VS isn't it always for checkingCurrentThread? - Thread *pThread = GetThreadNULLOk(); - // It is safe to call the ExecutionManager::IsManagedCode only if we are making the check for - // a thread different from the current one or if the current thread is in the cooperative mode. - // Otherwise ExecutionManager::IsManagedCode could deadlock if the activation happened when the - // thread was holding the ExecutionManager's writer lock. - // When the thread is in preemptive mode, we know for sure that it is not executing managed code. - BOOL checkForManagedCode = !checkingCurrentThread || (pThread != NULL && pThread->PreemptiveGCDisabled()); - BOOL isSafePoint = checkForManagedCode && ExecutionManager::IsManagedCode(ip); - if (!isSafePoint) + + // The criteria for safe activation is to be running managed code. + // Also we are not interested in handling interruption if we are already in preemptive mode. + BOOL isActivationSafePoint = pThread != NULL && + pThread->PreemptiveGCDisabled() && + ExecutionManager::IsManagedCode(ip); + + if (!isActivationSafePoint) { pThread->m_hasPendingActivation = false; } - return isSafePoint; + return isActivationSafePoint; } // This function is called when thread suspension is pending. It tries to ensure that the current @@ -5745,7 +5747,7 @@ void HandleSuspensionForInterruptedThread(CONTEXT *interruptedContext) // This function can only be called when the interrupted thread is in // an activation safe point. 
- _ASSERTE(CheckActivationSafePoint(ip, /* checkingCurrentThread */ TRUE)); + _ASSERTE(CheckActivationSafePoint(ip)); Thread::WorkingOnThreadContextHolder workingOnThreadContext(pThread); if (!workingOnThreadContext.Acquired()) @@ -5851,7 +5853,7 @@ void Thread::ApcActivationCallback(ULONG_PTR Parameter) ActivationReason reason = (ActivationReason)pData->Parameter; PCONTEXT pContext = pData->ContextRecord; - if (!CheckActivationSafePoint(GetIP(pContext), true /* checkingCurrentThread */)) + if (!CheckActivationSafePoint(GetIP(pContext))) { return; } From e5ce033ac101807d69ee44a03cadf8cd41bddd11 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sun, 28 Apr 2024 12:54:36 -0700 Subject: [PATCH 12/26] undo comment --- src/coreclr/clrdefinitions.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/clrdefinitions.cmake b/src/coreclr/clrdefinitions.cmake index 79805d5bd669fc..1970b6c33c7544 100644 --- a/src/coreclr/clrdefinitions.cmake +++ b/src/coreclr/clrdefinitions.cmake @@ -223,7 +223,6 @@ if (NOT CLR_CMAKE_TARGET_ARCH_I386 OR NOT CLR_CMAKE_TARGET_WIN32) add_compile_definitions($<$>>:FEATURE_EH_FUNCLETS>) endif (NOT CLR_CMAKE_TARGET_ARCH_I386 OR NOT CLR_CMAKE_TARGET_WIN32) -# TODO: VS enable on arm64 if (CLR_CMAKE_TARGET_WIN32 AND CLR_CMAKE_TARGET_ARCH_AMD64) add_definitions(-DFEATURE_SPECIAL_USER_MODE_APC) endif() From c26364ae0dd0429484a0d2aba6930f67ab2450a7 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sun, 28 Apr 2024 13:23:05 -0700 Subject: [PATCH 13/26] PulseGCMode should not check for CatchAtSafePointOpportunistic --- src/coreclr/vm/threadsuspend.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 00f2a13578da82..abe58a0deb508d 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -2105,7 +2105,7 @@ void Thread::RareDisablePreemptiveGC() // A thread performing GC may try swithch modes around locks inside GC. We will ignore that. 
// (NativeAOT analog scenario would have the thread in DoNotTriggerGc mode) - if (ThreadSuspend::GetSuspensionThread() == this) + if (IsGCSpecialThread() || ThreadSuspend::GetSuspensionThread() == this) { goto Exit; } @@ -2362,8 +2362,7 @@ void Thread::PulseGCMode() _ASSERTE(this == GetThread()); - // TODO: VS no need to check, but assert coop - if (PreemptiveGCDisabled() && CatchAtSafePointOpportunistic()) + if (PreemptiveGCDisabled()) { EnablePreemptiveGC(); DisablePreemptiveGC(); @@ -5778,9 +5777,7 @@ void HandleSuspensionForInterruptedThread(CONTEXT *interruptedContext) frame.Push(pThread); - // TODO: VS pThread->PulseGCMode(); - pThread->EnablePreemptiveGC(); - pThread->DisablePreemptiveGC(); + pThread->PulseGCMode(); INSTALL_MANAGED_EXCEPTION_DISPATCHER; INSTALL_UNWIND_AND_CONTINUE_HANDLER; From 1dbd95b00ed90f7a97391d24ca3e4afa153e9946 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sun, 28 Apr 2024 17:43:37 -0700 Subject: [PATCH 14/26] not disabling preempt while holding TSL --- src/coreclr/vm/threadsuspend.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index abe58a0deb508d..1a317fcf9a39ef 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -2103,9 +2103,8 @@ void Thread::RareDisablePreemptiveGC() goto Exit; } - // A thread performing GC may try swithch modes around locks inside GC. We will ignore that. - // (NativeAOT analog scenario would have the thread in DoNotTriggerGc mode) - if (IsGCSpecialThread() || ThreadSuspend::GetSuspensionThread() == this) + // TODO: VS why can we see this? + if (ThreadStore::HoldingThreadStore(this)) { goto Exit; } From cf19edd1535eda419a0a955b0cb1af80718fe666 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sun, 28 Apr 2024 21:03:04 -0700 Subject: [PATCH 15/26] tweak --- src/coreclr/vm/threads.h | 2 +- src/coreclr/vm/threadsuspend.cpp | 39 ++++++++++++++------------------ 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index 8b9d1251a1a0ba..90d8c4cb1903c8 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -2650,7 +2650,7 @@ class Thread // For suspends. The thread waits on this event. A client sets the event to cause // the thread to resume. - void WaitSuspendEvents(BOOL fDoWait = TRUE); + void WaitSuspendEvents(); BOOL WaitSuspendEventsHelper(void); // Helpers to ensure that the bits for suspension and the number of active diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 1a317fcf9a39ef..2868c3f3b6f368 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -4519,7 +4519,7 @@ BOOL Thread::WaitSuspendEventsHelper(void) // There's a bit of a workaround here -void Thread::WaitSuspendEvents(BOOL fDoWait) +void Thread::WaitSuspendEvents() { STATIC_CONTRACT_NOTHROW; STATIC_CONTRACT_GC_NOTRIGGER; @@ -4528,35 +4528,29 @@ void Thread::WaitSuspendEvents(BOOL fDoWait) _ASSERTE((m_State & TS_SyncSuspended) == 0); // Let us do some useful work before suspending ourselves. - - // If we're required to perform a wait, do so. Typically, this is - // skipped if this thread is a Debugger Special Thread. 
- if (fDoWait) + while (TRUE) { - while (TRUE) - { - WaitSuspendEventsHelper(); + WaitSuspendEventsHelper(); - ThreadState oldState = m_State; + ThreadState oldState = m_State; + // + // If all reasons to suspend are off, we think we can exit + // this loop, but we need to check atomically. + // + if ((oldState & TS_DebugSuspendPending) == 0) + { // - // If all reasons to suspend are off, we think we can exit - // this loop, but we need to check atomically. + // Construct the destination state we desire - all suspension bits turned off. // - if ((oldState & TS_DebugSuspendPending) == 0) + ThreadState newState = (ThreadState)(oldState & ~(TS_DebugSuspendPending | TS_SyncSuspended)); + + if (InterlockedCompareExchange((LONG *)&m_State, newState, oldState) == (LONG)oldState) { // - // Construct the destination state we desire - all suspension bits turned off. + // We are done. // - ThreadState newState = (ThreadState)(oldState & ~(TS_DebugSuspendPending | TS_SyncSuspended)); - - if (InterlockedCompareExchange((LONG *)&m_State, newState, oldState) == (LONG)oldState) - { - // - // We are done. - // - break; - } + break; } } } @@ -4628,6 +4622,7 @@ void Thread::HijackThread(ReturnKind returnKind, ExecutionState *esb) SetHijackReturnKind(returnKind); + // TODO: VS ensure that new hijack is better if (m_State & TS_Hijacked) UnhijackThread(); From ca6e83e7779e999b1fa3df4dd4dfc45d0ef8f746 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Sun, 28 Apr 2024 21:04:00 -0700 Subject: [PATCH 16/26] dead assert --- src/coreclr/vm/threadsuspend.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 2868c3f3b6f368..6f54c0cd3b5826 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -2109,9 +2109,6 @@ void Thread::RareDisablePreemptiveGC() goto Exit; } - // Noone else should be holding TSL and try switching to coop mode. - _ASSERTE(!ThreadStore::HoldingThreadStore(this)); - STRESS_LOG1(LF_SYNC, LL_INFO1000, "RareDisablePreemptiveGC: entering. Thread state = %x\n", m_State.Load()); #if defined(STRESS_HEAP) && defined(_DEBUG) From 15acfe2e2de4c53bbd6bb6e044e8f83b7da79277 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Mon, 29 Apr 2024 10:26:33 -0700 Subject: [PATCH 17/26] tweak RareDisablePreemptiveGC --- src/coreclr/vm/threadsuspend.cpp | 33 ++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 6f54c0cd3b5826..5811ed1243e232 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -2092,10 +2092,11 @@ void Thread::RareDisablePreemptiveGC() goto Exit; } + _ASSERTE (m_fPreemptiveGCDisabled); + // Holding a spin lock in preemp mode and switch to coop mode could cause other threads spinning // waiting for GC _ASSERTE ((m_StateNC & Thread::TSNC_OwnsSpinLock) == 0); - _ASSERTE(!MethodDescBackpatchInfoTracker::IsLockOwnedByCurrentThread() || IsInForbidSuspendForDebuggerRegion()); if (!GCHeapUtilities::IsGCHeapInitialized()) @@ -2112,23 +2113,35 @@ void Thread::RareDisablePreemptiveGC() STRESS_LOG1(LF_SYNC, LL_INFO1000, "RareDisablePreemptiveGC: entering. 
Thread state = %x\n", m_State.Load()); #if defined(STRESS_HEAP) && defined(_DEBUG) - if (!IsDetached()) + if (GCStressPolicy::IsEnabled() && GCStress::IsEnabled() && !IsDetached()) { + EnablePreemptiveGC(); PerformPreemptiveGC(); + // disable preemptive gc. + m_fPreemptiveGCDisabled.StoreWithoutBarrier(1); } #endif + // The general strategy of this loop: + // - we check for additional conditions while in coop mode. + // - if there is something, we had to do before going to coop mode (such as wait for GC), we + // - revert to preempt + // - perform additional work + // - set coop mode and do the loop again + // + // NOTE: It is important that the check is done in coop mode, at least for the GC handshake, + // as per contract with the setter of the conditions, we have to check the condition _before_ + // switching to coop mode. while (true) { +#ifdef DEBUGGING_SUPPORTED // If debugger wants the thread to suspend, give the debugger precedence. if ((m_State & TS_DebugSuspendPending) && !IsInForbidSuspendForDebuggerRegion()) { -#ifdef DEBUGGING_SUPPORTED + EnablePreemptiveGC(); + // We don't notify the debugger that this thread is now suspended. We'll just // let the debugger's helper thread sweep and pick it up. - // We also never take the TSL in here either. - // Life's much simpler this way... -#endif // DEBUGGING_SUPPORTED #ifdef FEATURE_HIJACK // TODO: VS can we have hijacks here? why do we remove them? @@ -2144,9 +2157,13 @@ void Thread::RareDisablePreemptiveGC() // unsets TS_DebugSuspendPending | TS_SyncSuspended WaitSuspendEvents(); + // disable preemptive gc. + m_fPreemptiveGCDisabled.StoreWithoutBarrier(1); + // check again if we have something to do continue; } +#endif // DEBUGGING_SUPPORTED // TODO: VS should check if suspension is requested. if (GCHeapUtilities::IsGCInProgress()) @@ -2194,8 +2211,12 @@ void Thread::RareDisablePreemptiveGC() if (HasThreadState(TS_StackCrawlNeeded)) { + EnablePreemptiveGC(); ThreadStore::WaitForStackCrawlEvent(); + // disable preemptive gc. + m_fPreemptiveGCDisabled.StoreWithoutBarrier(1); + // check again if we have something to do continue; } From c1c20f7060bb440452448048e10b7d079cd76b64 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Mon, 29 Apr 2024 13:44:26 -0700 Subject: [PATCH 18/26] RareDisablePreemptiveGC avoid GetSuspensionThread() --- src/coreclr/vm/threadsuspend.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 5811ed1243e232..328c4ce4d1fcf4 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -2104,6 +2104,11 @@ void Thread::RareDisablePreemptiveGC() goto Exit; } + if (this == ThreadSuspend::GetSuspensionThread()) + { + goto Exit; + } + // TODO: VS why can we see this? if (ThreadStore::HoldingThreadStore(this)) { From 5c027231b0bc7bd55bdfff44f37b305945f38b05 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Mon, 29 Apr 2024 17:06:58 -0700 Subject: [PATCH 19/26] updated Thread::Hijack --- src/coreclr/vm/threads.h | 1 - src/coreclr/vm/threadsuspend.cpp | 23 +++++++++-------------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index 90d8c4cb1903c8..37892260ce20ac 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -4284,7 +4284,6 @@ class ThreadStore // this means that no thread sharing the same scheduler with T2 can run. 
If T1 needs a lock which // is owned by one thread on the scheduler, T1 will wait forever. // Our solution is to move T2 to a safe point, resume it, and then do stack crawl. - // TODO: VS verify if above is true static CLREvent *s_pWaitForStackCrawlEvent; public: static void WaitForStackCrawlEvent() diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 328c4ce4d1fcf4..55cd8fb2e1fefb 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -3308,6 +3308,8 @@ void ThreadSuspend::SuspendRuntime(ThreadSuspend::SUSPEND_REASON reason) } } + _ASSERTE(!pCurThread || !pCurThread->HasThreadState(Thread::TS_GCSuspendFlags)); + // From this point until the end of the function, consider all active thread // suspension to be in progress. This is mainly to give the profiler API a hint // that trying to suspend a thread (in order to walk its stack) could delay the @@ -3321,18 +3323,11 @@ void ThreadSuspend::SuspendRuntime(ThreadSuspend::SUSPEND_REASON reason) // See VSW 475315 and 488918 for details. ::FlushProcessWriteBuffers(); - // - // Make a pass through all threads. We do a couple of things here: - // 1) we count the number of threads that are observed to be in cooperative mode. - // 2) for threads currently running managed code, we try to redirect/jihack them. - int retries = 0; int prevRemaining = 0; int remaining = 0; bool observeOnly = false; - _ASSERTE(!pCurThread || !pCurThread->HasThreadState(Thread::TS_GCSuspendFlags)); - while(true) { prevRemaining = remaining; @@ -3344,10 +3339,6 @@ void ThreadSuspend::SuspendRuntime(ThreadSuspend::SUSPEND_REASON reason) if (pTargetThread == pCurThread) continue; - // GC threads can not be forced to run preemptively, so we will not try. - if (pTargetThread->IsGCSpecial()) - continue; - if (pTargetThread->m_fPreemptiveGCDisabled.LoadWithoutBarrier()) { if (!pTargetThread->HasThreadStateOpportunistic(Thread::TS_GCSuspendPending)) @@ -3435,6 +3426,12 @@ void ThreadSuspend::SuspendRuntime(ThreadSuspend::SUSPEND_REASON reason) void Thread::Hijack() { + if (IsGCSpecial()) + { + // GC threads can not be forced to run preemptively, so we will not try. + return; + } + if (!Thread::UseContextBasedThreadRedirection()) { // On platforms that have signal-like API, we do one of the following: @@ -4645,7 +4642,6 @@ void Thread::HijackThread(ReturnKind returnKind, ExecutionState *esb) SetHijackReturnKind(returnKind); - // TODO: VS ensure that new hijack is better if (m_State & TS_Hijacked) UnhijackThread(); @@ -5671,7 +5667,6 @@ void ThreadSuspend::SuspendEE(SUSPEND_REASON reason) // If someone's trying to suspend *this* thread, this is a good opportunity. if (pCurThread && pCurThread->CatchAtSafePointOpportunistic()) { - pCurThread->PulseGCMode(); // Go suspend myself. 
} else @@ -5756,7 +5751,7 @@ void HandleSuspensionForInterruptedThread(CONTEXT *interruptedContext) Thread *pThread = GetThread(); - if (pThread->PreemptiveGCDisabled() != TRUE) + if (pThread->PreemptiveGCDisabled()) return; PCODE ip = GetIP(interruptedContext); From 827d4de09521c0a5ce341eadabfa499a884eb70d Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Mon, 29 Apr 2024 20:31:57 -0700 Subject: [PATCH 20/26] fix typo --- src/coreclr/vm/threadsuspend.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 55cd8fb2e1fefb..359953bdee03c4 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -2104,16 +2104,16 @@ void Thread::RareDisablePreemptiveGC() goto Exit; } + // A thread that performs GC may switch modes inside GC and come here. + // We will not try suspending a thread that is responsible for the suspension. if (this == ThreadSuspend::GetSuspensionThread()) { goto Exit; } - // TODO: VS why can we see this? - if (ThreadStore::HoldingThreadStore(this)) - { - goto Exit; - } + // Regular threads should not try getting into coop mode + // while holding TS lock. + _ASSERTE(!ThreadStore::HoldingThreadStore(this)); STRESS_LOG1(LF_SYNC, LL_INFO1000, "RareDisablePreemptiveGC: entering. Thread state = %x\n", m_State.Load()); @@ -2149,7 +2149,6 @@ void Thread::RareDisablePreemptiveGC() // let the debugger's helper thread sweep and pick it up. #ifdef FEATURE_HIJACK - // TODO: VS can we have hijacks here? why do we remove them? // Remove any hijacks we might have. UnhijackThread(); #endif // FEATURE_HIJACK @@ -5751,7 +5750,7 @@ void HandleSuspensionForInterruptedThread(CONTEXT *interruptedContext) Thread *pThread = GetThread(); - if (pThread->PreemptiveGCDisabled()) + if (pThread->PreemptiveGCDisabled() != TRUE) return; PCODE ip = GetIP(interruptedContext); From 8b9b7a33215ac126dc9cd47b7a8ee068dad0259e Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Tue, 30 Apr 2024 00:11:11 -0700 Subject: [PATCH 21/26] allow coop mode while holding TS lock --- src/coreclr/vm/threadsuspend.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 359953bdee03c4..51171ede811503 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -2111,9 +2111,12 @@ void Thread::RareDisablePreemptiveGC() goto Exit; } - // Regular threads should not try getting into coop mode - // while holding TS lock. - _ASSERTE(!ThreadStore::HoldingThreadStore(this)); + if (ThreadStore::HoldingThreadStore(this)) + { + // In theory threads should not try entering coop mode while holding TS lock, + // but some scenarios like GCCoopHackNoThread end up here + goto Exit; + } STRESS_LOG1(LF_SYNC, LL_INFO1000, "RareDisablePreemptiveGC: entering. 
Thread state = %x\n", m_State.Load()); From bad07f4a7d15db05a41fbdde5a05557c14892f6f Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Tue, 30 Apr 2024 08:29:21 -0700 Subject: [PATCH 22/26] Refactored into SuspendAllThreads/ResumeAllThreads --- src/coreclr/vm/threadsuspend.cpp | 381 +++++++++++++++---------------- src/coreclr/vm/threadsuspend.h | 4 +- 2 files changed, 192 insertions(+), 193 deletions(-) diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 51171ede811503..305e3314b16326 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -2046,7 +2046,6 @@ extern void WaitForEndOfShutdown(); //---------------------------------------------------------------------------- // A note on SUSPENSIONS. -// TODO: VS comment needs updating? // We must not suspend a thread while it is holding the ThreadStore lock, or // the lock on the thread. Why? Because we need those locks to resume the // thread (and to perform a GC, use the debugger, spawn or kill threads, etc.) @@ -3246,7 +3245,7 @@ void SpinWait(int iteration, int usecLimit) // which leaves cooperative mode and waits for the GC to complete. // // See code:Thread#SuspendingTheRuntime for more -void ThreadSuspend::SuspendRuntime(ThreadSuspend::SUSPEND_REASON reason) +void ThreadSuspend::SuspendAllThreads() { CONTRACTL { NOTHROW; @@ -3261,64 +3260,40 @@ void ThreadSuspend::SuspendRuntime(ThreadSuspend::SUSPEND_REASON reason) } CONTRACTL_END; - // This thread - Thread *pCurThread = GetThreadNULLOk(); + STRESS_LOG(LF_SYNC, LL_INFO1000, "Thread::SuspendAllThreads()\n"); + + // From this point until the end of the function, consider all active thread + // suspension to be in progress. This is mainly to give the profiler API a hint + // that trying to suspend a thread (in order to walk its stack) could delay the + // overall EE suspension. So the profiler API would early-abort the stackwalk + // in such a case. + SuspendRuntimeInProgressHolder hldSuspendRuntimeInProgress; // Caller is expected to be holding the ThreadStore lock. Also, caller must // have set GcInProgress before coming here, or things will break; _ASSERTE(ThreadStore::HoldingThreadStore() || IsAtProcessExit()); - _ASSERTE(GCHeapUtilities::IsGCInProgress() ); - - STRESS_LOG1(LF_SYNC, LL_INFO1000, "Thread::SuspendRuntime(reason=0x%x)\n", reason); + // This thread + Thread *pCurThread = GetThreadNULLOk(); -#ifdef PROFILING_SUPPORTED - // If the profiler desires information about GCs, then let it know that one - // is starting. - { - BEGIN_PROFILER_CALLBACK(CORProfilerTrackSuspends()); - _ASSERTE(reason != ThreadSuspend::SUSPEND_FOR_DEBUGGER); - _ASSERTE(reason != ThreadSuspend::SUSPEND_FOR_DEBUGGER_SWEEP); + + // + // Remember that we're the one suspending the EE + // + g_pSuspensionThread = pCurThread; - { - (&g_profControlBlock)->RuntimeSuspendStarted( - GCSuspendReasonToProfSuspendReason(reason)); - } - if (pCurThread) - { - // Notify the profiler that the thread that is actually doing the GC is 'suspended', - // meaning that it is doing stuff other than run the managed code it was before the - // GC started. - (&g_profControlBlock)->RuntimeThreadSuspended((ThreadID)pCurThread); - } - END_PROFILER_CALLBACK(); - } -#endif // PROFILING_SUPPORTED + // + // First, we reset the event that we're about to tell other threads to wait for. + // + GCHeapUtilities::GetGCHeap()->ResetWaitForGCEvent(); // - // If this thread is running at low priority, boost its priority. 
We remember the old - // priority so that we can restore it in ResumeRuntime. + // Tell all threads, globally, to wait for WaitForGCEvent. // - if (pCurThread) // concurrent GC occurs on threads we don't know about - { - _ASSERTE(pCurThread->m_Priority == INVALID_THREAD_PRIORITY); - int priority = pCurThread->GetThreadPriority(); - if (priority < THREAD_PRIORITY_NORMAL) - { - pCurThread->m_Priority = priority; - pCurThread->SetThreadPriority(THREAD_PRIORITY_NORMAL); - } - } + ThreadStore::TrapReturningThreads(TRUE); _ASSERTE(!pCurThread || !pCurThread->HasThreadState(Thread::TS_GCSuspendFlags)); - // From this point until the end of the function, consider all active thread - // suspension to be in progress. This is mainly to give the profiler API a hint - // that trying to suspend a thread (in order to walk its stack) could delay the - // overall EE suspension. So the profiler API would early-abort the stackwalk - // in such a case. - SuspendRuntimeInProgressHolder hldSuspendRuntimeInProgress; - // Flush the store buffers on all CPUs, to ensure two things: // - we get a reliable reading of the threads' m_fPreemptiveGCDisabled state // - other threads see that g_TrapReturningThreads is set @@ -3389,41 +3364,17 @@ void ThreadSuspend::SuspendRuntime(ThreadSuspend::SUSPEND_REASON reason) } } -#ifdef PROFILING_SUPPORTED - // If a profiler is keeping track of GC events, notify it - { - BEGIN_PROFILER_CALLBACK(CORProfilerTrackSuspends()); - (&g_profControlBlock)->RuntimeSuspendFinished(); - END_PROFILER_CALLBACK(); - } -#endif // PROFILING_SUPPORTED - -#ifdef _DEBUG - if (reason == ThreadSuspend::SUSPEND_FOR_GC) - { - Thread* thread = NULL; - while ((thread = ThreadStore::GetThreadList(thread)) != NULL) - { - thread->DisableStressHeap(); - _ASSERTE(!thread->HasThreadState(Thread::TS_GCSuspendPending)); - } - } -#endif - -#ifdef HAVE_GCCOVER - // - // Now that the EE has been suspended, let's see if any oustanding - // gcstress instruction updates need to occur. Each thread can - // have only one pending at a time. - // - Thread* thread = NULL; - while ((thread = ThreadStore::GetThreadList(thread)) != NULL) - { - thread->CommitGCStressInstructionUpdate(); - } -#endif // HAVE_GCCOVER +#if defined(TARGET_ARM) || defined(TARGET_ARM64) + // Flush the store buffers on all CPUs, to ensure that all changes made so far are seen + // by the GC threads. This only matters on weak memory ordered processors as + // the strong memory ordered processors wouldn't have reordered the relevant writes. + // This is needed to synchronize threads that were running in preemptive mode thus were + // left alone by suspension to flush their writes that they made before they switched to + // preemptive mode. 
+ ::FlushProcessWriteBuffers(); +#endif //TARGET_ARM || TARGET_ARM64 - STRESS_LOG0(LF_SYNC, LL_INFO1000, "Thread::SuspendRuntime() - Success\n"); + STRESS_LOG0(LF_SYNC, LL_INFO1000, "Thread::SuspendAllThreads() - Success\n"); } void Thread::Hijack() @@ -3450,7 +3401,7 @@ void Thread::Hijack() bool success = InjectActivation(Thread::ActivationReason::SuspendForGC); if (!success) { - STRESS_LOG1(LF_SYNC, LL_INFO1000, "Thread::SuspendRuntime() - Failed to inject an activation for thread %p.\n", this); + STRESS_LOG1(LF_SYNC, LL_INFO1000, "Thread::Hijack() - Failed to inject an activation for thread %p.\n", this); } #endif // FEATURE_THREAD_ACTIVATION return; @@ -3552,7 +3503,7 @@ void Thread::Hijack() g_SuspendStatistics.cntRedirections++; #endif SetThreadState(Thread::TS_GCSuspendRedirected); - STRESS_LOG1(LF_SYNC, LL_INFO1000, "Thread::SuspendRuntime() - Thread %p redirected().\n", this); + STRESS_LOG1(LF_SYNC, LL_INFO1000, "Thread::Hijack() - Thread %p redirected().\n", this); } } } @@ -3623,7 +3574,7 @@ void EnableStressHeapHelper() // We're done with our GC. Let all the threads run again. // By this point we've already unblocked most threads. This just releases the ThreadStore lock. -void ThreadSuspend::ResumeRuntime(BOOL bFinishedGC, BOOL SuspendSucceeded) +void ThreadSuspend::ResumeAllThreads(BOOL SuspendSucceeded) { CONTRACTL { NOTHROW; @@ -3631,72 +3582,48 @@ void ThreadSuspend::ResumeRuntime(BOOL bFinishedGC, BOOL SuspendSucceeded) } CONTRACTL_END; - Thread *pCurThread = GetThreadNULLOk(); - - // Caller is expected to be holding the ThreadStore lock. But they must have - // reset GcInProgress, or threads will continue to suspend themselves and won't - // be resumed until the next GC. - _ASSERTE(IsGCSpecialThread() || ThreadStore::HoldingThreadStore()); - _ASSERTE(!GCHeapUtilities::IsGCInProgress() ); - - STRESS_LOG2(LF_SYNC, LL_INFO1000, "Thread::ResumeRuntime(finishedGC=%d, SuspendSucceeded=%d) - Start\n", bFinishedGC, SuspendSucceeded); - + // Caller is expected to be holding the ThreadStore lock since we will be iterating threads. + _ASSERTE(ThreadStore::HoldingThreadStore()); // - // Notify everyone who cares, that this suspension is over, and this thread is going to go do other things. + // Unhijack all threads, and reset their "suspend pending" flags. // - - -#ifdef PROFILING_SUPPORTED - // Need to give resume event for the GC thread + Thread *thread = NULL; + while ((thread = ThreadStore::GetThreadList(thread)) != NULL) { - BEGIN_PROFILER_CALLBACK(CORProfilerTrackSuspends()); - if (pCurThread) - { - (&g_profControlBlock)->RuntimeThreadResumed((ThreadID)pCurThread); - } - END_PROFILER_CALLBACK(); + thread->PrepareForEERestart(SuspendSucceeded); } -#endif // PROFILING_SUPPORTED -#ifdef TIME_SUSPEND - DWORD startRelease = g_SuspendStatistics.GetTime(); -#endif +#if defined(TARGET_ARM) || defined(TARGET_ARM64) + // Flush the store buffers on all CPUs, to ensure that they all see changes made + // by the GC threads. This only matters on weak memory ordered processors as + // the strong memory ordered processors wouldn't have reordered the relevant reads. + // This is needed to synchronize threads that were running in preemptive mode while + // the runtime was suspended and that will return to cooperative mode after the runtime + // is restarted. + ::FlushProcessWriteBuffers(); +#endif //TARGET_ARM || TARGET_ARM64 // - // Unlock the thread store. At this point, all threads should be allowed to run. 
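
The FlushProcessWriteBuffers calls above back up a classic store-then-read-the-other-flag handshake between the suspender and every mutator. The illustration below is not runtime code; it uses std::atomic as a stand-in so the pattern is visible, and shows the per-thread cooperative-mode flag as a single global for brevity.

    #include <atomic>

    std::atomic<bool> g_trapThreads{false};   // set by the suspending thread
    std::atomic<bool> g_inCoopMode{false};    // stands in for one thread's coop flag

    void SuspenderSide()
    {
        g_trapThreads.store(true);            // ThreadStore::TrapReturningThreads(TRUE)
        // FlushProcessWriteBuffers() plays the role of the barrier here, on
        // behalf of every mutator, so mutators need no fence on their hot path.
        if (g_inCoopMode.load())
        {
            // thread is interesting: hijack/redirect it, or wait for it to park
        }
    }

    void MutatorSide()
    {
        g_inCoopMode.store(true);             // enter cooperative mode
        if (g_trapThreads.load())
        {
            // saw the trap: take the rare path and wait for the suspension to end
        }
    }

    // Without a full barrier between each side's store and its load, both
    // sides could read a stale 'false' and conclude there is no conflict.
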
- // - ThreadSuspend::UnlockThreadStore(); - -#ifdef TIME_SUSPEND - g_SuspendStatistics.releaseTSL.Accumulate(SuspendStatistics::GetElapsed(startRelease, - g_SuspendStatistics.GetTime())); -#endif - -#ifdef PROFILING_SUPPORTED + // Allow threads to enter COOP mode (though we still need to wake the ones + // that we hijacked). // - // This thread is logically "resuming" from a GC now. Tell the profiler. + // Note: this is the last barrier that keeps managed threads + // from entering cooperative mode. If the sequence changes, + // you may have to change routine GCHeapUtilities::SafeToRestartManagedThreads + // as well. // - { - BEGIN_PROFILER_CALLBACK(CORProfilerTrackSuspends()); - GCX_PREEMP(); - (&g_profControlBlock)->RuntimeResumeFinished(); - END_PROFILER_CALLBACK(); - } -#endif // PROFILING_SUPPORTED + ThreadStore::TrapReturningThreads(FALSE); + g_pSuspensionThread = 0; // - // If we raised this thread's priority in SuspendRuntime, we restore it here. + // Any threads that are waiting in WaitUntilGCComplete will continue now. // - if (pCurThread) - { - if (pCurThread->m_Priority != INVALID_THREAD_PRIORITY) - { - pCurThread->SetThreadPriority(pCurThread->m_Priority); - pCurThread->m_Priority = INVALID_THREAD_PRIORITY; - } - } + GCHeapUtilities::GetGCHeap()->SetWaitForGCEvent(); + _ASSERTE(IsGCSpecialThread() || ThreadStore::HoldingThreadStore()); - STRESS_LOG0(LF_SYNC, LL_INFO1000, "Thread::ResumeRuntime() - End\n"); + + STRESS_LOG1(LF_SYNC, LL_INFO1000, "ResumeAllThreads(SuspendSucceeded=%d) - Start\n", SuspendSucceeded); + STRESS_LOG0(LF_SYNC, LL_INFO1000, "ResumeAllThreads() - End\n"); } #ifndef TARGET_UNIX @@ -3900,7 +3827,7 @@ BOOL Thread::HandleJITCaseForAbort() // Threads suspended by ClrSuspendThread() are resumed in two ways. If we // suspended them in error, they are resumed via ClrResumeThread(). But if // this is the HandledJIT() case and the thread is in fully interruptible code, we -// can resume them under special control. ResumeRuntime and UserResume are cases +// can resume them under special control. ResumeAllThreads and UserResume are cases // of this. // // The suspension has done its work (e.g. GC or user thread suspension). But during @@ -5479,41 +5406,69 @@ void ThreadSuspend::RestartEE(BOOL bFinishedGC, BOOL SuspendSucceeded) } #endif // PROFILING_SUPPORTED - // - // Unhijack all threads, and reset their "suspend pending" flags. Why isn't this in - // Thread::ResumeRuntime? - // - Thread *thread = NULL; - while ((thread = ThreadStore::GetThreadList(thread)) != NULL) - { - thread->PrepareForEERestart(SuspendSucceeded); - } - // // Revert to being a normal thread // ClrFlsClearThreadType (ThreadType_DynamicSuspendEE); GCHeapUtilities::GetGCHeap()->SetGCInProgress(false); + ResumeAllThreads(SuspendSucceeded); + // - // Allow threads to enter COOP mode (though we still need to wake the ones - // that we hijacked). + // Notify everyone who cares, that this suspension is over, and this thread is going to go do other things. // - // Note: this is the last barrier that keeps managed threads - // from entering cooperative mode. If the sequence changes, - // you may have to change routine GCHeapUtilities::SafeToRestartManagedThreads - // as well. 
+ + Thread *pCurThread = GetThreadNULLOk(); + +#ifdef PROFILING_SUPPORTED + // Need to give resume event for the GC thread + { + BEGIN_PROFILER_CALLBACK(CORProfilerTrackSuspends()); + if (pCurThread) + { + (&g_profControlBlock)->RuntimeThreadResumed((ThreadID)pCurThread); + } + END_PROFILER_CALLBACK(); + } +#endif // PROFILING_SUPPORTED + +#ifdef TIME_SUSPEND + DWORD startRelease = g_SuspendStatistics.GetTime(); +#endif + // - ThreadStore::TrapReturningThreads(FALSE); - g_pSuspensionThread = 0; + // Unlock the thread store. At this point, all threads should be allowed to run. + // + ThreadSuspend::UnlockThreadStore(); +#ifdef TIME_SUSPEND + g_SuspendStatistics.releaseTSL.Accumulate(SuspendStatistics::GetElapsed(startRelease, + g_SuspendStatistics.GetTime())); +#endif + +#ifdef PROFILING_SUPPORTED // - // Any threads that are waiting in WaitUntilGCComplete will continue now. + // This thread is logically "resuming" from a GC now. Tell the profiler. // - GCHeapUtilities::GetGCHeap()->SetWaitForGCEvent(); - _ASSERTE(IsGCSpecialThread() || ThreadStore::HoldingThreadStore()); + { + BEGIN_PROFILER_CALLBACK(CORProfilerTrackSuspends()); + GCX_PREEMP(); + (&g_profControlBlock)->RuntimeResumeFinished(); + END_PROFILER_CALLBACK(); + } +#endif // PROFILING_SUPPORTED - ResumeRuntime(bFinishedGC, SuspendSucceeded); + // + // If we raised this thread's priority in SuspendRuntime, we restore it here. + // + if (pCurThread) + { + if (pCurThread->m_Priority != INVALID_THREAD_PRIORITY) + { + pCurThread->SetThreadPriority(pCurThread->m_Priority); + pCurThread->m_Priority = INVALID_THREAD_PRIORITY; + } + } FireEtwGCRestartEEEnd_V1(GetClrInstanceId()); @@ -5527,13 +5482,13 @@ void ThreadSuspend::RestartEE(BOOL bFinishedGC, BOOL SuspendSucceeded) // SuspendEE: // LockThreadStore // SetGCInProgress -// SuspendRuntime +// SuspendAllThreads // // ... perform the GC ... // // RestartEE: // SetGCDone -// ResumeRuntime +// ResumeAllThreads // calls UnlockThreadStore // // Note that this is intentionally *not* symmetrical. The EE will assert that the @@ -5587,24 +5542,6 @@ void ThreadSuspend::SuspendEE(SUSPEND_REASON reason) g_SuspendStatistics.GetTime())); #endif - // - // Now we're going to acquire an exclusive lock on managed code execution (including - // "manually managed" code in GCX_COOP regions). - // - // First, we reset the event that we're about to tell other threads to wait for. - // - GCHeapUtilities::GetGCHeap()->ResetWaitForGCEvent(); - - // - // Remember that we're the one suspending the EE - // - g_pSuspensionThread = pCurThread; - - // - // Tell all threads, globally, to wait for WaitForGCEvent. - // - ThreadStore::TrapReturningThreads(TRUE); - // // Remember why we're doing this. // @@ -5622,13 +5559,85 @@ void ThreadSuspend::SuspendEE(SUSPEND_REASON reason) _ASSERTE(ThreadStore::HoldingThreadStore() || g_fProcessDetach); +#ifdef PROFILING_SUPPORTED + // If the profiler desires information about GCs, then let it know that one + // is starting. + { + BEGIN_PROFILER_CALLBACK(CORProfilerTrackSuspends()); + _ASSERTE(reason != ThreadSuspend::SUSPEND_FOR_DEBUGGER); + _ASSERTE(reason != ThreadSuspend::SUSPEND_FOR_DEBUGGER_SWEEP); + + { + (&g_profControlBlock)->RuntimeSuspendStarted( + GCSuspendReasonToProfSuspendReason(reason)); + } + if (pCurThread) + { + // Notify the profiler that the thread that is actually doing the GC is 'suspended', + // meaning that it is doing stuff other than run the managed code it was before the + // GC started. 
+ (&g_profControlBlock)->RuntimeThreadSuspended((ThreadID)pCurThread); + } + END_PROFILER_CALLBACK(); + } +#endif // PROFILING_SUPPORTED + + // + // If this thread is running at low priority, boost its priority. We remember the old + // priority so that we can restore it in ResumeEE. + // + if (pCurThread) // concurrent GC occurs on threads we don't know about + { + _ASSERTE(pCurThread->m_Priority == INVALID_THREAD_PRIORITY); + int priority = pCurThread->GetThreadPriority(); + if (priority < THREAD_PRIORITY_NORMAL) + { + pCurThread->m_Priority = priority; + pCurThread->SetThreadPriority(THREAD_PRIORITY_NORMAL); + } + } + // // Now that we've instructed all threads to please stop, // go interrupt the ones that are running managed code and force them to stop. // This does not return until all threads have acknowledged that they // will not run managed code. // - SuspendRuntime(reason); + SuspendAllThreads(); + +#ifdef PROFILING_SUPPORTED + // If a profiler is keeping track of GC events, notify it + { + BEGIN_PROFILER_CALLBACK(CORProfilerTrackSuspends()); + (&g_profControlBlock)->RuntimeSuspendFinished(); + END_PROFILER_CALLBACK(); + } +#endif // PROFILING_SUPPORTED + +#ifdef _DEBUG + if (reason == ThreadSuspend::SUSPEND_FOR_GC) + { + Thread* thread = NULL; + while ((thread = ThreadStore::GetThreadList(thread)) != NULL) + { + thread->DisableStressHeap(); + _ASSERTE(!thread->HasThreadState(Thread::TS_GCSuspendPending)); + } + } +#endif + +#ifdef HAVE_GCCOVER + // + // Now that the EE has been suspended, let's see if any oustanding + // gcstress instruction updates need to occur. Each thread can + // have only one pending at a time. + // + Thread* thread = NULL; + while ((thread = ThreadStore::GetThreadList(thread)) != NULL) + { + thread->CommitGCStressInstructionUpdate(); + } +#endif // HAVE_GCCOVER #ifdef DEBUGGING_SUPPORTED // If the debugging services are attached, then its possible @@ -5689,16 +5698,6 @@ void ThreadSuspend::SuspendEE(SUSPEND_REASON reason) g_SuspendStatistics.EndSuspend(reason == SUSPEND_FOR_GC || reason == SUSPEND_FOR_GC_PREP); #endif //TIME_SUSPEND ThreadSuspend::s_fSuspended = true; - -#if defined(TARGET_ARM) || defined(TARGET_ARM64) - // Flush the store buffers on all CPUs, to ensure that all changes made so far are seen - // by the GC threads. This only matters on weak memory ordered processors as - // the strong memory ordered processors wouldn't have reordered the relevant writes. - // This is needed to synchronize threads that were running in preemptive mode thus were - // left alone by suspension to flush their writes that they made before they switched to - // preemptive mode. 
- ::FlushProcessWriteBuffers(); -#endif //TARGET_ARM || TARGET_ARM64 } #ifdef FEATURE_THREAD_ACTIVATION diff --git a/src/coreclr/vm/threadsuspend.h b/src/coreclr/vm/threadsuspend.h index 3dcf3cff6cd4d7..b72e4c33534784 100644 --- a/src/coreclr/vm/threadsuspend.h +++ b/src/coreclr/vm/threadsuspend.h @@ -182,8 +182,8 @@ class ThreadSuspend private: static SUSPEND_REASON m_suspendReason; // This contains the reason why the runtime is suspended - static void SuspendRuntime(ThreadSuspend::SUSPEND_REASON reason); - static void ResumeRuntime(BOOL bFinishedGC, BOOL SuspendSucceeded); + static void SuspendAllThreads(); + static void ResumeAllThreads(BOOL SuspendSucceeded); public: // Initialize thread suspension support static void Initialize(); From c9ad7fff640567ff541991bf6ff3f52defa3539f Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Tue, 30 Apr 2024 17:52:31 -0700 Subject: [PATCH 23/26] SetThreadTrapForSuspension --- src/coreclr/vm/comtoclrcall.cpp | 2 +- src/coreclr/vm/fcall.h | 4 +- src/coreclr/vm/jithelpers.cpp | 8 +-- src/coreclr/vm/threads.cpp | 8 +-- src/coreclr/vm/threads.h | 25 +++++---- src/coreclr/vm/threadsuspend.cpp | 95 +++++++++++++++++++++++--------- src/coreclr/vm/vars.cpp | 2 +- src/coreclr/vm/vars.hpp | 13 ++++- 8 files changed, 107 insertions(+), 50 deletions(-) diff --git a/src/coreclr/vm/comtoclrcall.cpp b/src/coreclr/vm/comtoclrcall.cpp index 9a65a3ebc8501c..925372cbe3ad0e 100644 --- a/src/coreclr/vm/comtoclrcall.cpp +++ b/src/coreclr/vm/comtoclrcall.cpp @@ -515,7 +515,7 @@ extern "C" UINT64 __stdcall COMToCLRWorker(Thread *pThread, ComMethodFrame* pFra // we have additional checks for thread abort that are performed only when // g_TrapReturningThreads is set. pThread->m_fPreemptiveGCDisabled.StoreWithoutBarrier(1); - if (g_TrapReturningThreads.LoadWithoutBarrier()) + if (g_TrapReturningThreads) { hr = StubRareDisableHRWorker(pThread); if (S_OK != hr) diff --git a/src/coreclr/vm/fcall.h b/src/coreclr/vm/fcall.h index bbb5ba9ca5320d..e40f525abb83bb 100644 --- a/src/coreclr/vm/fcall.h +++ b/src/coreclr/vm/fcall.h @@ -809,7 +809,7 @@ Object* FC_GCPoll(void* me, Object* objToProtect = NULL); { \ INCONTRACT(Thread::TriggersGC(GetThread());) \ INCONTRACT(__fCallCheck.SetDidPoll();) \ - if (g_TrapReturningThreads.LoadWithoutBarrier()) \ + if (g_TrapReturningThreads) \ { \ if (FC_GCPoll(__me)) \ return ret; \ @@ -824,7 +824,7 @@ Object* FC_GCPoll(void* me, Object* objToProtect = NULL); { \ INCONTRACT(__fCallCheck.SetDidPoll();) \ Object* __temp = OBJECTREFToObject(obj); \ - if (g_TrapReturningThreads.LoadWithoutBarrier()) \ + if (g_TrapReturningThreads) \ { \ __temp = FC_GCPoll(__me, __temp); \ while (0 == FC_NO_TAILCALL) { }; /* side effect the compile can't remove */ \ diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 05d2ad18b5bd94..103da4c36070e7 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -2935,7 +2935,7 @@ HCIMPL2(LPVOID, Unbox_Helper, CORINFO_CLASS_HANDLE type, Object* obj) if (pMT1->GetInternalCorElementType() == pMT2->GetInternalCorElementType() && (pMT1->IsEnum() || pMT1->IsTruePrimitive()) && (pMT2->IsEnum() || pMT2->IsTruePrimitive()) && - g_TrapReturningThreads.LoadWithoutBarrier() == 0) + g_TrapReturningThreads == 0) { return obj->GetData(); } @@ -4811,7 +4811,7 @@ HCIMPL0(VOID, JIT_PollGC) FCALL_CONTRACT; // As long as we can have GCPOLL_CALL polls, it would not hurt to check the trap flag. 
- if (!g_TrapReturningThreads.LoadWithoutBarrier()) + if (!g_TrapReturningThreads) return; // Does someone want this thread stopped? @@ -6156,7 +6156,7 @@ HCIMPL3_RAW(void, JIT_ReversePInvokeEnterTrackTransitions, ReversePInvokeFrame* // Manually inline the fast path in Thread::DisablePreemptiveGC(). thread->m_fPreemptiveGCDisabled.StoreWithoutBarrier(1); - if (g_TrapReturningThreads.LoadWithoutBarrier() != 0) + if (g_TrapReturningThreads != 0) { // If we're in an IL stub, we want to trace the address of the target method, // not the next instruction in the stub. @@ -6193,7 +6193,7 @@ HCIMPL1_RAW(void, JIT_ReversePInvokeEnter, ReversePInvokeFrame* frame) // Manually inline the fast path in Thread::DisablePreemptiveGC(). thread->m_fPreemptiveGCDisabled.StoreWithoutBarrier(1); - if (g_TrapReturningThreads.LoadWithoutBarrier() != 0) + if (g_TrapReturningThreads != 0) { JIT_ReversePInvokeEnterRare2(frame, _ReturnAddress()); } diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp index 34b90e0647e91a..8a4289f04ff04d 100644 --- a/src/coreclr/vm/threads.cpp +++ b/src/coreclr/vm/threads.cpp @@ -5762,7 +5762,7 @@ BOOL ThreadStore::DbgFindThread(Thread *target) // Cache the current change stamp for g_TrapReturningThreads LONG chgStamp = g_trtChgStamp; - STRESS_LOG3(LF_STORE, LL_INFO100, "ThreadStore::DbgFindThread - [thread=%p]. trt=%d. chgStamp=%d\n", GetThreadNULLOk(), g_TrapReturningThreads.Load(), chgStamp); + STRESS_LOG3(LF_STORE, LL_INFO100, "ThreadStore::DbgFindThread - [thread=%p]. trt=%d. chgStamp=%d\n", GetThreadNULLOk(), g_TrapReturningThreads, chgStamp); #if 0 // g_TrapReturningThreads debug code. int iRetry = 0; @@ -5835,7 +5835,7 @@ BOOL ThreadStore::DbgFindThread(Thread *target) } #endif // g_TrapReturningThreads debug code. - STRESS_LOG4(LF_STORE, LL_INFO100, "ThreadStore::DbgFindThread - [thread=%p]. trt=%d. chg=%d. cnt=%d\n", GetThreadNULLOk(), g_TrapReturningThreads.Load(), g_trtChgStamp.Load(), cntReturn); + STRESS_LOG4(LF_STORE, LL_INFO100, "ThreadStore::DbgFindThread - [thread=%p]. trt=%d. chg=%d. cnt=%d\n", GetThreadNULLOk(), g_TrapReturningThreads, g_trtChgStamp.Load(), cntReturn); // Because of race conditions and the fact that the GC places its // own count, I can't assert this precisely. But I do want to be @@ -5850,11 +5850,11 @@ BOOL ThreadStore::DbgFindThread(Thread *target) // return). // // Note: we don't actually assert this if - // ThreadStore::TrapReturningThreads() updated g_TrapReturningThreads + // ThreadStore::TrapReturningThreadsIncrement() updated g_TrapReturningThreads // between the beginning of this function and the moment of the assert. // *** The order of evaluation in the if condition is important *** _ASSERTE( - (g_trtChgInFlight != 0 || (cntReturn + 2 >= g_TrapReturningThreads) || chgStamp != g_trtChgStamp) || + (g_trtChgInFlight != 0 || (cntReturn + 2 >= g_TrapReturningThreads / 2) || chgStamp != g_trtChgStamp) || g_fEEShutDown); return found; diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index 37892260ce20ac..9f8f08fdaca6c6 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -1366,7 +1366,7 @@ class Thread m_fPreemptiveGCDisabled.StoreWithoutBarrier(1); - if (g_TrapReturningThreads.LoadWithoutBarrier()) + if (g_TrapReturningThreads) { RareDisablePreemptiveGC(); } @@ -4148,14 +4148,19 @@ class ThreadStore == m_BackgroundThreadCount); } - // If you want to trap threads re-entering the EE (be this for GC, or debugging, - // or Thread.Suspend() or whatever, you need to TrapReturningThreads(TRUE). 
When - // you are finished snagging threads, call TrapReturningThreads(FALSE). This + // If you want to trap threads re-entering the EE (for debugging, + // or Thread.Suspend() or whatever, you need to TrapReturningThreadsIncrement(). When + // you are finished snagging threads, call TrapReturningThreadsDecrement(). This // counts internally. // // Of course, you must also fix RareDisablePreemptiveGC to do the right thing // when the trap occurs. - static void TrapReturningThreads(BOOL yes); + static void TrapReturningThreadsIncrement(); + static void TrapReturningThreadsDecrement(); + + static void SetThreadTrapForSuspension(); + static void UnsetThreadTrapForSuspension(); + static bool IsTrappingThreadsForSuspension(); private: @@ -4319,8 +4324,8 @@ class ThreadStore }; struct TSSuspendHelper { - static void SetTrap() { ThreadStore::TrapReturningThreads(TRUE); } - static void UnsetTrap() { ThreadStore::TrapReturningThreads(FALSE); } + static void SetTrap() { ThreadStore::TrapReturningThreadsIncrement(); } + static void UnsetTrap() { ThreadStore::TrapReturningThreadsDecrement(); } }; typedef StateHolder TSSuspendHolder; @@ -4511,7 +4516,7 @@ inline void Thread::MarkForDebugSuspend(void) if (!HasThreadState(TS_DebugSuspendPending)) { SetThreadState(TS_DebugSuspendPending); - ThreadStore::TrapReturningThreads(TRUE); + ThreadStore::TrapReturningThreadsIncrement(); } } @@ -4522,13 +4527,13 @@ inline void Thread::IncrementTraceCallCount() { WRAPPER_NO_CONTRACT; InterlockedIncrement(&m_TraceCallCount); - ThreadStore::TrapReturningThreads(TRUE); + ThreadStore::TrapReturningThreadsIncrement(); } inline void Thread::DecrementTraceCallCount() { WRAPPER_NO_CONTRACT; - ThreadStore::TrapReturningThreads(FALSE); + ThreadStore::TrapReturningThreadsDecrement(); InterlockedDecrement(&m_TraceCallCount); } diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 305e3314b16326..136016cd607f30 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -1295,7 +1295,7 @@ Thread::UserAbort(EEPolicy::ThreadAbortTypes abortType, DWORD timeout) // The thread being aborted may clear the TS_AbortRequested bit and the matching increment // of g_TrapReturningThreads behind our back. Increment g_TrapReturningThreads here // to ensure that we stop for the stack crawl even if the TS_AbortRequested bit is cleared. 
- ThreadStore::TrapReturningThreads(TRUE); + ThreadStore::TrapReturningThreadsIncrement(); } void NeedStackCrawl() { @@ -1310,7 +1310,7 @@ Thread::UserAbort(EEPolicy::ThreadAbortTypes abortType, DWORD timeout) if (m_NeedRelease) { m_NeedRelease = FALSE; - ThreadStore::TrapReturningThreads(FALSE); + ThreadStore::TrapReturningThreadsDecrement(); ThreadStore::SetStackCrawlEvent(); m_pThread->ResetThreadState(TS_StackCrawlNeeded); if (!m_fHoldingThreadStoreLock) @@ -1755,7 +1755,7 @@ void Thread::SetAbortRequestBit() } if (InterlockedCompareExchange((LONG*)&m_State, curValue|TS_AbortRequested, curValue) == curValue) { - ThreadStore::TrapReturningThreads(TRUE); + ThreadStore::TrapReturningThreadsIncrement(); break; } @@ -1771,7 +1771,7 @@ void Thread::RemoveAbortRequestBit() #ifdef _DEBUG // There's a race between removing the TS_AbortRequested bit and decrementing g_TrapReturningThreads - // We may remove the bit, but before we have a chance to call ThreadStore::TrapReturningThreads(FALSE) + // We may remove the bit, but before we have a chance to call ThreadStore::TrapReturningThreadsDecrement() // DbgFindThread() may execute, and find too few threads with the bit set. // To ensure the assert in DbgFindThread does not fire under such a race we set the ChgInFlight before hand. CounterHolder trtHolder(&g_trtChgInFlight); @@ -1785,7 +1785,7 @@ void Thread::RemoveAbortRequestBit() } if (InterlockedCompareExchange((LONG*)&m_State, curValue&(~TS_AbortRequested), curValue) == curValue) { - ThreadStore::TrapReturningThreads(FALSE); + ThreadStore::TrapReturningThreadsDecrement(); break; } @@ -2171,8 +2171,7 @@ void Thread::RareDisablePreemptiveGC() } #endif // DEBUGGING_SUPPORTED - // TODO: VS should check if suspension is requested. - if (GCHeapUtilities::IsGCInProgress()) + if (ThreadStore::IsTrappingThreadsForSuspension()) { EnablePreemptiveGC(); @@ -2395,7 +2394,7 @@ void Thread::PulseGCMode() // Indicate whether threads should be trapped when returning to the EE (i.e. 
disabling // preemptive GC mode) Volatile g_fTrapReturningThreadsLock; -void ThreadStore::TrapReturningThreads(BOOL yes) +void ThreadStore::TrapReturningThreadsIncrement() { CONTRACTL { NOTHROW; @@ -2418,31 +2417,73 @@ void ThreadStore::TrapReturningThreads(BOOL yes) suspend.Acquire(); } - if (yes) - { #ifdef _DEBUG - CounterHolder trtHolder(&g_trtChgInFlight); - InterlockedIncrement(&g_trtChgStamp); + CounterHolder trtHolder(&g_trtChgInFlight); + InterlockedIncrement(&g_trtChgStamp); #endif - GCHeapUtilities::GetGCHeap()->SetSuspensionPending(true); - InterlockedIncrement ((LONG *)&g_TrapReturningThreads); - _ASSERTE(g_TrapReturningThreads > 0); + InterlockedAdd ((LONG *)&g_TrapReturningThreads, 2); + _ASSERTE(g_TrapReturningThreads > 0); #ifdef _DEBUG - trtHolder.Release(); + trtHolder.Release(); #endif - } - else + + g_fTrapReturningThreadsLock = 0; +} + +void ThreadStore::TrapReturningThreadsDecrement() +{ + CONTRACTL { + NOTHROW; + GC_NOTRIGGER; + } CONTRACTL_END; + + // make sure that a thread doesn't get suspended holding g_fTrapReturningThreadsLock + // if a suspended thread held this lock and then the suspending thread called in + // here (which it does) the suspending thread would deadlock causing the suspension + // as a whole to deadlock + ForbidSuspendThreadHolder suspend; + + DWORD dwSwitchCount = 0; + while (1 == InterlockedExchange(&g_fTrapReturningThreadsLock, 1)) { - InterlockedDecrement ((LONG *)&g_TrapReturningThreads); - GCHeapUtilities::GetGCHeap()->SetSuspensionPending(false); - _ASSERTE(g_TrapReturningThreads >= 0); + // we can't forbid suspension while we are sleeping and don't hold the lock + // this will trigger an assert on SQLCLR but is a general issue + suspend.Release(); + __SwitchToThread(0, ++dwSwitchCount); + suspend.Acquire(); } + InterlockedAdd ((LONG *)&g_TrapReturningThreads, -2); + _ASSERTE(g_TrapReturningThreads >= 0); + g_fTrapReturningThreadsLock = 0; } +void ThreadStore::SetThreadTrapForSuspension() +{ + _ASSERTE(ThreadStore::HoldingThreadStore()); + + _ASSERTE(!IsTrappingThreadsForSuspension()); + GCHeapUtilities::GetGCHeap()->SetSuspensionPending(true); + InterlockedIncrement((LONG *)&g_TrapReturningThreads); +} + +void ThreadStore::UnsetThreadTrapForSuspension() +{ + _ASSERTE(ThreadStore::HoldingThreadStore()); + + _ASSERTE(IsTrappingThreadsForSuspension()); + InterlockedDecrement((LONG *)&g_TrapReturningThreads); + GCHeapUtilities::GetGCHeap()->SetSuspensionPending(false); +} + +bool ThreadStore::IsTrappingThreadsForSuspension() +{ + return (g_TrapReturningThreads & 1) != 0; +} + #ifdef FEATURE_HIJACK void RedirectedThreadFrame::ExceptionUnwind() @@ -3290,7 +3331,7 @@ void ThreadSuspend::SuspendAllThreads() // // Tell all threads, globally, to wait for WaitForGCEvent. // - ThreadStore::TrapReturningThreads(TRUE); + ThreadStore::SetThreadTrapForSuspension(); _ASSERTE(!pCurThread || !pCurThread->HasThreadState(Thread::TS_GCSuspendFlags)); @@ -3612,7 +3653,7 @@ void ThreadSuspend::ResumeAllThreads(BOOL SuspendSucceeded) // you may have to change routine GCHeapUtilities::SafeToRestartManagedThreads // as well. // - ThreadStore::TrapReturningThreads(FALSE); + ThreadStore::UnsetThreadTrapForSuspension(); g_pSuspensionThread = 0; // @@ -3950,7 +3991,7 @@ bool Thread::SysStartSuspendForDebug(AppDomain *pAppDomain) } LOG((LF_CORDB, LL_INFO1000, "[0x%x] SUSPEND: starting suspend. Trap count: %d\n", - pCurThread ? pCurThread->GetThreadId() : (DWORD) -1, g_TrapReturningThreads.Load())); + pCurThread ? 
pCurThread->GetThreadId() : (DWORD) -1, g_TrapReturningThreads));

     // Caller is expected to be holding the ThreadStore lock
     _ASSERTE(ThreadStore::HoldingThreadStore() || IsAtProcessExit());
@@ -4409,7 +4450,7 @@ void Thread::SysResumeFromDebug(AppDomain *pAppDomain)
         }
     }

-    LOG((LF_CORDB, LL_INFO1000, "RESUME: resume complete. Trap count: %d\n", g_TrapReturningThreads.Load()));
+    LOG((LF_CORDB, LL_INFO1000, "RESUME: resume complete. Trap count: %d\n", g_TrapReturningThreads));
 }

 /*
@@ -5335,7 +5376,7 @@ void Thread::MarkForSuspension(ULONG bit)
     _ASSERTE((m_State & bit) == 0);

     InterlockedOr((LONG*)&m_State, bit);
-    ThreadStore::TrapReturningThreads(TRUE);
+    ThreadStore::TrapReturningThreadsIncrement();
 }

 void Thread::UnmarkForSuspension(ULONG mask)
@@ -5354,7 +5395,7 @@ void Thread::UnmarkForSuspension(ULONG mask)
     _ASSERTE((m_State & ~mask) != 0);

     // we decrement the global first to be able to satisfy the assert from DbgFindThread
-    ThreadStore::TrapReturningThreads(FALSE);
+    ThreadStore::TrapReturningThreadsDecrement();
     InterlockedAnd((LONG*)&m_State, mask);
 }

diff --git a/src/coreclr/vm/vars.cpp b/src/coreclr/vm/vars.cpp
index f6a02953906330..a5a92e1f115d24 100644
--- a/src/coreclr/vm/vars.cpp
+++ b/src/coreclr/vm/vars.cpp
@@ -21,7 +21,7 @@ const char g_psBaseLibrary[] = CoreLibName_IL_A;
 const char g_psBaseLibraryName[] = CoreLibName_A;
 const char g_psBaseLibrarySatelliteAssemblyName[] = CoreLibSatelliteName_A;

-Volatile<LONG> g_TrapReturningThreads;
+volatile int32_t g_TrapReturningThreads;

 #ifdef _DEBUG
 // next two variables are used to enforce an ASSERT in Thread::DbgFindThread
diff --git a/src/coreclr/vm/vars.hpp b/src/coreclr/vm/vars.hpp
index 5b5c133ad8b42a..3c5d82f73848c5 100644
--- a/src/coreclr/vm/vars.hpp
+++ b/src/coreclr/vm/vars.hpp
@@ -295,7 +295,18 @@ class Module;
 // For [ g_TrapReturningThreads;
+// g_TrapReturningThreads == 0 disables thread polling/trapping.
+// This allows us to short-circuit any further examination of thread state in the most
+// common scenario - when we are not interested in trapping anything.
+//
+// Bit #1 is reserved for controlling thread suspension.
+// Setting bit #1 atomically indicates (and lets other threads check) that an EE suspension is in progress.
+// There can be only one EE suspension in progress at a time (it requires holding the ThreadStore lock).
+//
+// The other bits are used as a counter to enable thread trapping for other reasons, such as ThreadAbort.
+// There can be several active reasons for thread trapping at a time, like aborting multiple threads,
+// so the value of g_TrapReturningThreads can be greater than 3.
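// ---------------------------------------------------------------------------------------------
// [Editor's note] The comment above describes how the single g_TrapReturningThreads word is
// shared between "EE suspension in progress" (bit #1) and a count of all other trap reasons
// (the remaining bits, bumped in steps of 2). The snippet below is a minimal, standalone sketch
// of that encoding only -- it is not runtime code; std::atomic stands in for the VM's volatile
// global and Interlocked* helpers, and names like trap_word_sketch and MustTrap are illustrative.
#include <atomic>
#include <cassert>

namespace trap_word_sketch
{
    std::atomic<int32_t> s_trapWord{0};           // plays the role of g_TrapReturningThreads

    // Counted reasons (thread abort, debugger suspend, ...) add/remove 2, never touching bit #1.
    void IncrementTrapReturningThreads() { s_trapWord.fetch_add(2); }
    void DecrementTrapReturningThreads() { s_trapWord.fetch_sub(2); }

    // EE suspension owns bit #1; at most one suspension is in flight (ThreadStore lock held).
    void SetThreadTrapForSuspension()     { assert((s_trapWord.load() & 1) == 0); s_trapWord.fetch_add(1); }
    void UnsetThreadTrapForSuspension()   { assert((s_trapWord.load() & 1) != 0); s_trapWord.fetch_sub(1); }
    bool IsTrappingThreadsForSuspension() { return (s_trapWord.load() & 1) != 0; }

    // Fast path used by returning threads: zero means "nobody is trapping anything".
    bool MustTrap()                       { return s_trapWord.load() != 0; }
}
// ---------------------------------------------------------------------------------------------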
+extern "C" volatile int32_t g_TrapReturningThreads; #ifdef _DEBUG // next two variables are used to enforce an ASSERT in Thread::DbgFindThread From 96ab9f5ecabb2c7e8b332fee3f71141702e32638 Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Tue, 30 Apr 2024 23:41:27 -0700 Subject: [PATCH 24/26] deleted TS_GCSuspendPending --- docs/design/datacontracts/Thread.md | 2 -- src/coreclr/vm/fcall.cpp | 2 +- src/coreclr/vm/frames.h | 2 +- src/coreclr/vm/interpreter.cpp | 2 +- src/coreclr/vm/jithelpers.cpp | 10 +++++----- src/coreclr/vm/object.inl | 2 +- src/coreclr/vm/syncblk.cpp | 2 +- src/coreclr/vm/threads.cpp | 14 ++++++-------- src/coreclr/vm/threads.h | 23 +++++++++++++---------- src/coreclr/vm/threadsuspend.cpp | 20 +------------------- 10 files changed, 30 insertions(+), 49 deletions(-) diff --git a/docs/design/datacontracts/Thread.md b/docs/design/datacontracts/Thread.md index 7bee0fe79fdc77..2910dc8dbb8200 100644 --- a/docs/design/datacontracts/Thread.md +++ b/docs/design/datacontracts/Thread.md @@ -22,9 +22,7 @@ enum ThreadState TS_AbortRequested = 0x00000001, // Abort the thread - TS_GCSuspendPending = 0x00000002, // ThreadSuspend::SuspendRuntime watches this thread to leave coop mode. TS_GCSuspendRedirected = 0x00000004, // ThreadSuspend::SuspendRuntime has redirected the thread to suspention routine. - TS_GCSuspendFlags = TS_GCSuspendPending | TS_GCSuspendRedirected, // used to track suspension progress. Only SuspendRuntime writes/resets these. TS_DebugSuspendPending = 0x00000008, // Is the debugger suspending threads? TS_GCOnTransitions = 0x00000010, // Force a GC on stub transitions (GCStress only) diff --git a/src/coreclr/vm/fcall.cpp b/src/coreclr/vm/fcall.cpp index 20461bef798808..f344018807e7ab 100644 --- a/src/coreclr/vm/fcall.cpp +++ b/src/coreclr/vm/fcall.cpp @@ -129,7 +129,7 @@ NOINLINE Object* FC_GCPoll(void* __me, Object* objToProtect) INCONTRACT(FCallCheck __fCallCheck(__FILE__, __LINE__)); Thread *thread = GetThread(); - if (thread->CatchAtSafePointOpportunistic()) // Does someone want this thread stopped? + if (thread->CatchAtSafePoint()) // Does someone want this thread stopped? 
{ HELPER_METHOD_FRAME_BEGIN_RET_ATTRIB_1(Frame::FRAME_ATTR_CAPTURE_DEPTH_2, objToProtect); diff --git a/src/coreclr/vm/frames.h b/src/coreclr/vm/frames.h index 512ce37520a709..3d3fe177bcbb82 100644 --- a/src/coreclr/vm/frames.h +++ b/src/coreclr/vm/frames.h @@ -1332,7 +1332,7 @@ class HelperMethodFrame : public Frame FORCEINLINE void Poll() { WRAPPER_NO_CONTRACT; - if (m_pThread->CatchAtSafePointOpportunistic()) + if (m_pThread->CatchAtSafePoint()) CommonTripThread(); } #endif // DACCESS_COMPILE diff --git a/src/coreclr/vm/interpreter.cpp b/src/coreclr/vm/interpreter.cpp index 2edb5948d72e80..20e42c83eb9b75 100644 --- a/src/coreclr/vm/interpreter.cpp +++ b/src/coreclr/vm/interpreter.cpp @@ -1952,7 +1952,7 @@ static void MonitorEnter(Object* obj, BYTE* pbLockTaken) GCPROTECT_BEGININTERIOR(pbLockTaken); - if (GET_THREAD()->CatchAtSafePointOpportunistic()) + if (GET_THREAD()->CatchAtSafePoint()) { GET_THREAD()->PulseGCMode(); } diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 103da4c36070e7..a8c0defb26447f 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -3651,7 +3651,7 @@ NOINLINE static void JIT_MonEnter_Helper(Object* obj, BYTE* pbLockTaken, LPVOID GCPROTECT_BEGININTERIOR(pbLockTaken); - if (GET_THREAD()->CatchAtSafePointOpportunistic()) + if (GET_THREAD()->CatchAtSafePoint()) { GET_THREAD()->PulseGCMode(); } @@ -3730,7 +3730,7 @@ NOINLINE static void JIT_MonTryEnter_Helper(Object* obj, INT32 timeOut, BYTE* pb GCPROTECT_BEGININTERIOR(pbLockTaken); - if (GET_THREAD()->CatchAtSafePointOpportunistic()) + if (GET_THREAD()->CatchAtSafePoint()) { GET_THREAD()->PulseGCMode(); } @@ -3764,7 +3764,7 @@ HCIMPL3(void, JIT_MonTryEnter_Portable, Object* obj, INT32 timeOut, BYTE* pbLock pCurThread = GetThread(); - if (pCurThread->CatchAtSafePointOpportunistic()) + if (pCurThread->CatchAtSafePoint()) { goto FramedLockHelper; } @@ -3936,7 +3936,7 @@ HCIMPL_MONHELPER(JIT_MonEnterStatic_Portable, AwareLock *lock) MONHELPER_STATE(_ASSERTE(pbLockTaken != NULL && *pbLockTaken == 0)); Thread *pCurThread = GetThread(); - if (pCurThread->CatchAtSafePointOpportunistic()) + if (pCurThread->CatchAtSafePoint()) { goto FramedLockHelper; } @@ -4815,7 +4815,7 @@ HCIMPL0(VOID, JIT_PollGC) return; // Does someone want this thread stopped? 
- if (!GetThread()->CatchAtSafePointOpportunistic()) + if (!GetThread()->CatchAtSafePoint()) return; // Tailcall to the slow helper diff --git a/src/coreclr/vm/object.inl b/src/coreclr/vm/object.inl index 491aab1d4c873f..c78222529c309c 100644 --- a/src/coreclr/vm/object.inl +++ b/src/coreclr/vm/object.inl @@ -114,7 +114,7 @@ FORCEINLINE bool Object::TryEnterObjMonitorSpinHelper() } CONTRACTL_END; Thread *pCurThread = GetThread(); - if (pCurThread->CatchAtSafePointOpportunistic()) + if (pCurThread->CatchAtSafePoint()) { return false; } diff --git a/src/coreclr/vm/syncblk.cpp b/src/coreclr/vm/syncblk.cpp index 606137ee8eba75..482b9da36644fd 100644 --- a/src/coreclr/vm/syncblk.cpp +++ b/src/coreclr/vm/syncblk.cpp @@ -598,7 +598,7 @@ void SyncBlockCache::CleanupSyncBlocks() pParam->psb = NULL; // pulse GC mode to allow GC to perform its work - if (FinalizerThread::GetFinalizerThread()->CatchAtSafePointOpportunistic()) + if (FinalizerThread::GetFinalizerThread()->CatchAtSafePoint()) { FinalizerThread::GetFinalizerThread()->PulseGCMode(); } diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp index 8a4289f04ff04d..d71c4e20436559 100644 --- a/src/coreclr/vm/threads.cpp +++ b/src/coreclr/vm/threads.cpp @@ -7065,22 +7065,20 @@ void CommonTripThread() CONTRACTL { THROWS; GC_TRIGGERS; + MODE_COOPERATIVE; } CONTRACTL_END; Thread *thread = GetThread(); - thread->HandleThreadAbort (); + thread->HandleThreadAbort(); - if (thread->CatchAtSafePointOpportunistic()) - { - _ASSERTE(!ThreadStore::HoldingThreadStore(thread)); + _ASSERTE(!ThreadStore::HoldingThreadStore(thread)); #ifdef FEATURE_HIJACK - thread->UnhijackThread(); + thread->UnhijackThread(); #endif // FEATURE_HIJACK - // Trap - thread->PulseGCMode(); - } + // Trap + thread->PulseGCMode(); #else DacNotImpl(); #endif // #ifndef DACCESS_COMPILE diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index 9f8f08fdaca6c6..9c6feb5a1de4ac 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -592,7 +592,7 @@ class Thread public: // If we are trying to suspend a thread, we set the appropriate pending bit to - // indicate why we want to suspend it (TS_GCSuspendPending or TS_DebugSuspendPending). + // indicate why we want to suspend it (TS_AbortRequested or TS_DebugSuspendPending). // // If instead the thread has blocked itself, via WaitSuspendEvent, we indicate // this with TS_SyncSuspended. However, we need to know whether the synchronous @@ -608,9 +608,8 @@ class Thread TS_AbortRequested = 0x00000001, // Abort the thread - TS_GCSuspendPending = 0x00000002, // ThreadSuspend::SuspendRuntime watches this thread to leave coop mode. + // unused = 0x00000002, TS_GCSuspendRedirected = 0x00000004, // ThreadSuspend::SuspendRuntime has redirected the thread to suspention routine. - TS_GCSuspendFlags = TS_GCSuspendPending | TS_GCSuspendRedirected, // used to track suspension progress. Only SuspendRuntime writes/resets these. TS_DebugSuspendPending = 0x00000008, // Is the debugger suspending threads? TS_GCOnTransitions = 0x00000010, // Force a GC on stub transitions (GCStress only) @@ -671,8 +670,7 @@ class Thread // enum is changed, we also need to update SOS to reflect this. // We require (and assert) that the following bits are less than 0x100. 
- TS_CatchAtSafePoint = (TS_AbortRequested | TS_GCSuspendPending | - TS_DebugSuspendPending | TS_GCOnTransitions), + TS_CatchAtSafePoint = (TS_AbortRequested | TS_DebugSuspendPending | TS_GCOnTransitions), }; // Thread flags that aren't really states in themselves but rather things the thread @@ -937,11 +935,18 @@ class Thread void DoExtraWorkForFinalizer(); #ifndef DACCESS_COMPILE - DWORD CatchAtSafePointOpportunistic() + DWORD CatchAtSafePoint() { - LIMITED_METHOD_CONTRACT; + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_COOPERATIVE; + } + CONTRACTL_END; - return HasThreadStateOpportunistic(TS_CatchAtSafePoint); + return g_TrapReturningThreads & 1 || + HasThreadStateOpportunistic(TS_CatchAtSafePoint); } #endif // DACCESS_COMPILE @@ -3222,8 +3227,6 @@ class Thread if (SuspendSucceeded) UnhijackThread(); #endif // FEATURE_HIJACK - - _ASSERTE(!HasThreadStateOpportunistic(Thread::TS_GCSuspendPending)); } static LPVOID GetStaticFieldAddress(FieldDesc *pFD); diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 136016cd607f30..e513f8fb1903ec 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -3317,7 +3317,6 @@ void ThreadSuspend::SuspendAllThreads() // This thread Thread *pCurThread = GetThreadNULLOk(); - // // Remember that we're the one suspending the EE // @@ -3333,8 +3332,6 @@ void ThreadSuspend::SuspendAllThreads() // ThreadStore::SetThreadTrapForSuspension(); - _ASSERTE(!pCurThread || !pCurThread->HasThreadState(Thread::TS_GCSuspendFlags)); - // Flush the store buffers on all CPUs, to ensure two things: // - we get a reliable reading of the threads' m_fPreemptiveGCDisabled state // - other threads see that g_TrapReturningThreads is set @@ -3359,24 +3356,12 @@ void ThreadSuspend::SuspendAllThreads() if (pTargetThread->m_fPreemptiveGCDisabled.LoadWithoutBarrier()) { - if (!pTargetThread->HasThreadStateOpportunistic(Thread::TS_GCSuspendPending)) - { - pTargetThread->SetThreadState(Thread::TS_GCSuspendPending); - } - remaining++; if (!observeOnly) { pTargetThread->Hijack(); } } - else - { - if (pTargetThread->HasThreadStateOpportunistic(Thread::TS_GCSuspendPending)) - { - pTargetThread->ResetThreadState(Thread::TS_GCSuspendPending); - } - } } if (!remaining) @@ -3503,12 +3488,10 @@ void Thread::Hijack() } // the thread is suspended here, we can hijack, if it is stopped in hijackable location - if (!m_fPreemptiveGCDisabled.LoadWithoutBarrier()) { // actually, we are done with this one STRESS_LOG1(LF_SYNC, LL_INFO1000, " Thread %x went preemptive while suspending it is at a GC safe point\n", this); - ResetThreadState(Thread::TS_GCSuspendFlags); ResumeThread(); return; } @@ -5662,7 +5645,6 @@ void ThreadSuspend::SuspendEE(SUSPEND_REASON reason) while ((thread = ThreadStore::GetThreadList(thread)) != NULL) { thread->DisableStressHeap(); - _ASSERTE(!thread->HasThreadState(Thread::TS_GCSuspendPending)); } } #endif @@ -5717,7 +5699,7 @@ void ThreadSuspend::SuspendEE(SUSPEND_REASON reason) LOG((LF_GCROOTS | LF_GC | LF_CORDB, LL_INFO10, "The EE is free now...\n")); // If someone's trying to suspend *this* thread, this is a good opportunity. - if (pCurThread && pCurThread->CatchAtSafePointOpportunistic()) + if (pCurThread && pCurThread->CatchAtSafePoint()) { pCurThread->PulseGCMode(); // Go suspend myself. 
} From 48e1c78da2a1676c59d77335f14bad50f0cbf1bc Mon Sep 17 00:00:00 2001 From: vsadov <8218165+VSadov@users.noreply.github.com> Date: Wed, 1 May 2024 12:01:36 -0700 Subject: [PATCH 25/26] tweaks --- src/coreclr/vm/threads.cpp | 2 +- src/coreclr/vm/threads.h | 18 +++++++++--------- src/coreclr/vm/threadsuspend.cpp | 30 ++++++++++++++---------------- 3 files changed, 24 insertions(+), 26 deletions(-) diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp index d71c4e20436559..68b42e8d2e704c 100644 --- a/src/coreclr/vm/threads.cpp +++ b/src/coreclr/vm/threads.cpp @@ -5850,7 +5850,7 @@ BOOL ThreadStore::DbgFindThread(Thread *target) // return). // // Note: we don't actually assert this if - // ThreadStore::TrapReturningThreadsIncrement() updated g_TrapReturningThreads + // ThreadStore::IncrementTrapReturningThreads() updated g_TrapReturningThreads // between the beginning of this function and the moment of the assert. // *** The order of evaluation in the if condition is important *** _ASSERTE( diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index 9c6feb5a1de4ac..e60cb4fd8168ed 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -4152,14 +4152,14 @@ class ThreadStore } // If you want to trap threads re-entering the EE (for debugging, - // or Thread.Suspend() or whatever, you need to TrapReturningThreadsIncrement(). When - // you are finished snagging threads, call TrapReturningThreadsDecrement(). This + // or Thread.Suspend() or whatever, you need to IncrementTrapReturningThreads(). When + // you are finished snagging threads, call DecrementTrapReturningThreads(). This // counts internally. // // Of course, you must also fix RareDisablePreemptiveGC to do the right thing // when the trap occurs. - static void TrapReturningThreadsIncrement(); - static void TrapReturningThreadsDecrement(); + static void IncrementTrapReturningThreads(); + static void DecrementTrapReturningThreads(); static void SetThreadTrapForSuspension(); static void UnsetThreadTrapForSuspension(); @@ -4327,8 +4327,8 @@ class ThreadStore }; struct TSSuspendHelper { - static void SetTrap() { ThreadStore::TrapReturningThreadsIncrement(); } - static void UnsetTrap() { ThreadStore::TrapReturningThreadsDecrement(); } + static void SetTrap() { ThreadStore::IncrementTrapReturningThreads(); } + static void UnsetTrap() { ThreadStore::DecrementTrapReturningThreads(); } }; typedef StateHolder TSSuspendHolder; @@ -4519,7 +4519,7 @@ inline void Thread::MarkForDebugSuspend(void) if (!HasThreadState(TS_DebugSuspendPending)) { SetThreadState(TS_DebugSuspendPending); - ThreadStore::TrapReturningThreadsIncrement(); + ThreadStore::IncrementTrapReturningThreads(); } } @@ -4530,13 +4530,13 @@ inline void Thread::IncrementTraceCallCount() { WRAPPER_NO_CONTRACT; InterlockedIncrement(&m_TraceCallCount); - ThreadStore::TrapReturningThreadsIncrement(); + ThreadStore::IncrementTrapReturningThreads(); } inline void Thread::DecrementTraceCallCount() { WRAPPER_NO_CONTRACT; - ThreadStore::TrapReturningThreadsDecrement(); + ThreadStore::DecrementTrapReturningThreads(); InterlockedDecrement(&m_TraceCallCount); } diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index e513f8fb1903ec..d8f3363611c988 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -1295,7 +1295,7 @@ Thread::UserAbort(EEPolicy::ThreadAbortTypes abortType, DWORD timeout) // The thread being aborted may clear the TS_AbortRequested bit and the matching increment // of 
g_TrapReturningThreads behind our back. Increment g_TrapReturningThreads here // to ensure that we stop for the stack crawl even if the TS_AbortRequested bit is cleared. - ThreadStore::TrapReturningThreadsIncrement(); + ThreadStore::IncrementTrapReturningThreads(); } void NeedStackCrawl() { @@ -1310,7 +1310,7 @@ Thread::UserAbort(EEPolicy::ThreadAbortTypes abortType, DWORD timeout) if (m_NeedRelease) { m_NeedRelease = FALSE; - ThreadStore::TrapReturningThreadsDecrement(); + ThreadStore::DecrementTrapReturningThreads(); ThreadStore::SetStackCrawlEvent(); m_pThread->ResetThreadState(TS_StackCrawlNeeded); if (!m_fHoldingThreadStoreLock) @@ -1755,7 +1755,7 @@ void Thread::SetAbortRequestBit() } if (InterlockedCompareExchange((LONG*)&m_State, curValue|TS_AbortRequested, curValue) == curValue) { - ThreadStore::TrapReturningThreadsIncrement(); + ThreadStore::IncrementTrapReturningThreads(); break; } @@ -1771,7 +1771,7 @@ void Thread::RemoveAbortRequestBit() #ifdef _DEBUG // There's a race between removing the TS_AbortRequested bit and decrementing g_TrapReturningThreads - // We may remove the bit, but before we have a chance to call ThreadStore::TrapReturningThreadsDecrement() + // We may remove the bit, but before we have a chance to call ThreadStore::DecrementTrapReturningThreads() // DbgFindThread() may execute, and find too few threads with the bit set. // To ensure the assert in DbgFindThread does not fire under such a race we set the ChgInFlight before hand. CounterHolder trtHolder(&g_trtChgInFlight); @@ -1785,7 +1785,7 @@ void Thread::RemoveAbortRequestBit() } if (InterlockedCompareExchange((LONG*)&m_State, curValue&(~TS_AbortRequested), curValue) == curValue) { - ThreadStore::TrapReturningThreadsDecrement(); + ThreadStore::DecrementTrapReturningThreads(); break; } @@ -2113,7 +2113,7 @@ void Thread::RareDisablePreemptiveGC() if (ThreadStore::HoldingThreadStore(this)) { // In theory threads should not try entering coop mode while holding TS lock, - // but some scenarios like GCCoopHackNoThread end up here + // but some scenarios like GCCoopHackNoThread and GCX_COOP_NO_THREAD_BROKEN end up here goto Exit; } @@ -2143,7 +2143,7 @@ void Thread::RareDisablePreemptiveGC() { #ifdef DEBUGGING_SUPPORTED // If debugger wants the thread to suspend, give the debugger precedence. - if ((m_State & TS_DebugSuspendPending) && !IsInForbidSuspendForDebuggerRegion()) + if (HasThreadStateOpportunistic(TS_DebugSuspendPending) && !IsInForbidSuspendForDebuggerRegion()) { EnablePreemptiveGC(); @@ -2214,7 +2214,7 @@ void Thread::RareDisablePreemptiveGC() continue; } - if (HasThreadState(TS_StackCrawlNeeded)) + if (HasThreadStateOpportunistic(TS_StackCrawlNeeded)) { EnablePreemptiveGC(); ThreadStore::WaitForStackCrawlEvent(); @@ -2394,7 +2394,7 @@ void Thread::PulseGCMode() // Indicate whether threads should be trapped when returning to the EE (i.e. 
disabling // preemptive GC mode)
Volatile<LONG> g_fTrapReturningThreadsLock;

-void ThreadStore::TrapReturningThreadsIncrement()
+void ThreadStore::IncrementTrapReturningThreads()
 {
     CONTRACTL {
         NOTHROW;
@@ -2432,7 +2432,7 @@ void ThreadStore::TrapReturningThreadsIncrement()
     g_fTrapReturningThreadsLock = 0;
 }

-void ThreadStore::TrapReturningThreadsDecrement()
+void ThreadStore::DecrementTrapReturningThreads()
 {
     CONTRACTL {
         NOTHROW;
@@ -3210,14 +3210,14 @@ COR_PRF_SUSPEND_REASON GCSuspendReasonToProfSuspendReason(ThreadSuspend::SUSPEND
 }
 #endif // PROFILING_SUPPORTED

-int64_t QueryPerformanceCounter()
+static int64_t QueryPerformanceCounter()
 {
     LARGE_INTEGER ts;
     QueryPerformanceCounter(&ts);
     return ts.QuadPart;
 }

-int64_t QueryPerformanceFrequency()
+static int64_t QueryPerformanceFrequency()
 {
     LARGE_INTEGER ts;
     QueryPerformanceFrequency(&ts);
@@ -3596,8 +3596,6 @@ void EnableStressHeapHelper()
 }
 #endif

-// We're done with our GC. Let all the threads run again.
-// By this point we've already unblocked most threads. This just releases the ThreadStore lock.
 void ThreadSuspend::ResumeAllThreads(BOOL SuspendSucceeded)
 {
     CONTRACTL {
@@ -5359,7 +5357,7 @@ void Thread::MarkForSuspension(ULONG bit)
     _ASSERTE((m_State & bit) == 0);

     InterlockedOr((LONG*)&m_State, bit);
-    ThreadStore::TrapReturningThreadsIncrement();
+    ThreadStore::IncrementTrapReturningThreads();
 }

 void Thread::UnmarkForSuspension(ULONG mask)
@@ -5378,7 +5376,7 @@ void Thread::UnmarkForSuspension(ULONG mask)
     _ASSERTE((m_State & ~mask) != 0);

     // we decrement the global first to be able to satisfy the assert from DbgFindThread
-    ThreadStore::TrapReturningThreadsDecrement();
+    ThreadStore::DecrementTrapReturningThreads();
     InterlockedAnd((LONG*)&m_State, mask);
 }

From 1688024143c4f8fe5c1713fefc97df3250ecf0d8 Mon Sep 17 00:00:00 2001
From: vsadov <8218165+VSadov@users.noreply.github.com>
Date: Sat, 4 May 2024 18:42:39 -0700
Subject: [PATCH 26/26] PR feedback

---
 src/coreclr/vm/threadsuspend.cpp | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp
index d8f3363611c988..6e26cd996a4940 100644
--- a/src/coreclr/vm/threadsuspend.cpp
+++ b/src/coreclr/vm/threadsuspend.cpp
@@ -2103,17 +2103,14 @@ void Thread::RareDisablePreemptiveGC()
         goto Exit;
     }

-    // A thread that performs GC may switch modes inside GC and come here.
-    // We will not try suspending a thread that is responsible for the suspension.
-    if (this == ThreadSuspend::GetSuspensionThread())
-    {
-        goto Exit;
-    }
-
     if (ThreadStore::HoldingThreadStore(this))
     {
-        // In theory threads should not try entering coop mode while holding TS lock,
-        // but some scenarios like GCCoopHackNoThread and GCX_COOP_NO_THREAD_BROKEN end up here
+        // A thread that performs GC may switch modes inside GC and come here. We would not want to
+        // suspend the thread that is responsible for the suspension.
+        // Ideally that would be the only case when a thread that holds the thread store lock
+        // may want to enter coop mode, but we have a number of other scenarios where the TSL is acquired
+        // in coop mode or where the TSL-owning thread tries to switch to coop.
+        // We will handle all cases in the same way - by allowing the thread into coop mode without a wait.
         goto Exit;
     }
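// ---------------------------------------------------------------------------------------------
// [Editor's note] A hedged, self-contained sketch of the control flow that this last commit
// settles on for a thread entering cooperative mode: claim coop mode first, then, if an EE
// suspension is being trapped and this thread is not the one holding the thread store lock,
// back out to preemptive mode and wait before retrying. This is illustrative only; the types,
// the condition variable, and names such as EnterCooperativeMode are stand-ins, not VM APIs.
#include <atomic>
#include <condition_variable>
#include <mutex>

namespace coop_entry_sketch
{
    std::atomic<int32_t>    s_trapWord{0};      // ~ g_TrapReturningThreads (bit #1 = suspension)
    std::mutex              s_resumeLock;
    std::condition_variable s_resumeSignal;     // ~ the "suspension is over" notification

    struct Thread
    {
        std::atomic<bool> preemptiveGCDisabled{false};  // ~ m_fPreemptiveGCDisabled
        bool holdsThreadStoreLock = false;              // ~ ThreadStore::HoldingThreadStore(this)
    };

    bool IsTrappingThreadsForSuspension() { return (s_trapWord.load() & 1) != 0; }

    void EnterCooperativeMode(Thread& self)
    {
        for (;;)
        {
            // Fast path: claim cooperative mode, then check whether anyone is trapping.
            self.preemptiveGCDisabled.store(true);
            if (!IsTrappingThreadsForSuspension())
                return;

            // The suspending thread (or anything else that holds the thread store lock)
            // must not block here, or the suspension would end up waiting on itself.
            if (self.holdsThreadStoreLock)
                return;

            // Somebody is suspending the EE: give up coop mode, wait, and try again.
            self.preemptiveGCDisabled.store(false);
            std::unique_lock<std::mutex> lock(s_resumeLock);
            s_resumeSignal.wait(lock, [] { return !IsTrappingThreadsForSuspension(); });
        }
    }
}
// ---------------------------------------------------------------------------------------------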