From 716690995907c9005976df76b7ae78f1300b6e2d Mon Sep 17 00:00:00 2001 From: Noah Falk Date: Sat, 13 Jul 2024 00:56:18 -0700 Subject: [PATCH 1/5] Add ee_alloc_context (NativeAOT) This change is preparatory refactoring for the randomized allocation sampling feature. We need to add more state to the allocation context, but we don't want to make a breaking change to the GC interface. The new state only needs to be visible to the EE, but we want it physically near the existing alloc context state for good cache locality. To accomplish this we created a new ee_alloc_context struct which contains an instance of gc_alloc_context within it. The new ee_alloc_context::combined_limit should be used by fast allocation helpers to determine when to go down the slow path. Most of the time combined_limit has the same value as alloc_limit, but periodically we need to emit an allocation sampling event on an object that is somewhere in the middle of an allocation context (AC). Using combined_limit rather than alloc_limit as the slow path trigger allows us to keep all the sampling event logic in the slow path. 
--- src/coreclr/nativeaot/Runtime/AsmOffsets.h | 21 ++++++----- .../nativeaot/Runtime/AsmOffsetsVerify.cpp | 2 +- src/coreclr/nativeaot/Runtime/DebugHeader.cpp | 5 ++- src/coreclr/nativeaot/Runtime/GCHelpers.cpp | 1 + .../nativeaot/Runtime/amd64/AllocFast.S | 6 +-- .../nativeaot/Runtime/amd64/AllocFast.asm | 6 +-- .../nativeaot/Runtime/amd64/AsmMacros.inc | 4 +- src/coreclr/nativeaot/Runtime/arm/AllocFast.S | 10 ++--- .../nativeaot/Runtime/arm64/AllocFast.S | 10 ++--- .../nativeaot/Runtime/arm64/AllocFast.asm | 6 +-- .../nativeaot/Runtime/arm64/AsmMacros.h | 4 +- src/coreclr/nativeaot/Runtime/gcenv.ee.cpp | 10 +++++ .../nativeaot/Runtime/i386/AllocFast.asm | 6 +-- .../nativeaot/Runtime/i386/AsmMacros.inc | 4 +- src/coreclr/nativeaot/Runtime/inc/rhbinder.h | 10 ++--- .../nativeaot/Runtime/loongarch64/AllocFast.S | 10 ++--- src/coreclr/nativeaot/Runtime/portable.cpp | 25 ++++++++----- src/coreclr/nativeaot/Runtime/thread.cpp | 1 + src/coreclr/nativeaot/Runtime/thread.h | 37 ++++++++++++++++++- src/coreclr/nativeaot/Runtime/thread.inl | 37 ++++++++++++++++++- .../Runtime/unix/unixasmmacrosamd64.inc | 4 +- .../Runtime/unix/unixasmmacrosarm.inc | 4 +- 22 files changed, 158 insertions(+), 65 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/AsmOffsets.h b/src/coreclr/nativeaot/Runtime/AsmOffsets.h index 32abd406175e7..cb6bf8842e04b 100644 --- a/src/coreclr/nativeaot/Runtime/AsmOffsets.h +++ b/src/coreclr/nativeaot/Runtime/AsmOffsets.h @@ -46,21 +46,24 @@ ASM_OFFSET( 0, 0, MethodTable, m_uFlags) ASM_OFFSET( 4, 4, MethodTable, m_uBaseSize) ASM_OFFSET( 14, 18, MethodTable, m_VTable) -ASM_OFFSET( 0, 0, Thread, m_rgbAllocContextBuffer) -ASM_OFFSET( 28, 38, Thread, m_ThreadStateFlags) -ASM_OFFSET( 2c, 40, Thread, m_pTransitionFrame) -ASM_OFFSET( 30, 48, Thread, m_pDeferredTransitionFrame) -ASM_OFFSET( 40, 68, Thread, m_ppvHijackedReturnAddressLocation) -ASM_OFFSET( 44, 70, Thread, m_pvHijackedReturnAddress) -ASM_OFFSET( 48, 78, Thread, m_uHijackedReturnValueFlags) 
-ASM_OFFSET( 4c, 80, Thread, m_pExInfoStackHead) -ASM_OFFSET( 50, 88, Thread, m_threadAbortException) +ASM_OFFSET( 0, 0, Thread, m_eeAllocContext) +ASM_OFFSET( 2c, 40, Thread, m_ThreadStateFlags) +ASM_OFFSET( 30, 48, Thread, m_pTransitionFrame) +ASM_OFFSET( 34, 50, Thread, m_pDeferredTransitionFrame) +ASM_OFFSET( 44, 70, Thread, m_ppvHijackedReturnAddressLocation) +ASM_OFFSET( 48, 78, Thread, m_pvHijackedReturnAddress) +ASM_OFFSET( 4c, 80, Thread, m_uHijackedReturnValueFlags) +ASM_OFFSET( 50, 88, Thread, m_pExInfoStackHead) +ASM_OFFSET( 54, 90, Thread, m_threadAbortException) ASM_SIZEOF( 14, 20, EHEnum) ASM_OFFSET( 0, 0, gc_alloc_context, alloc_ptr) ASM_OFFSET( 4, 8, gc_alloc_context, alloc_limit) +ASM_OFFSET( 0, 0, ee_alloc_context, combined_limit) +ASM_OFFSET( 4, 8, ee_alloc_context, m_rgbAllocContextBuffer) + #ifdef FEATURE_CACHED_INTERFACE_DISPATCH ASM_OFFSET( 4, 8, InterfaceDispatchCell, m_pCache) #ifdef INTERFACE_DISPATCH_CACHE_HAS_CELL_BACKPOINTER diff --git a/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp b/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp index b5520d739e871..d27884dbdf1ff 100644 --- a/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp +++ b/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp @@ -22,7 +22,7 @@ class AsmOffsets { - static_assert(sizeof(Thread::m_rgbAllocContextBuffer) >= sizeof(gc_alloc_context), "Thread::m_rgbAllocContextBuffer is not big enough to hold a gc_alloc_context"); + static_assert(sizeof(ee_alloc_context::m_rgbAllocContextBuffer) >= sizeof(gc_alloc_context), "ee_alloc_context::m_rgbAllocContextBuffer is not big enough to hold a gc_alloc_context"); // Some assembly helpers for arrays and strings are shared and use the fact that arrays and strings have similar layouts) static_assert(offsetof(Array, m_Length) == offsetof(String, m_Length), "The length field of String and Array have different offsets"); diff --git a/src/coreclr/nativeaot/Runtime/DebugHeader.cpp b/src/coreclr/nativeaot/Runtime/DebugHeader.cpp 
index 324e0f86f2aea..e32956dde4ee2 100644 --- a/src/coreclr/nativeaot/Runtime/DebugHeader.cpp +++ b/src/coreclr/nativeaot/Runtime/DebugHeader.cpp @@ -163,6 +163,9 @@ extern "C" void PopulateDebugHeaders() MAKE_DEBUG_FIELD_ENTRY(dac_gc_heap, finalize_queue); MAKE_DEBUG_FIELD_ENTRY(dac_gc_heap, generation_table); + MAKE_SIZE_ENTRY(ee_alloc_context); + MAKE_DEBUG_FIELD_ENTRY(ee_alloc_context, m_rgbAllocContextBuffer); + MAKE_SIZE_ENTRY(gc_alloc_context); MAKE_DEBUG_FIELD_ENTRY(gc_alloc_context, alloc_ptr); MAKE_DEBUG_FIELD_ENTRY(gc_alloc_context, alloc_limit); @@ -194,7 +197,7 @@ extern "C" void PopulateDebugHeaders() MAKE_SIZE_ENTRY(RuntimeThreadLocals); MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_pNext); - MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_rgbAllocContextBuffer); + MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_eeAllocContext); MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_threadId); MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_pThreadStressLog); MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_pExInfoStackHead); diff --git a/src/coreclr/nativeaot/Runtime/GCHelpers.cpp b/src/coreclr/nativeaot/Runtime/GCHelpers.cpp index 5833bd65c2f10..b749f68365817 100644 --- a/src/coreclr/nativeaot/Runtime/GCHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/GCHelpers.cpp @@ -540,6 +540,7 @@ static Object* GcAllocInternal(MethodTable* pEEType, uint32_t uFlags, uintptr_t tls_pLastAllocationEEType = pEEType; Object* pObject = GCHeapUtilities::GetGCHeap()->Alloc(pThread->GetAllocContext(), cbSize, uFlags); + pThread->GetEEAllocContext()->UpdateCombinedLimit(); if (pObject == NULL) return NULL; diff --git a/src/coreclr/nativeaot/Runtime/amd64/AllocFast.S b/src/coreclr/nativeaot/Runtime/amd64/AllocFast.S index 6cb85bcc507a0..8923a7a4fbb64 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/AllocFast.S +++ b/src/coreclr/nativeaot/Runtime/amd64/AllocFast.S @@ -28,7 +28,7 @@ NESTED_ENTRY RhpNewFast, _TEXT, NoHandler mov rsi, [rax + OFFSETOF__Thread__m_alloc_context__alloc_ptr] add 
rdx, rsi - cmp rdx, [rax + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp rdx, [rax + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja LOCAL_LABEL(RhpNewFast_RarePath) // set the new alloc pointer @@ -143,7 +143,7 @@ NESTED_ENTRY RhNewString, _TEXT, NoHandler // rcx == Thread* // rdx == string size // r12 == element count - cmp rax, [rcx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp rax, [rcx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja LOCAL_LABEL(RhNewString_RarePath) mov [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax @@ -226,7 +226,7 @@ NESTED_ENTRY RhpNewArray, _TEXT, NoHandler // rcx == Thread* // rdx == array size // r12 == element count - cmp rax, [rcx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp rax, [rcx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja LOCAL_LABEL(RhpNewArray_RarePath) mov [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax diff --git a/src/coreclr/nativeaot/Runtime/amd64/AllocFast.asm b/src/coreclr/nativeaot/Runtime/amd64/AllocFast.asm index 37be558c3cef1..6ba69c0c14127 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/AllocFast.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/AllocFast.asm @@ -25,7 +25,7 @@ LEAF_ENTRY RhpNewFast, _TEXT mov rax, [rdx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] add r8, rax - cmp r8, [rdx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r8, [rdx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja RhpNewFast_RarePath ;; set the new alloc pointer @@ -118,7 +118,7 @@ LEAF_ENTRY RhNewString, _TEXT ; rdx == element count ; r8 == array size ; r10 == thread - cmp rax, [r10 + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp rax, [r10 + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja RhpNewArrayRare mov [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax @@ -179,7 +179,7 @@ LEAF_ENTRY RhpNewArray, _TEXT ; rdx == element count ; r8 == array size ; r10 == thread - cmp rax, [r10 + 
OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp rax, [r10 + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja RhpNewArrayRare mov [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax diff --git a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc index 33089b6643d38..41c43252317d9 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc +++ b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc @@ -336,8 +336,8 @@ TSF_DoNotTriggerGc equ 10h ;; ;; Rename fields of nested structs ;; -OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr -OFFSETOF__Thread__m_alloc_context__alloc_limit equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit +OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__Thread__m_eeAllocContext__combined_limit equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit diff --git a/src/coreclr/nativeaot/Runtime/arm/AllocFast.S b/src/coreclr/nativeaot/Runtime/arm/AllocFast.S index 31b54d1bca313..7609130369654 100644 --- a/src/coreclr/nativeaot/Runtime/arm/AllocFast.S +++ b/src/coreclr/nativeaot/Runtime/arm/AllocFast.S @@ -26,7 +26,7 @@ LEAF_ENTRY RhpNewFast, _TEXT ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] add r2, r3 - ldr r1, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr r1, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp r2, r1 bhi LOCAL_LABEL(RhpNewFast_RarePath) @@ -132,7 +132,7 @@ LEAF_ENTRY RhNewString, _TEXT adds r6, r12 bcs LOCAL_LABEL(RhNewString_RarePath) // if we get a carry here, the string is too large to fit below 4 GB - ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr r12, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp 
r6, r12 bhi LOCAL_LABEL(RhNewString_RarePath) @@ -213,7 +213,7 @@ LOCAL_LABEL(ArrayAlignSize): adds r6, r12 bcs LOCAL_LABEL(RhpNewArray_RarePath) // if we get a carry here, the array is too large to fit below 4 GB - ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr r12, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp r6, r12 bhi LOCAL_LABEL(RhpNewArray_RarePath) @@ -349,7 +349,7 @@ LEAF_ENTRY RhpNewFastAlign8, _TEXT // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add r2, r3 - ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr r3, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp r2, r3 bhi LOCAL_LABEL(Alloc8Failed) @@ -412,7 +412,7 @@ LEAF_ENTRY RhpNewFastMisalign, _TEXT // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. 
add r2, r3 - ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr r3, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp r2, r3 bhi LOCAL_LABEL(BoxAlloc8Failed) diff --git a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.S b/src/coreclr/nativeaot/Runtime/arm64/AllocFast.S index bb33e112ad5a6..6c61b2de35639 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.S +++ b/src/coreclr/nativeaot/Runtime/arm64/AllocFast.S @@ -10,8 +10,8 @@ // // Rename fields of nested structs // -#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) -#define OFFSETOF__Thread__m_alloc_context__alloc_limit (OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit) +#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) +#define OFFSETOF__Thread__m_eeAllocContext__combined_limit (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit) @@ -44,7 +44,7 @@ // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add x2, x2, x12 - ldr x13, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr x13, [x1, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp x2, x13 bhi LOCAL_LABEL(RhpNewFast_RarePath) @@ -139,7 +139,7 @@ LOCAL_LABEL(NewOutOfMemory): // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. 
add x2, x2, x12 - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr x12, [x3, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp x2, x12 bhi LOCAL_LABEL(RhNewString_Rare) @@ -207,7 +207,7 @@ LOCAL_LABEL(RhNewString_Rare): // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add x2, x2, x12 - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr x12, [x3, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp x2, x12 bhi LOCAL_LABEL(RhpNewArray_Rare) diff --git a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.asm b/src/coreclr/nativeaot/Runtime/arm64/AllocFast.asm index e6849b8731266..d8e506335d77f 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/AllocFast.asm @@ -30,7 +30,7 @@ ;; Determine whether the end of the object would lie outside of the current allocation context. If so, ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. add x2, x2, x12 - ldr x13, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr x13, [x1, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp x2, x13 bhi RhpNewFast_RarePath @@ -118,7 +118,7 @@ NewOutOfMemory ;; Determine whether the end of the object would lie outside of the current allocation context. If so, ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. add x2, x2, x12 - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr x12, [x3, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp x2, x12 bhi RhpNewArrayRare @@ -179,7 +179,7 @@ StringSizeOverflow ;; Determine whether the end of the object would lie outside of the current allocation context. If so, ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. 
add x2, x2, x12 - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr x12, [x3, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp x2, x12 bhi RhpNewArrayRare diff --git a/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h index 94a559df719e0..2f6e83e2cf9b6 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h +++ b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h @@ -87,8 +87,8 @@ STATUS_REDHAWK_THREAD_ABORT equ 0x43 ;; ;; Rename fields of nested structs ;; -OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr -OFFSETOF__Thread__m_alloc_context__alloc_limit equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit +OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__Thread__m_eeAllocContext__combined_limit equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit ;; ;; IMPORTS diff --git a/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp b/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp index 7f304f9a4335f..4c45406292bf5 100644 --- a/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp +++ b/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp @@ -68,6 +68,16 @@ void GCToEEInterface::RestartEE(bool /*bFinishedGC*/) SyncClean::CleanUp(); + // The GC may change allocation contexts during the GC using GcEnumAllocContexts() so we need to + // update the corresponding combined limits now that the GC is complete. Doing this within + // GcEnumAllocContexts() is challenging to do correctly or efficiently because multiple GC threads + // may enumerate and modify the allocation contexts concurrently. 
+ FOREACH_THREAD(thread) + { + thread->GetEEAllocContext()->UpdateCombinedLimit(); + } + END_FOREACH_THREAD + GetThreadStore()->ResumeAllThreads(true); GCHeapUtilities::GetGCHeap()->SetGCInProgress(FALSE); GetThreadStore()->UnlockThreadStore(); diff --git a/src/coreclr/nativeaot/Runtime/i386/AllocFast.asm b/src/coreclr/nativeaot/Runtime/i386/AllocFast.asm index 8d28e94c94417..d557f5ec75077 100644 --- a/src/coreclr/nativeaot/Runtime/i386/AllocFast.asm +++ b/src/coreclr/nativeaot/Runtime/i386/AllocFast.asm @@ -29,7 +29,7 @@ FASTCALL_FUNC RhpNewFast, 4 ;; add eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] - cmp eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp eax, [edx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja AllocFailed ;; set the new alloc pointer @@ -165,7 +165,7 @@ FASTCALL_FUNC RhNewString, 8 mov ecx, eax add eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] jc StringAllocContextOverflow - cmp eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp eax, [edx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja StringAllocContextOverflow ; ECX == allocation size @@ -282,7 +282,7 @@ ArrayAlignSize: mov ecx, eax add eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] jc ArrayAllocContextOverflow - cmp eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp eax, [edx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja ArrayAllocContextOverflow ; ECX == array size diff --git a/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc b/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc index 896bf8e67dab5..9541f73940215 100644 --- a/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc +++ b/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc @@ -140,8 +140,8 @@ STATUS_REDHAWK_THREAD_ABORT equ 43h ;; ;; Rename fields of nested structs ;; -OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr 
-OFFSETOF__Thread__m_alloc_context__alloc_limit equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit +OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__Thread__m_eeAllocContext__combined_limit equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit ;; ;; CONSTANTS -- SYMBOLS diff --git a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h index 7fb41a9d276a3..21a15a01c905e 100644 --- a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h +++ b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h @@ -495,15 +495,15 @@ struct PInvokeTransitionFrame #define PInvokeTransitionFrame_MAX_SIZE (sizeof(PInvokeTransitionFrame) + (POINTER_SIZE * PInvokeTransitionFrame_SaveRegs_count)) #ifdef TARGET_AMD64 -#define OFFSETOF__Thread__m_pTransitionFrame 0x40 +#define OFFSETOF__Thread__m_pTransitionFrame 0x48 #elif defined(TARGET_ARM64) -#define OFFSETOF__Thread__m_pTransitionFrame 0x40 +#define OFFSETOF__Thread__m_pTransitionFrame 0x48 #elif defined(TARGET_LOONGARCH64) -#define OFFSETOF__Thread__m_pTransitionFrame 0x40 +#define OFFSETOF__Thread__m_pTransitionFrame 0x48 #elif defined(TARGET_X86) -#define OFFSETOF__Thread__m_pTransitionFrame 0x2c +#define OFFSETOF__Thread__m_pTransitionFrame 0x30 #elif defined(TARGET_ARM) -#define OFFSETOF__Thread__m_pTransitionFrame 0x2c +#define OFFSETOF__Thread__m_pTransitionFrame 0x30 #endif typedef DPTR(MethodTable) PTR_EEType; diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S b/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S index 3ba82de580ba9..7cafcbda622ff 100644 --- a/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S +++ b/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S @@ -10,8 +10,8 @@ // // Rename fields of nested structs // -#define OFFSETOF__Thread__m_alloc_context__alloc_ptr 
(OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) -#define OFFSETOF__Thread__m_alloc_context__alloc_limit (OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit) +#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) +#define OFFSETOF__Thread__m_eeAllocContext__combined_limit (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit) @@ -44,7 +44,7 @@ // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add.d $a2, $a2, $t3 - ld.d $t4, $a1, OFFSETOF__Thread__m_alloc_context__alloc_limit + ld.d $t4, $a1, OFFSETOF__Thread__m_eeAllocContext__combined_limit bltu $t4, $a2, LOCAL_LABEL(RhpNewFast_RarePath) // Update the alloc pointer to account for the allocation. @@ -137,7 +137,7 @@ LOCAL_LABEL(NewOutOfMemory): // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add.d $a2, $a2, $t3 - ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_limit + ld.d $t3, $a3, OFFSETOF__Thread__m_eeAllocContext__combined_limit bltu $t3, $a2, LOCAL_LABEL(RhNewString_Rare) // Reload new object address into r12. @@ -199,7 +199,7 @@ LOCAL_LABEL(RhNewString_Rare): // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add.d $a2, $a2, $t3 - ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_limit + ld.d $t3, $a3, OFFSETOF__Thread__m_eeAllocContext__combined_limit bltu $t3, $a2, LOCAL_LABEL(RhpNewArray_Rare) // Reload new object address into t3. 
diff --git a/src/coreclr/nativeaot/Runtime/portable.cpp b/src/coreclr/nativeaot/Runtime/portable.cpp index 318a10fd20a52..d42a6ecc67efe 100644 --- a/src/coreclr/nativeaot/Runtime/portable.cpp +++ b/src/coreclr/nativeaot/Runtime/portable.cpp @@ -64,8 +64,9 @@ FCIMPL1(Object *, RhpNewFast, MethodTable* pEEType) size_t size = pEEType->GetBaseSize(); uint8_t* alloc_ptr = acontext->alloc_ptr; - ASSERT(alloc_ptr <= acontext->alloc_limit); - if ((size_t)(acontext->alloc_limit - alloc_ptr) >= size) + uint8_t* combined_limit = pCurThread->GetEEAllocContext()->GetCombinedLimit(); + ASSERT(alloc_ptr <= combined_limit); + if ((size_t)(combined_limit - alloc_ptr) >= size) { acontext->alloc_ptr = alloc_ptr + size; Object* pObject = (Object *)alloc_ptr; @@ -112,8 +113,9 @@ FCIMPL2(Array *, RhpNewArray, MethodTable * pArrayEEType, int numElements) size = ALIGN_UP(size, sizeof(uintptr_t)); uint8_t* alloc_ptr = acontext->alloc_ptr; - ASSERT(alloc_ptr <= acontext->alloc_limit); - if ((size_t)(acontext->alloc_limit - alloc_ptr) >= size) + uint8_t* combined_limit = pCurThread->GetEEAllocContext()->GetCombinedLimit(); + ASSERT(alloc_ptr <= combined_limit); + if ((size_t)(combined_limit - alloc_ptr) >= size) { acontext->alloc_ptr = alloc_ptr + size; Array* pObject = (Array*)alloc_ptr; @@ -165,8 +167,9 @@ FCIMPL1(Object*, RhpNewFastAlign8, MethodTable* pEEType) paddedSize += 12; } - ASSERT(alloc_ptr <= acontext->alloc_limit); - if ((size_t)(acontext->alloc_limit - alloc_ptr) >= paddedSize) + uint8_t* combined_limit = pCurThread->GetEEAllocContext()->GetCombinedLimit(); + ASSERT(alloc_ptr <= combined_limit); + if ((size_t)(combined_limit - alloc_ptr) >= paddedSize) { acontext->alloc_ptr = alloc_ptr + paddedSize; if (requiresPadding) @@ -199,8 +202,9 @@ FCIMPL1(Object*, RhpNewFastMisalign, MethodTable* pEEType) paddedSize += 12; } - ASSERT(alloc_ptr <= acontext->alloc_limit); - if ((size_t)(acontext->alloc_limit - alloc_ptr) >= paddedSize) + uint8_t* combined_limit = 
pCurThread->GetEEAllocContext()->GetCombinedLimit(); + ASSERT(alloc_ptr <= combined_limit); + if ((size_t)(combined_limit - alloc_ptr) >= paddedSize) { acontext->alloc_ptr = alloc_ptr + paddedSize; if (requiresPadding) @@ -248,8 +252,9 @@ FCIMPL2(Array*, RhpNewArrayAlign8, MethodTable* pArrayEEType, int numElements) paddedSize += 12; } - ASSERT(alloc_ptr <= acontext->alloc_limit); - if ((size_t)(acontext->alloc_limit - alloc_ptr) >= paddedSize) + uint8_t* combined_limit = pCurThread->GetEEAllocContext()->GetCombinedLimit(); + ASSERT(alloc_ptr <= combined_limit); + if ((size_t)(combined_limit - alloc_ptr) >= paddedSize) { acontext->alloc_ptr = alloc_ptr + paddedSize; if (requiresAlignObject) diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index f5128d505838f..debb4eac8c363 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -353,6 +353,7 @@ void Thread::Detach() gc_alloc_context* context = GetAllocContext(); s_DeadThreadsNonAllocBytes += context->alloc_limit - context->alloc_ptr; GCHeapUtilities::GetGCHeap()->FixAllocContext(context, NULL, NULL); + GetEEAllocContext()->UpdateCombinedLimit(); SetDetached(); } diff --git a/src/coreclr/nativeaot/Runtime/thread.h b/src/coreclr/nativeaot/Runtime/thread.h index 4c0a21e9f9ab7..70f776de2ee9a 100644 --- a/src/coreclr/nativeaot/Runtime/thread.h +++ b/src/coreclr/nativeaot/Runtime/thread.h @@ -83,9 +83,43 @@ struct InlinedThreadStaticRoot TypeManager* m_typeManager; }; +// This struct allows adding some state that is only visible to the EE onto the standard gc_alloc_context +struct ee_alloc_context +{ + // Any allocation that would overlap combined_limit needs to be handled by the allocation slow path. 
+ // combined_limit is the minimum of: + // - gc_alloc_context.alloc_limit (the end of the current AC) + // - the sampling_limit + // + // In the simple case that randomized sampling is disabled, combined_limit is always equal to alloc_limit. + // + // There are two different useful interpretations for the sampling_limit. One is to treat the sampling_limit + // as an address and when we allocate an object that overlaps that address we should emit a sampling event. + // The other is that we can treat (sampling_limit - alloc_ptr) as a budget of how many bytes we can allocate + // before emitting a sampling event. If we always allocated objects contiguously in the AC and incremented + // alloc_ptr by the size of the object, these two interpretations would be equivalent. However, when objects + // don't fit in the AC we allocate them in some other address range. The budget interpretation is more + // flexible to handle those cases. + // + // The sampling limit isn't stored in any separate field explicitly; instead it is implied: + // - if combined_limit == alloc_limit there is no sampled byte in the AC. In the budget interpretation + // we can allocate (alloc_limit - alloc_ptr) unsampled bytes. We'll need a new random number after + // that to determine whether future allocated bytes should be sampled. + // This occurs either because the sampling feature is disabled, or because the randomized selection + // of sampled bytes didn't select a byte in this AC. + // - if combined_limit < alloc_limit there is a sampling limit in the AC. sampling_limit = combined_limit. 
+ uint8_t* combined_limit; + uint8_t m_rgbAllocContextBuffer[SIZEOF_ALLOC_CONTEXT]; + + gc_alloc_context* GetGCAllocContext(); + uint8_t* GetCombinedLimit(); + void UpdateCombinedLimit(); +}; + + struct RuntimeThreadLocals { - uint8_t m_rgbAllocContextBuffer[SIZEOF_ALLOC_CONTEXT]; + ee_alloc_context m_eeAllocContext; uint32_t volatile m_ThreadStateFlags; // see Thread::ThreadStateFlags enum PInvokeTransitionFrame* m_pTransitionFrame; PInvokeTransitionFrame* m_pDeferredTransitionFrame; // see Thread::EnablePreemptiveMode @@ -215,6 +249,7 @@ class Thread : private RuntimeThreadLocals bool IsInitialized(); + ee_alloc_context * GetEEAllocContext(); gc_alloc_context * GetAllocContext(); uint64_t GetPalThreadIdForLogging(); diff --git a/src/coreclr/nativeaot/Runtime/thread.inl b/src/coreclr/nativeaot/Runtime/thread.inl index 2daffd0692213..ddc2848424605 100644 --- a/src/coreclr/nativeaot/Runtime/thread.inl +++ b/src/coreclr/nativeaot/Runtime/thread.inl @@ -2,6 +2,36 @@ // The .NET Foundation licenses this file to you under the MIT license. #ifndef DACCESS_COMPILE + + + +inline gc_alloc_context* ee_alloc_context::GetGCAllocContext() +{ + return (gc_alloc_context*)&m_rgbAllocContextBuffer; +} + +inline uint8_t* ee_alloc_context::GetCombinedLimit() +{ + return combined_limit; +} + +// It seems like there is a desire not to include a definition of gc_alloc_context in a more global place within +// the NativeAOT runtime? Instead some individual files include their own definition as needed and others reference +// gcinterface.h to get the official definition. This .inl file gets included from multiple places some of which +// do define the type and others that do not. To avoid getting a redefinition error I added this private definition. +struct _thread_inl_gc_alloc_context +{ + uint8_t* alloc_ptr; + uint8_t* alloc_limit; +}; + +inline void ee_alloc_context::UpdateCombinedLimit() +{ + // The randomized allocation sampling feature is being submitted in stages. 
For now sampling is never enabled so + // combined_limit is always the same as alloc_limit. + combined_limit = ((_thread_inl_gc_alloc_context*)GetGCAllocContext())->alloc_limit; +} + // Set the m_pDeferredTransitionFrame field for GC allocation helpers that setup transition frame // in assembly code. Do not use anywhere else. inline void Thread::SetDeferredTransitionFrame(PInvokeTransitionFrame* pTransitionFrame) @@ -59,9 +89,14 @@ inline void Thread::PopGCFrameRegistration(GCFrameRegistration* pRegistration) m_pGCFrameRegistrations = pRegistration->m_pNext; } +inline ee_alloc_context* Thread::GetEEAllocContext() +{ + return &m_eeAllocContext; +} + inline gc_alloc_context* Thread::GetAllocContext() { - return (gc_alloc_context*)m_rgbAllocContextBuffer; + return GetEEAllocContext()->GetGCAllocContext(); } inline bool Thread::IsStateSet(ThreadStateFlags flags) diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc index b1a437d8b57ea..05667a351a9d8 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc @@ -245,8 +245,8 @@ C_FUNC(\Name): // // Rename fields of nested structs // -#define OFFSETOF__Thread__m_alloc_context__alloc_ptr OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr -#define OFFSETOF__Thread__m_alloc_context__alloc_limit OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit +#define OFFSETOF__Thread__m_alloc_context__alloc_ptr OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +#define OFFSETOF__Thread__m_eeAllocContext__combined_limit OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit // GC type flags #define GC_ALLOC_FINALIZE 1 diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc 
b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc index 68631819f7dee..4ccd38b19c7be 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc @@ -28,8 +28,8 @@ #define TrapThreadsFlags_TrapThreads 2 // Rename fields of nested structs -#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) -#define OFFSETOF__Thread__m_alloc_context__alloc_limit (OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit) +#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) +#define OFFSETOF__Thread__m_eeAllocContext__combined_limit (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit) // GC minimal sized object. We use this to switch between 4 and 8 byte alignment in the GC heap (see AllocFast.asm). 
#define SIZEOF__MinObject 12 From 03da7d8f3ea58bd2f852287dc2190eb9c91a9c57 Mon Sep 17 00:00:00 2001 From: Noah Falk Date: Mon, 15 Jul 2024 22:13:28 -0700 Subject: [PATCH 2/5] PR feedback - removed unnecessary UpdateCombinedLimit() in thread detach - updated comment for workaround on 96081 - swapped to updating combined_limit inside GcEnumAllocContexts() instead of in RestartEE() --- src/coreclr/nativeaot/Runtime/gcenv.ee.cpp | 22 +++++++++++----------- src/coreclr/nativeaot/Runtime/thread.cpp | 1 - src/coreclr/nativeaot/Runtime/thread.inl | 5 +---- 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp b/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp index 4c45406292bf5..162cab516756c 100644 --- a/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp +++ b/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp @@ -68,16 +68,6 @@ void GCToEEInterface::RestartEE(bool /*bFinishedGC*/) SyncClean::CleanUp(); - // The GC may change allocation contexts during the GC using GcEnumAllocContexts() so we need to - // update the corresponding combined limits now that the GC is complete. Doing this within - // GcEnumAllocContexts() is challenging to do correctly or efficiently because multiple GC threads - // may enumerate and modify the allocation contexts concurrently. 
- FOREACH_THREAD(thread) - { - thread->GetEEAllocContext()->UpdateCombinedLimit(); - } - END_FOREACH_THREAD - GetThreadStore()->ResumeAllThreads(true); GCHeapUtilities::GetGCHeap()->SetGCInProgress(FALSE); GetThreadStore()->UnlockThreadStore(); @@ -146,7 +136,17 @@ void GCToEEInterface::GcEnumAllocContexts(enum_alloc_context_func* fn, void* par { FOREACH_THREAD(thread) { - (*fn) (thread->GetAllocContext(), param); + ee_alloc_context* palloc_context = thread->GetEEAllocContext(); + gc_alloc_context* ac = palloc_context->GetGCAllocContext(); + (*fn) (ac, param); + // The GC may zero the alloc_ptr and alloc_limit fields of AC during enumeration and we need to keep + // combined_limit up-to-date. Note that the GC has multiple threads running this enumeration concurrently + // with no synchronization. If you need to change this code think carefully about how that concurrency + // may affect the results. + if(ac->alloc_limit == 0) + { + palloc_context->combined_limit = 0; + } } END_FOREACH_THREAD } diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index debb4eac8c363..f5128d505838f 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -353,7 +353,6 @@ void Thread::Detach() gc_alloc_context* context = GetAllocContext(); s_DeadThreadsNonAllocBytes += context->alloc_limit - context->alloc_ptr; GCHeapUtilities::GetGCHeap()->FixAllocContext(context, NULL, NULL); - GetEEAllocContext()->UpdateCombinedLimit(); SetDetached(); } diff --git a/src/coreclr/nativeaot/Runtime/thread.inl b/src/coreclr/nativeaot/Runtime/thread.inl index ddc2848424605..5c17da3e61f3f 100644 --- a/src/coreclr/nativeaot/Runtime/thread.inl +++ b/src/coreclr/nativeaot/Runtime/thread.inl @@ -15,10 +15,7 @@ inline uint8_t* ee_alloc_context::GetCombinedLimit() return combined_limit; } -// It seems like there is a desire not to include a definition of gc_alloc_context in a more global place within -// the NativeAOT 
runtime? Instead some individual files include their own definition as needed and others reference -// gcinterface.h to get the official definition. This .inl file gets included from multiple places some of which -// do define the type and others that do not. To avoid getting a redefinition error I added this private definition. +// Workaround for https://github.com/dotnet/runtime/issues/96081 struct _thread_inl_gc_alloc_context { uint8_t* alloc_ptr; From c457c292d3055bf366eb42c91c2f61edde0ff1b6 Mon Sep 17 00:00:00 2001 From: Noah Falk Date: Tue, 16 Jul 2024 00:00:46 -0700 Subject: [PATCH 3/5] Update src/coreclr/nativeaot/Runtime/gcenv.ee.cpp Co-authored-by: Jan Kotas --- src/coreclr/nativeaot/Runtime/gcenv.ee.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp b/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp index 162cab516756c..e366bdc59620b 100644 --- a/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp +++ b/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp @@ -143,7 +143,7 @@ void GCToEEInterface::GcEnumAllocContexts(enum_alloc_context_func* fn, void* par // combined_limit up-to-date. Note that the GC has multiple threads running this enumeration concurrently // with no synchronization. If you need to change this code think carefully about how that concurrency // may affect the results. 
- if(ac->alloc_limit == 0) + if (ac->alloc_limit == 0) { palloc_context->combined_limit = 0; } From 36ecf6d39d0bb4042ad35c075bb3eab867b411d2 Mon Sep 17 00:00:00 2001 From: Noah Falk Date: Thu, 18 Jul 2024 17:51:18 -0700 Subject: [PATCH 4/5] Reduce unnecessary combined_limit writes --- src/coreclr/nativeaot/Runtime/gcenv.ee.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp b/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp index e366bdc59620b..0f9aa696d2338 100644 --- a/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp +++ b/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp @@ -143,7 +143,7 @@ void GCToEEInterface::GcEnumAllocContexts(enum_alloc_context_func* fn, void* par // combined_limit up-to-date. Note that the GC has multiple threads running this enumeration concurrently // with no synchronization. If you need to change this code think carefully about how that concurrency // may affect the results. - if (ac->alloc_limit == 0) + if (ac->alloc_limit == 0 && palloc_context->combined_limit != 0) { palloc_context->combined_limit = 0; } From 6f634cbce4d9c2919c9f060247aae5d160794241 Mon Sep 17 00:00:00 2001 From: Noah Falk Date: Tue, 15 Oct 2024 13:53:06 -0700 Subject: [PATCH 5/5] Bumping the debug header major version --- src/coreclr/nativeaot/Runtime/DebugHeader.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/DebugHeader.cpp b/src/coreclr/nativeaot/Runtime/DebugHeader.cpp index e32956dde4ee2..051b9b0d8f7a8 100644 --- a/src/coreclr/nativeaot/Runtime/DebugHeader.cpp +++ b/src/coreclr/nativeaot/Runtime/DebugHeader.cpp @@ -77,7 +77,12 @@ struct DotNetRuntimeDebugHeader // This counter can be incremented to indicate breaking changes // This field must be encoded little endian, regardless of the typical endianness of // the machine - const uint16_t MajorVersion = 4; + // Changes: + // v1-v4 were never doc'ed but history is in source control if you need it + // v5 - Thread
now has an m_eeAllocContext field and the previous m_rgbAllocContextBuffer + // field is nested inside of it. + // + const uint16_t MajorVersion = 5; // This counter can be incremented to indicate back-compatible changes // This field must be encoded little endian, regardless of the typical endianness of