diff --git a/src/coreclr/nativeaot/Runtime/AsmOffsets.h b/src/coreclr/nativeaot/Runtime/AsmOffsets.h index 32abd406175e7..cb6bf8842e04b 100644 --- a/src/coreclr/nativeaot/Runtime/AsmOffsets.h +++ b/src/coreclr/nativeaot/Runtime/AsmOffsets.h @@ -46,21 +46,24 @@ ASM_OFFSET( 0, 0, MethodTable, m_uFlags) ASM_OFFSET( 4, 4, MethodTable, m_uBaseSize) ASM_OFFSET( 14, 18, MethodTable, m_VTable) -ASM_OFFSET( 0, 0, Thread, m_rgbAllocContextBuffer) -ASM_OFFSET( 28, 38, Thread, m_ThreadStateFlags) -ASM_OFFSET( 2c, 40, Thread, m_pTransitionFrame) -ASM_OFFSET( 30, 48, Thread, m_pDeferredTransitionFrame) -ASM_OFFSET( 40, 68, Thread, m_ppvHijackedReturnAddressLocation) -ASM_OFFSET( 44, 70, Thread, m_pvHijackedReturnAddress) -ASM_OFFSET( 48, 78, Thread, m_uHijackedReturnValueFlags) -ASM_OFFSET( 4c, 80, Thread, m_pExInfoStackHead) -ASM_OFFSET( 50, 88, Thread, m_threadAbortException) +ASM_OFFSET( 0, 0, Thread, m_eeAllocContext) +ASM_OFFSET( 2c, 40, Thread, m_ThreadStateFlags) +ASM_OFFSET( 30, 48, Thread, m_pTransitionFrame) +ASM_OFFSET( 34, 50, Thread, m_pDeferredTransitionFrame) +ASM_OFFSET( 44, 70, Thread, m_ppvHijackedReturnAddressLocation) +ASM_OFFSET( 48, 78, Thread, m_pvHijackedReturnAddress) +ASM_OFFSET( 4c, 80, Thread, m_uHijackedReturnValueFlags) +ASM_OFFSET( 50, 88, Thread, m_pExInfoStackHead) +ASM_OFFSET( 54, 90, Thread, m_threadAbortException) ASM_SIZEOF( 14, 20, EHEnum) ASM_OFFSET( 0, 0, gc_alloc_context, alloc_ptr) ASM_OFFSET( 4, 8, gc_alloc_context, alloc_limit) +ASM_OFFSET( 0, 0, ee_alloc_context, combined_limit) +ASM_OFFSET( 4, 8, ee_alloc_context, m_rgbAllocContextBuffer) + #ifdef FEATURE_CACHED_INTERFACE_DISPATCH ASM_OFFSET( 4, 8, InterfaceDispatchCell, m_pCache) #ifdef INTERFACE_DISPATCH_CACHE_HAS_CELL_BACKPOINTER diff --git a/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp b/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp index b5520d739e871..d27884dbdf1ff 100644 --- a/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp +++ 
b/src/coreclr/nativeaot/Runtime/AsmOffsetsVerify.cpp @@ -22,7 +22,7 @@ class AsmOffsets { - static_assert(sizeof(Thread::m_rgbAllocContextBuffer) >= sizeof(gc_alloc_context), "Thread::m_rgbAllocContextBuffer is not big enough to hold a gc_alloc_context"); + static_assert(sizeof(ee_alloc_context::m_rgbAllocContextBuffer) >= sizeof(gc_alloc_context), "ee_alloc_context::m_rgbAllocContextBuffer is not big enough to hold a gc_alloc_context"); // Some assembly helpers for arrays and strings are shared and use the fact that arrays and strings have similar layouts) static_assert(offsetof(Array, m_Length) == offsetof(String, m_Length), "The length field of String and Array have different offsets"); diff --git a/src/coreclr/nativeaot/Runtime/DebugHeader.cpp b/src/coreclr/nativeaot/Runtime/DebugHeader.cpp index 324e0f86f2aea..051b9b0d8f7a8 100644 --- a/src/coreclr/nativeaot/Runtime/DebugHeader.cpp +++ b/src/coreclr/nativeaot/Runtime/DebugHeader.cpp @@ -77,7 +77,12 @@ struct DotNetRuntimeDebugHeader // This counter can be incremented to indicate breaking changes // This field must be encoded little endian, regardless of the typical endianness of // the machine - const uint16_t MajorVersion = 4; + // Changes: + // v1-v4 were never doc'ed but history is in source control if you need it + // v5 - Thread now has an m_eeAllocContext field and the previous m_rgbAllocContextBuffer + // field is nested inside of it. 
+ // + const uint16_t MajorVersion = 5; // This counter can be incremented to indicate back-compatible changes // This field must be encoded little endian, regardless of the typical endianness of @@ -163,6 +168,9 @@ extern "C" void PopulateDebugHeaders() MAKE_DEBUG_FIELD_ENTRY(dac_gc_heap, finalize_queue); MAKE_DEBUG_FIELD_ENTRY(dac_gc_heap, generation_table); + MAKE_SIZE_ENTRY(ee_alloc_context); + MAKE_DEBUG_FIELD_ENTRY(ee_alloc_context, m_rgbAllocContextBuffer); + MAKE_SIZE_ENTRY(gc_alloc_context); MAKE_DEBUG_FIELD_ENTRY(gc_alloc_context, alloc_ptr); MAKE_DEBUG_FIELD_ENTRY(gc_alloc_context, alloc_limit); @@ -194,7 +202,7 @@ extern "C" void PopulateDebugHeaders() MAKE_SIZE_ENTRY(RuntimeThreadLocals); MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_pNext); - MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_rgbAllocContextBuffer); + MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_eeAllocContext); MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_threadId); MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_pThreadStressLog); MAKE_DEBUG_FIELD_ENTRY(RuntimeThreadLocals, m_pExInfoStackHead); diff --git a/src/coreclr/nativeaot/Runtime/GCHelpers.cpp b/src/coreclr/nativeaot/Runtime/GCHelpers.cpp index 5833bd65c2f10..b749f68365817 100644 --- a/src/coreclr/nativeaot/Runtime/GCHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/GCHelpers.cpp @@ -540,6 +540,7 @@ static Object* GcAllocInternal(MethodTable* pEEType, uint32_t uFlags, uintptr_t tls_pLastAllocationEEType = pEEType; Object* pObject = GCHeapUtilities::GetGCHeap()->Alloc(pThread->GetAllocContext(), cbSize, uFlags); + pThread->GetEEAllocContext()->UpdateCombinedLimit(); if (pObject == NULL) return NULL; diff --git a/src/coreclr/nativeaot/Runtime/amd64/AllocFast.S b/src/coreclr/nativeaot/Runtime/amd64/AllocFast.S index 6cb85bcc507a0..8923a7a4fbb64 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/AllocFast.S +++ b/src/coreclr/nativeaot/Runtime/amd64/AllocFast.S @@ -28,7 +28,7 @@ NESTED_ENTRY RhpNewFast, _TEXT, NoHandler mov rsi, [rax + 
OFFSETOF__Thread__m_alloc_context__alloc_ptr] add rdx, rsi - cmp rdx, [rax + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp rdx, [rax + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja LOCAL_LABEL(RhpNewFast_RarePath) // set the new alloc pointer @@ -143,7 +143,7 @@ NESTED_ENTRY RhNewString, _TEXT, NoHandler // rcx == Thread* // rdx == string size // r12 == element count - cmp rax, [rcx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp rax, [rcx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja LOCAL_LABEL(RhNewString_RarePath) mov [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax @@ -226,7 +226,7 @@ NESTED_ENTRY RhpNewArray, _TEXT, NoHandler // rcx == Thread* // rdx == array size // r12 == element count - cmp rax, [rcx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp rax, [rcx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja LOCAL_LABEL(RhpNewArray_RarePath) mov [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax diff --git a/src/coreclr/nativeaot/Runtime/amd64/AllocFast.asm b/src/coreclr/nativeaot/Runtime/amd64/AllocFast.asm index 37be558c3cef1..6ba69c0c14127 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/AllocFast.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/AllocFast.asm @@ -25,7 +25,7 @@ LEAF_ENTRY RhpNewFast, _TEXT mov rax, [rdx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] add r8, rax - cmp r8, [rdx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp r8, [rdx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja RhpNewFast_RarePath ;; set the new alloc pointer @@ -118,7 +118,7 @@ LEAF_ENTRY RhNewString, _TEXT ; rdx == element count ; r8 == array size ; r10 == thread - cmp rax, [r10 + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp rax, [r10 + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja RhpNewArrayRare mov [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax @@ -179,7 +179,7 @@ LEAF_ENTRY RhpNewArray, _TEXT ; rdx == element count ; r8 == array size ; r10 == thread - cmp 
rax, [r10 + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp rax, [r10 + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja RhpNewArrayRare mov [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax diff --git a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc index 33089b6643d38..41c43252317d9 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc +++ b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc @@ -336,8 +336,8 @@ TSF_DoNotTriggerGc equ 10h ;; ;; Rename fields of nested structs ;; -OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr -OFFSETOF__Thread__m_alloc_context__alloc_limit equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit +OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__Thread__m_eeAllocContext__combined_limit equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit diff --git a/src/coreclr/nativeaot/Runtime/arm/AllocFast.S b/src/coreclr/nativeaot/Runtime/arm/AllocFast.S index 31b54d1bca313..7609130369654 100644 --- a/src/coreclr/nativeaot/Runtime/arm/AllocFast.S +++ b/src/coreclr/nativeaot/Runtime/arm/AllocFast.S @@ -26,7 +26,7 @@ LEAF_ENTRY RhpNewFast, _TEXT ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] add r2, r3 - ldr r1, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr r1, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp r2, r1 bhi LOCAL_LABEL(RhpNewFast_RarePath) @@ -132,7 +132,7 @@ LEAF_ENTRY RhNewString, _TEXT adds r6, r12 bcs LOCAL_LABEL(RhNewString_RarePath) // if we get a carry here, the string is too large to fit below 4 GB - ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr r12, [r0, 
#OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp r6, r12 bhi LOCAL_LABEL(RhNewString_RarePath) @@ -213,7 +213,7 @@ LOCAL_LABEL(ArrayAlignSize): adds r6, r12 bcs LOCAL_LABEL(RhpNewArray_RarePath) // if we get a carry here, the array is too large to fit below 4 GB - ldr r12, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr r12, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp r6, r12 bhi LOCAL_LABEL(RhpNewArray_RarePath) @@ -349,7 +349,7 @@ LEAF_ENTRY RhpNewFastAlign8, _TEXT // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add r2, r3 - ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr r3, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp r2, r3 bhi LOCAL_LABEL(Alloc8Failed) @@ -412,7 +412,7 @@ LEAF_ENTRY RhpNewFastMisalign, _TEXT // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. 
add r2, r3 - ldr r3, [r0, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr r3, [r0, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp r2, r3 bhi LOCAL_LABEL(BoxAlloc8Failed) diff --git a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.S b/src/coreclr/nativeaot/Runtime/arm64/AllocFast.S index bb33e112ad5a6..6c61b2de35639 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.S +++ b/src/coreclr/nativeaot/Runtime/arm64/AllocFast.S @@ -10,8 +10,8 @@ // // Rename fields of nested structs // -#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) -#define OFFSETOF__Thread__m_alloc_context__alloc_limit (OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit) +#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) +#define OFFSETOF__Thread__m_eeAllocContext__combined_limit (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit) @@ -44,7 +44,7 @@ // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add x2, x2, x12 - ldr x13, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr x13, [x1, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp x2, x13 bhi LOCAL_LABEL(RhpNewFast_RarePath) @@ -139,7 +139,7 @@ LOCAL_LABEL(NewOutOfMemory): // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. 
add x2, x2, x12 - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr x12, [x3, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp x2, x12 bhi LOCAL_LABEL(RhNewString_Rare) @@ -207,7 +207,7 @@ LOCAL_LABEL(RhNewString_Rare): // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add x2, x2, x12 - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr x12, [x3, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp x2, x12 bhi LOCAL_LABEL(RhpNewArray_Rare) diff --git a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.asm b/src/coreclr/nativeaot/Runtime/arm64/AllocFast.asm index e6849b8731266..d8e506335d77f 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/AllocFast.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/AllocFast.asm @@ -30,7 +30,7 @@ ;; Determine whether the end of the object would lie outside of the current allocation context. If so, ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. add x2, x2, x12 - ldr x13, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr x13, [x1, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp x2, x13 bhi RhpNewFast_RarePath @@ -118,7 +118,7 @@ NewOutOfMemory ;; Determine whether the end of the object would lie outside of the current allocation context. If so, ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. add x2, x2, x12 - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr x12, [x3, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp x2, x12 bhi RhpNewArrayRare @@ -179,7 +179,7 @@ StringSizeOverflow ;; Determine whether the end of the object would lie outside of the current allocation context. If so, ;; we abandon the attempt to allocate the object directly and fall back to the slow helper. 
add x2, x2, x12 - ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + ldr x12, [x3, #OFFSETOF__Thread__m_eeAllocContext__combined_limit] cmp x2, x12 bhi RhpNewArrayRare diff --git a/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h index 94a559df719e0..2f6e83e2cf9b6 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h +++ b/src/coreclr/nativeaot/Runtime/arm64/AsmMacros.h @@ -87,8 +87,8 @@ STATUS_REDHAWK_THREAD_ABORT equ 0x43 ;; ;; Rename fields of nested structs ;; -OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr -OFFSETOF__Thread__m_alloc_context__alloc_limit equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit +OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__Thread__m_eeAllocContext__combined_limit equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit ;; ;; IMPORTS diff --git a/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp b/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp index 7f304f9a4335f..0f9aa696d2338 100644 --- a/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp +++ b/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp @@ -136,7 +136,17 @@ void GCToEEInterface::GcEnumAllocContexts(enum_alloc_context_func* fn, void* par { FOREACH_THREAD(thread) { - (*fn) (thread->GetAllocContext(), param); + ee_alloc_context* palloc_context = thread->GetEEAllocContext(); + gc_alloc_context* ac = palloc_context->GetGCAllocContext(); + (*fn) (ac, param); + // The GC may zero the alloc_ptr and alloc_limit fields of AC during enumeration and we need to keep + // combined_limit up-to-date. Note that the GC has multiple threads running this enumeration concurrently + // with no synchronization. 
If you need to change this code think carefully about how that concurrency + // may affect the results. + if (ac->alloc_limit == 0 && palloc_context->combined_limit != 0) + { + palloc_context->combined_limit = 0; + } } END_FOREACH_THREAD } diff --git a/src/coreclr/nativeaot/Runtime/i386/AllocFast.asm b/src/coreclr/nativeaot/Runtime/i386/AllocFast.asm index 8d28e94c94417..d557f5ec75077 100644 --- a/src/coreclr/nativeaot/Runtime/i386/AllocFast.asm +++ b/src/coreclr/nativeaot/Runtime/i386/AllocFast.asm @@ -29,7 +29,7 @@ FASTCALL_FUNC RhpNewFast, 4 ;; add eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] - cmp eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp eax, [edx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja AllocFailed ;; set the new alloc pointer @@ -165,7 +165,7 @@ FASTCALL_FUNC RhNewString, 8 mov ecx, eax add eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] jc StringAllocContextOverflow - cmp eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp eax, [edx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja StringAllocContextOverflow ; ECX == allocation size @@ -282,7 +282,7 @@ ArrayAlignSize: mov ecx, eax add eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_ptr] jc ArrayAllocContextOverflow - cmp eax, [edx + OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp eax, [edx + OFFSETOF__Thread__m_eeAllocContext__combined_limit] ja ArrayAllocContextOverflow ; ECX == array size diff --git a/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc b/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc index 896bf8e67dab5..9541f73940215 100644 --- a/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc +++ b/src/coreclr/nativeaot/Runtime/i386/AsmMacros.inc @@ -140,8 +140,8 @@ STATUS_REDHAWK_THREAD_ABORT equ 43h ;; ;; Rename fields of nested structs ;; -OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr 
-OFFSETOF__Thread__m_alloc_context__alloc_limit equ OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit +OFFSETOF__Thread__m_alloc_context__alloc_ptr equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__Thread__m_eeAllocContext__combined_limit equ OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit ;; ;; CONSTANTS -- SYMBOLS diff --git a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h index 7fb41a9d276a3..21a15a01c905e 100644 --- a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h +++ b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h @@ -495,15 +495,15 @@ struct PInvokeTransitionFrame #define PInvokeTransitionFrame_MAX_SIZE (sizeof(PInvokeTransitionFrame) + (POINTER_SIZE * PInvokeTransitionFrame_SaveRegs_count)) #ifdef TARGET_AMD64 -#define OFFSETOF__Thread__m_pTransitionFrame 0x40 +#define OFFSETOF__Thread__m_pTransitionFrame 0x48 #elif defined(TARGET_ARM64) -#define OFFSETOF__Thread__m_pTransitionFrame 0x40 +#define OFFSETOF__Thread__m_pTransitionFrame 0x48 #elif defined(TARGET_LOONGARCH64) -#define OFFSETOF__Thread__m_pTransitionFrame 0x40 +#define OFFSETOF__Thread__m_pTransitionFrame 0x48 #elif defined(TARGET_X86) -#define OFFSETOF__Thread__m_pTransitionFrame 0x2c +#define OFFSETOF__Thread__m_pTransitionFrame 0x30 #elif defined(TARGET_ARM) -#define OFFSETOF__Thread__m_pTransitionFrame 0x2c +#define OFFSETOF__Thread__m_pTransitionFrame 0x30 #endif typedef DPTR(MethodTable) PTR_EEType; diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S b/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S index 3ba82de580ba9..7cafcbda622ff 100644 --- a/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S +++ b/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S @@ -10,8 +10,8 @@ // // Rename fields of nested structs // -#define OFFSETOF__Thread__m_alloc_context__alloc_ptr 
(OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) -#define OFFSETOF__Thread__m_alloc_context__alloc_limit (OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit) +#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) +#define OFFSETOF__Thread__m_eeAllocContext__combined_limit (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit) @@ -44,7 +44,7 @@ // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add.d $a2, $a2, $t3 - ld.d $t4, $a1, OFFSETOF__Thread__m_alloc_context__alloc_limit + ld.d $t4, $a1, OFFSETOF__Thread__m_eeAllocContext__combined_limit bltu $t4, $a2, LOCAL_LABEL(RhpNewFast_RarePath) // Update the alloc pointer to account for the allocation. @@ -137,7 +137,7 @@ LOCAL_LABEL(NewOutOfMemory): // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add.d $a2, $a2, $t3 - ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_limit + ld.d $t3, $a3, OFFSETOF__Thread__m_eeAllocContext__combined_limit bltu $t3, $a2, LOCAL_LABEL(RhNewString_Rare) // Reload new object address into r12. @@ -199,7 +199,7 @@ LOCAL_LABEL(RhNewString_Rare): // Determine whether the end of the object would lie outside of the current allocation context. If so, // we abandon the attempt to allocate the object directly and fall back to the slow helper. add.d $a2, $a2, $t3 - ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_limit + ld.d $t3, $a3, OFFSETOF__Thread__m_eeAllocContext__combined_limit bltu $t3, $a2, LOCAL_LABEL(RhpNewArray_Rare) // Reload new object address into t3. 
diff --git a/src/coreclr/nativeaot/Runtime/portable.cpp b/src/coreclr/nativeaot/Runtime/portable.cpp index 318a10fd20a52..d42a6ecc67efe 100644 --- a/src/coreclr/nativeaot/Runtime/portable.cpp +++ b/src/coreclr/nativeaot/Runtime/portable.cpp @@ -64,8 +64,9 @@ FCIMPL1(Object *, RhpNewFast, MethodTable* pEEType) size_t size = pEEType->GetBaseSize(); uint8_t* alloc_ptr = acontext->alloc_ptr; - ASSERT(alloc_ptr <= acontext->alloc_limit); - if ((size_t)(acontext->alloc_limit - alloc_ptr) >= size) + uint8_t* combined_limit = pCurThread->GetEEAllocContext()->GetCombinedLimit(); + ASSERT(alloc_ptr <= combined_limit); + if ((size_t)(combined_limit - alloc_ptr) >= size) { acontext->alloc_ptr = alloc_ptr + size; Object* pObject = (Object *)alloc_ptr; @@ -112,8 +113,9 @@ FCIMPL2(Array *, RhpNewArray, MethodTable * pArrayEEType, int numElements) size = ALIGN_UP(size, sizeof(uintptr_t)); uint8_t* alloc_ptr = acontext->alloc_ptr; - ASSERT(alloc_ptr <= acontext->alloc_limit); - if ((size_t)(acontext->alloc_limit - alloc_ptr) >= size) + uint8_t* combined_limit = pCurThread->GetEEAllocContext()->GetCombinedLimit(); + ASSERT(alloc_ptr <= combined_limit); + if ((size_t)(combined_limit - alloc_ptr) >= size) { acontext->alloc_ptr = alloc_ptr + size; Array* pObject = (Array*)alloc_ptr; @@ -165,8 +167,9 @@ FCIMPL1(Object*, RhpNewFastAlign8, MethodTable* pEEType) paddedSize += 12; } - ASSERT(alloc_ptr <= acontext->alloc_limit); - if ((size_t)(acontext->alloc_limit - alloc_ptr) >= paddedSize) + uint8_t* combined_limit = pCurThread->GetEEAllocContext()->GetCombinedLimit(); + ASSERT(alloc_ptr <= combined_limit); + if ((size_t)(combined_limit - alloc_ptr) >= paddedSize) { acontext->alloc_ptr = alloc_ptr + paddedSize; if (requiresPadding) @@ -199,8 +202,9 @@ FCIMPL1(Object*, RhpNewFastMisalign, MethodTable* pEEType) paddedSize += 12; } - ASSERT(alloc_ptr <= acontext->alloc_limit); - if ((size_t)(acontext->alloc_limit - alloc_ptr) >= paddedSize) + uint8_t* combined_limit = 
pCurThread->GetEEAllocContext()->GetCombinedLimit(); + ASSERT(alloc_ptr <= combined_limit); + if ((size_t)(combined_limit - alloc_ptr) >= paddedSize) { acontext->alloc_ptr = alloc_ptr + paddedSize; if (requiresPadding) @@ -248,8 +252,9 @@ FCIMPL2(Array*, RhpNewArrayAlign8, MethodTable* pArrayEEType, int numElements) paddedSize += 12; } - ASSERT(alloc_ptr <= acontext->alloc_limit); - if ((size_t)(acontext->alloc_limit - alloc_ptr) >= paddedSize) + uint8_t* combined_limit = pCurThread->GetEEAllocContext()->GetCombinedLimit(); + ASSERT(alloc_ptr <= combined_limit); + if ((size_t)(combined_limit - alloc_ptr) >= paddedSize) { acontext->alloc_ptr = alloc_ptr + paddedSize; if (requiresAlignObject) diff --git a/src/coreclr/nativeaot/Runtime/thread.h b/src/coreclr/nativeaot/Runtime/thread.h index 4c0a21e9f9ab7..70f776de2ee9a 100644 --- a/src/coreclr/nativeaot/Runtime/thread.h +++ b/src/coreclr/nativeaot/Runtime/thread.h @@ -83,9 +83,43 @@ struct InlinedThreadStaticRoot TypeManager* m_typeManager; }; +// This struct allows adding some state that is only visible to the EE onto the standard gc_alloc_context +struct ee_alloc_context +{ + // Any allocation that would overlap combined_limit needs to be handled by the allocation slow path. + // combined_limit is the minimum of: + // - gc_alloc_context.alloc_limit (the end of the current AC) + // - the sampling_limit + // + // In the simple case that randomized sampling is disabled, combined_limit is always equal to alloc_limit. + // + // There are two different useful interpretations for the sampling_limit. One is to treat the sampling_limit + // as an address and when we allocate an object that overlaps that address we should emit a sampling event. + // The other is that we can treat (sampling_limit - alloc_ptr) as a budget of how many bytes we can allocate + // before emitting a sampling event. 
If we always allocated objects contiguously in the AC and incremented + // alloc_ptr by the size of the object, these two interpretations would be equivalent. However, when objects + // don't fit in the AC we allocate them in some other address range. The budget interpretation is more + // flexible to handle those cases. + // + // The sampling limit isn't stored in any separate field explicitly, instead it is implied: + // - if combined_limit == alloc_limit there is no sampled byte in the AC. In the budget interpretation + // we can allocate (alloc_limit - alloc_ptr) unsampled bytes. We'll need a new random number after + // that to determine whether future allocated bytes should be sampled. + // This occurs either because the sampling feature is disabled, or because the randomized selection + // of sampled bytes didn't select a byte in this AC. + // - if combined_limit < alloc_limit there is a sample limit in the AC. sample_limit = combined_limit. + uint8_t* combined_limit; + uint8_t m_rgbAllocContextBuffer[SIZEOF_ALLOC_CONTEXT]; + + gc_alloc_context* GetGCAllocContext(); + uint8_t* GetCombinedLimit(); + void UpdateCombinedLimit(); +}; + + struct RuntimeThreadLocals { - uint8_t m_rgbAllocContextBuffer[SIZEOF_ALLOC_CONTEXT]; + ee_alloc_context m_eeAllocContext; uint32_t volatile m_ThreadStateFlags; // see Thread::ThreadStateFlags enum PInvokeTransitionFrame* m_pTransitionFrame; PInvokeTransitionFrame* m_pDeferredTransitionFrame; // see Thread::EnablePreemptiveMode @@ -215,6 +249,7 @@ class Thread : private RuntimeThreadLocals bool IsInitialized(); + ee_alloc_context * GetEEAllocContext(); gc_alloc_context * GetAllocContext(); uint64_t GetPalThreadIdForLogging(); diff --git a/src/coreclr/nativeaot/Runtime/thread.inl b/src/coreclr/nativeaot/Runtime/thread.inl index 2daffd0692213..5c17da3e61f3f 100644 --- a/src/coreclr/nativeaot/Runtime/thread.inl +++ b/src/coreclr/nativeaot/Runtime/thread.inl @@ -2,6 +2,33 @@ // The .NET Foundation licenses this file to you under the 
MIT license. #ifndef DACCESS_COMPILE + + + +inline gc_alloc_context* ee_alloc_context::GetGCAllocContext() +{ + return (gc_alloc_context*)&m_rgbAllocContextBuffer; +} + +inline uint8_t* ee_alloc_context::GetCombinedLimit() +{ + return combined_limit; +} + +// Workaround for https://github.com/dotnet/runtime/issues/96081 +struct _thread_inl_gc_alloc_context +{ + uint8_t* alloc_ptr; + uint8_t* alloc_limit; +}; + +inline void ee_alloc_context::UpdateCombinedLimit() +{ + // The randomized allocation sampling feature is being submitted in stages. For now sampling is never enabled so + // combined_limit is always the same as alloc_limit. + combined_limit = ((_thread_inl_gc_alloc_context*)GetGCAllocContext())->alloc_limit; +} + // Set the m_pDeferredTransitionFrame field for GC allocation helpers that setup transition frame // in assembly code. Do not use anywhere else. inline void Thread::SetDeferredTransitionFrame(PInvokeTransitionFrame* pTransitionFrame) @@ -59,9 +86,14 @@ inline void Thread::PopGCFrameRegistration(GCFrameRegistration* pRegistration) m_pGCFrameRegistrations = pRegistration->m_pNext; } +inline ee_alloc_context* Thread::GetEEAllocContext() +{ + return &m_eeAllocContext; +} + inline gc_alloc_context* Thread::GetAllocContext() { - return (gc_alloc_context*)m_rgbAllocContextBuffer; + return GetEEAllocContext()->GetGCAllocContext(); } inline bool Thread::IsStateSet(ThreadStateFlags flags) diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc index b1a437d8b57ea..05667a351a9d8 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc @@ -245,8 +245,8 @@ C_FUNC(\Name): // // Rename fields of nested structs // -#define OFFSETOF__Thread__m_alloc_context__alloc_ptr OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr -#define OFFSETOF__Thread__m_alloc_context__alloc_limit 
OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit +#define OFFSETOF__Thread__m_alloc_context__alloc_ptr OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +#define OFFSETOF__Thread__m_eeAllocContext__combined_limit OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit // GC type flags #define GC_ALLOC_FINALIZE 1 diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc index 68631819f7dee..4ccd38b19c7be 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosarm.inc @@ -28,8 +28,8 @@ #define TrapThreadsFlags_TrapThreads 2 // Rename fields of nested structs -#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) -#define OFFSETOF__Thread__m_alloc_context__alloc_limit (OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit) +#define OFFSETOF__Thread__m_alloc_context__alloc_ptr (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr) +#define OFFSETOF__Thread__m_eeAllocContext__combined_limit (OFFSETOF__Thread__m_eeAllocContext + OFFSETOF__ee_alloc_context__combined_limit) // GC minimal sized object. We use this to switch between 4 and 8 byte alignment in the GC heap (see AllocFast.asm). #define SIZEOF__MinObject 12