From d379d385e5f066cc53e80858cc0863d50a4fd675 Mon Sep 17 00:00:00 2001
From: EgorBo <egorbo@gmail.com>
Date: Mon, 3 Feb 2025 22:21:02 +0100
Subject: [PATCH 01/17] Better fix

---
 src/coreclr/inc/corinfo.h                     |  1 +
 src/coreclr/inc/jiteeversionguid.h            | 10 +++---
 src/coreclr/inc/jithelpers.h                  |  1 +
 src/coreclr/jit/assertionprop.cpp             |  1 +
 src/coreclr/jit/flowgraph.cpp                 | 10 ++++++
 src/coreclr/jit/importer.cpp                  | 27 ++++++++++++++++
 src/coreclr/jit/utils.cpp                     |  1 +
 .../Common/JitInterface/CorInfoHelpFunc.cs    |  1 +
 src/coreclr/vm/gchelpers.cpp                  | 10 ++++++
 src/coreclr/vm/jitinterface.h                 |  1 +
 src/coreclr/vm/reflectioninvocation.cpp       | 32 +++++++++++++++++--
 11 files changed, 87 insertions(+), 8 deletions(-)

diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h
index 9d01304c6639d8..fd98627ff82da5 100644
--- a/src/coreclr/inc/corinfo.h
+++ b/src/coreclr/inc/corinfo.h
@@ -440,6 +440,7 @@ enum CorInfoHelpFunc
     CORINFO_HELP_ASSIGN_REF,        // universal helpers with F_CALL_CONV calling convention
     CORINFO_HELP_CHECKED_ASSIGN_REF,
     CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP,  // Do the store, and ensure that the target was not in the heap.
+    CORINFO_HELP_ENSURE_NONHEAP,             // Ensure that the target was not in the heap.
 
     CORINFO_HELP_ASSIGN_BYREF,
     CORINFO_HELP_BULK_WRITEBARRIER,
diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h
index 2db16d00df135b..5991a686d9bf39 100644
--- a/src/coreclr/inc/jiteeversionguid.h
+++ b/src/coreclr/inc/jiteeversionguid.h
@@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
 #define GUID_DEFINED
 #endif // !GUID_DEFINED
 
-constexpr GUID JITEEVersionIdentifier = { /* cc0e7adf-e397-40b6-9d14-a7149815c991 */
-    0xcc0e7adf,
-    0xe397,
-    0x40b6,
-    {0x9d, 0x14, 0xa7, 0x14, 0x98, 0x15, 0xc9, 0x91}
+constexpr GUID JITEEVersionIdentifier = { /* bcc768c7-c29d-467d-89f6-f5adca5f2e59 */
+    0xbcc768c7,
+    0xc29d,
+    0x467d,
+    {0x89, 0xf6, 0xf5, 0xad, 0xca, 0x5f, 0x2e, 0x59}
   };
 
 //////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h
index 38114a9bbfcada..a1e1822c0d27e4 100644
--- a/src/coreclr/inc/jithelpers.h
+++ b/src/coreclr/inc/jithelpers.h
@@ -157,6 +157,7 @@
     DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF,   JIT_WriteBarrier,   METHOD__NIL)
     DYNAMICJITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF, JIT_CheckedWriteBarrier,METHOD__NIL)
     JITHELPER(CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP, JIT_WriteBarrierEnsureNonHeapTarget,METHOD__NIL)
+    JITHELPER(CORINFO_HELP_ENSURE_NONHEAP, JIT_EnsureNonHeapTarget,METHOD__NIL)
 
     DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_BYREF, JIT_ByRefWriteBarrier,METHOD__NIL)
     DYNAMICJITHELPER(CORINFO_HELP_BULK_WRITEBARRIER, NULL, METHOD__BUFFER__MEMCOPYGC)
diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp
index ef91f916fb9563..76fa1656fa1945 100644
--- a/src/coreclr/jit/assertionprop.cpp
+++ b/src/coreclr/jit/assertionprop.cpp
@@ -6716,6 +6716,7 @@ Statement* Compiler::optVNAssertionPropCurStmt(BasicBlock* block, Statement* stm
     // anything in assertion gen.
     optAssertionPropagatedCurrentStmt = false;
 
+
     VNAssertionPropVisitorInfo data(this, block, stmt);
     fgWalkTreePost(stmt->GetRootNodePointer(), Compiler::optVNAssertionPropCurStmtVisitor, &data);
 
diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp
index eaed687a57eee2..3de5498bdb81dc 100644
--- a/src/coreclr/jit/flowgraph.cpp
+++ b/src/coreclr/jit/flowgraph.cpp
@@ -2457,6 +2457,16 @@ PhaseStatus Compiler::fgAddInternal()
 
     noway_assert(!dbgHandle || !pDbgHandle);
 
+#if DEBUG
+    // TODO: to be enabled only under jit-stress
+    if ((info.compRetBuffArg != BAD_VAR_NUM) && !opts.IsReadyToRun())
+    {
+        GenTree* retBuffAddr = gtNewLclvNode(info.compRetBuffArg, TYP_BYREF);
+        fgNewStmtAtBeg(fgFirstBB, gtNewHelperCallNode(CORINFO_HELP_ENSURE_NONHEAP, TYP_VOID, retBuffAddr));
+        madeChanges = true;
+    }
+#endif
+
     if (dbgHandle || pDbgHandle)
     {
         // Test the JustMyCode VM global state variable
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index 0435ecf0b514e4..690b334ff211a1 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -843,6 +843,17 @@ GenTree* Compiler::impStoreStruct(GenTree*         store,
             // TODO-Bug?: verify if flags matter here
             GenTreeFlags indirFlags = GTF_EMPTY;
             GenTree*     destAddr   = impGetNodeAddr(store, CHECK_SPILL_ALL, &indirFlags);
+
+            if (!impIsAddressInLocal(destAddr) && (!destAddr->OperIsScalarLocal() || (destAddr->AsLclVarCommon()->GetLclNum() != info.compRetBuffArg)))
+            {
+                unsigned tmp = lvaGrabTemp(false DEBUGARG("stack copy for value returned via return buffer"));
+                lvaSetStruct(tmp, srcCall->gtRetClsHnd, false);
+                GenTree* comma = gtNewOperNode(GT_COMMA, store->TypeGet(), gtNewStoreLclVarNode(tmp, srcCall), gtNewLclvNode(tmp, lvaGetDesc(tmp)->TypeGet()));
+                store->Data() = comma;
+                comma->AsOp()->gtOp1 = impStoreStruct(comma->gtGetOp1(), curLevel, pAfterStmt, di, block);
+                return impStoreStruct(store, curLevel, pAfterStmt, di, block);
+            }
+
             NewCallArg   newArg     = NewCallArg::Primitive(destAddr).WellKnown(wellKnownArgType);
 
             if (destAddr->OperIs(GT_LCL_ADDR))
@@ -953,6 +964,22 @@ GenTree* Compiler::impStoreStruct(GenTree*         store,
             // TODO-Bug?: verify if flags matter here
             GenTreeFlags indirFlags = GTF_EMPTY;
             GenTree*     destAddr   = impGetNodeAddr(store, CHECK_SPILL_ALL, &indirFlags);
+
+            if (!impIsAddressInLocal(destAddr) && (!destAddr->OperIsScalarLocal() || (destAddr->AsLclVarCommon()->GetLclNum() != info.compRetBuffArg)))
+            {
+                unsigned tmp = lvaGrabTemp(false DEBUGARG("stack copy for value returned via return buffer"));
+                lvaSetStruct(tmp, call->gtRetClsHnd, false);
+                destAddr = gtNewLclVarAddrNode(tmp, TYP_BYREF);
+                // Insert address of temp into existing call
+                call->gtArgs.InsertAfterThisOrFirst(this,
+                    NewCallArg::Primitive(destAddr).WellKnown(WellKnownArg::RetBuffer));
+                // Now the store needs to copy from the new temp instead.
+                store->Data() = gtNewLclvNode(tmp, lvaGetDesc(tmp)->TypeGet());
+                call->gtType = TYP_VOID;
+                src->gtType = TYP_VOID;
+                return gtNewOperNode(GT_COMMA, TYP_VOID, src, impStoreStruct(store, CHECK_SPILL_ALL, pAfterStmt, di, block));
+            }
+
             call->gtArgs.InsertAfterThisOrFirst(this,
                                                 NewCallArg::Primitive(destAddr).WellKnown(WellKnownArg::RetBuffer));
 
diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp
index b308408a47e5d5..bb0d62af848f77 100644
--- a/src/coreclr/jit/utils.cpp
+++ b/src/coreclr/jit/utils.cpp
@@ -1770,6 +1770,7 @@ void HelperCallProperties::init()
                 isNoGC = true;
                 FALLTHROUGH;
             case CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP:
+            case CORINFO_HELP_ENSURE_NONHEAP:
             case CORINFO_HELP_BULK_WRITEBARRIER:
                 mutatesHeap = true;
                 break;
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
index 04bca41e476671..d17c59018fa040 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
@@ -131,6 +131,7 @@ which is the right helper to use to allocate an object of a given type. */
         CORINFO_HELP_ASSIGN_REF,        // universal helpers with F_CALL_CONV calling convention
         CORINFO_HELP_CHECKED_ASSIGN_REF,
         CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP,  // Do the store, and ensure that the target was not in the heap.
+        CORINFO_HELP_ENSURE_NONHEAP,             // Ensure that the target was not in the heap.
 
         CORINFO_HELP_ASSIGN_BYREF,
         CORINFO_HELP_BULK_WRITEBARRIER,
diff --git a/src/coreclr/vm/gchelpers.cpp b/src/coreclr/vm/gchelpers.cpp
index f002fbe1b59a27..048e840728feda 100644
--- a/src/coreclr/vm/gchelpers.cpp
+++ b/src/coreclr/vm/gchelpers.cpp
@@ -1466,6 +1466,16 @@ extern "C" HCIMPL2_RAW(VOID, JIT_WriteBarrierEnsureNonHeapTarget, Object **dst,
 }
 HCIMPLEND_RAW
 
+extern "C" HCIMPL1_RAW(VOID, JIT_EnsureNonHeapTarget, void *dst)
+{
+    STATIC_CONTRACT_MODE_COOPERATIVE;
+    STATIC_CONTRACT_THROWS;
+    STATIC_CONTRACT_GC_NOTRIGGER;
+
+    _ASSERT(!GCHeapUtilities::GetGCHeap()->IsHeapPointer(dst));
+}
+HCIMPLEND_RAW
+
 // This function sets the card table with the granularity of 1 byte, to avoid ghost updates
 //    that could occur if multiple threads were trying to set different bits in the same card.
 
diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h
index 1d20db72f5f244..2383af4a74977b 100644
--- a/src/coreclr/vm/jitinterface.h
+++ b/src/coreclr/vm/jitinterface.h
@@ -231,6 +231,7 @@ extern "C" FCDECL2(VOID, JIT_CheckedWriteBarrier, Object **dst, Object *ref);
 
 extern "C" FCDECL2(VOID, JIT_WriteBarrier, Object **dst, Object *ref);
 extern "C" FCDECL2(VOID, JIT_WriteBarrierEnsureNonHeapTarget, Object **dst, Object *ref);
+extern "C" FCDECL1(VOID, JIT_EnsureNonHeapTarget, void *dst);
 
 // ARM64 JIT_WriteBarrier uses special ABI and thus is not callable directly
 // Copied write barriers must be called at a different location
diff --git a/src/coreclr/vm/reflectioninvocation.cpp b/src/coreclr/vm/reflectioninvocation.cpp
index 8fa0bbd817d55c..6f96903a6ba281 100644
--- a/src/coreclr/vm/reflectioninvocation.cpp
+++ b/src/coreclr/vm/reflectioninvocation.cpp
@@ -492,6 +492,9 @@ extern "C" void QCALLTYPE RuntimeMethodHandle_InvokeMethod(
     // If an exception occurs a gc may happen but we are going to dump the stack anyway and we do
     // not need to protect anything.
 
+    // Allocate a local buffer for the return buffer if necessary
+    PVOID pLocalRetBuf = nullptr;
+
     {
     BEGINFORBIDGC();
 #ifdef _DEBUG
@@ -501,8 +504,19 @@ extern "C" void QCALLTYPE RuntimeMethodHandle_InvokeMethod(
     // Take care of any return arguments
     if (fHasRetBuffArg)
     {
-        PVOID pRetBuff = gc.retVal->GetData();
-        *((LPVOID*) (pTransitionBlock + argit.GetRetBuffArgOffset())) = pRetBuff;
+        _ASSERT(hasValueTypeReturn);
+        PTR_MethodTable pMT = retTH.GetMethodTable();
+        size_t localRetBufSize = retTH.GetSize();
+
+        // Allocate a local buffer. The invoked method will write the return value to this
+        // buffer which will be copied to gc.retVal later.
+        pLocalRetBuf = _alloca(localRetBufSize);
+        ZeroMemory(pLocalRetBuf, localRetBufSize);
+        *((LPVOID*) (pTransitionBlock + argit.GetRetBuffArgOffset())) = pLocalRetBuf;
+        if (pMT->ContainsGCPointers())
+        {
+            pValueClasses = new (_alloca(sizeof(ValueClassInfo))) ValueClassInfo(pLocalRetBuf, pMT, pValueClasses);
+        }
     }
 
     // copy args
@@ -572,6 +586,19 @@ extern "C" void QCALLTYPE RuntimeMethodHandle_InvokeMethod(
     // Call the method
     CallDescrWorkerWithHandler(&callDescrData);
 
+    if (fHasRetBuffArg)
+    {
+        // Copy the return value from the return buffer to the object
+        if (retTH.GetMethodTable()->ContainsGCPointers())
+        {
+            memmoveGCRefs(gc.retVal->GetData(), pLocalRetBuf, retTH.GetSize());
+        }
+        else
+        {
+            memcpyNoGCRefs(gc.retVal->GetData(), pLocalRetBuf, retTH.GetSize());
+        }
+    }
+
     // It is still illegal to do a GC here.  The return type might have/contain GC pointers.
     if (fConstructor)
     {
@@ -642,7 +669,6 @@ extern "C" void QCALLTYPE RuntimeMethodHandle_InvokeMethod(
 
     if (pProtectValueClassFrame != NULL)
         pProtectValueClassFrame->Pop(pThread);
-
     }
 
 Done:

From 0be96a0c64239dac8b3b2ebe2d2c7a4f0f50a104 Mon Sep 17 00:00:00 2001
From: EgorBo <egorbo@gmail.com>
Date: Tue, 4 Feb 2025 00:55:50 +0100
Subject: [PATCH 02/17] clean up

---
 src/coreclr/inc/corinfo.h                            |  3 ++-
 src/coreclr/inc/jiteeversionguid.h                   | 10 +++++-----
 src/coreclr/inc/jithelpers.h                         |  2 +-
 src/coreclr/jit/flowgraph.cpp                        | 12 ++++++------
 .../tools/Common/JitInterface/CorInfoHelpFunc.cs     |  3 ++-
 5 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h
index fd98627ff82da5..33e6e9fae1e473 100644
--- a/src/coreclr/inc/corinfo.h
+++ b/src/coreclr/inc/corinfo.h
@@ -440,7 +440,6 @@ enum CorInfoHelpFunc
     CORINFO_HELP_ASSIGN_REF,        // universal helpers with F_CALL_CONV calling convention
     CORINFO_HELP_CHECKED_ASSIGN_REF,
     CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP,  // Do the store, and ensure that the target was not in the heap.
-    CORINFO_HELP_ENSURE_NONHEAP,             // Ensure that the target was not in the heap.
 
     CORINFO_HELP_ASSIGN_BYREF,
     CORINFO_HELP_BULK_WRITEBARRIER,
@@ -595,6 +594,8 @@ enum CorInfoHelpFunc
     CORINFO_HELP_VALIDATE_INDIRECT_CALL,    // CFG: Validate function pointer
     CORINFO_HELP_DISPATCH_INDIRECT_CALL,    // CFG: Validate and dispatch to pointer
 
+    CORINFO_HELP_ENSURE_NONHEAP,            // Ensure that the target was not in the heap.
+
     CORINFO_HELP_COUNT,
 };
 
diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h
index 5991a686d9bf39..2db16d00df135b 100644
--- a/src/coreclr/inc/jiteeversionguid.h
+++ b/src/coreclr/inc/jiteeversionguid.h
@@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
 #define GUID_DEFINED
 #endif // !GUID_DEFINED
 
-constexpr GUID JITEEVersionIdentifier = { /* bcc768c7-c29d-467d-89f6-f5adca5f2e59 */
-    0xbcc768c7,
-    0xc29d,
-    0x467d,
-    {0x89, 0xf6, 0xf5, 0xad, 0xca, 0x5f, 0x2e, 0x59}
+constexpr GUID JITEEVersionIdentifier = { /* cc0e7adf-e397-40b6-9d14-a7149815c991 */
+    0xcc0e7adf,
+    0xe397,
+    0x40b6,
+    {0x9d, 0x14, 0xa7, 0x14, 0x98, 0x15, 0xc9, 0x91}
   };
 
 //////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h
index a1e1822c0d27e4..9976016820dd90 100644
--- a/src/coreclr/inc/jithelpers.h
+++ b/src/coreclr/inc/jithelpers.h
@@ -157,7 +157,6 @@
     DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF,   JIT_WriteBarrier,   METHOD__NIL)
     DYNAMICJITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF, JIT_CheckedWriteBarrier,METHOD__NIL)
     JITHELPER(CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP, JIT_WriteBarrierEnsureNonHeapTarget,METHOD__NIL)
-    JITHELPER(CORINFO_HELP_ENSURE_NONHEAP, JIT_EnsureNonHeapTarget,METHOD__NIL)
 
     DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_BYREF, JIT_ByRefWriteBarrier,METHOD__NIL)
     DYNAMICJITHELPER(CORINFO_HELP_BULK_WRITEBARRIER, NULL, METHOD__BUFFER__MEMCOPYGC)
@@ -342,6 +341,7 @@
     JITHELPER(CORINFO_HELP_VALIDATE_INDIRECT_CALL, NULL, METHOD__NIL)
     JITHELPER(CORINFO_HELP_DISPATCH_INDIRECT_CALL, NULL, METHOD__NIL)
 #endif
+    JITHELPER(CORINFO_HELP_ENSURE_NONHEAP, JIT_EnsureNonHeapTarget,METHOD__NIL)
 
 #undef JITHELPER
 #undef DYNAMICJITHELPER
diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp
index 3de5498bdb81dc..8a414d33d0ddf4 100644
--- a/src/coreclr/jit/flowgraph.cpp
+++ b/src/coreclr/jit/flowgraph.cpp
@@ -2459,12 +2459,12 @@ PhaseStatus Compiler::fgAddInternal()
 
 #if DEBUG
     // TODO: to be enabled only under jit-stress
-    if ((info.compRetBuffArg != BAD_VAR_NUM) && !opts.IsReadyToRun())
-    {
-        GenTree* retBuffAddr = gtNewLclvNode(info.compRetBuffArg, TYP_BYREF);
-        fgNewStmtAtBeg(fgFirstBB, gtNewHelperCallNode(CORINFO_HELP_ENSURE_NONHEAP, TYP_VOID, retBuffAddr));
-        madeChanges = true;
-    }
+    // if ((info.compRetBuffArg != BAD_VAR_NUM) && !opts.IsReadyToRun())
+    // {
+    //     GenTree* retBuffAddr = gtNewLclvNode(info.compRetBuffArg, TYP_BYREF);
+    //     fgNewStmtAtBeg(fgFirstBB, gtNewHelperCallNode(CORINFO_HELP_ENSURE_NONHEAP, TYP_VOID, retBuffAddr));
+    //     madeChanges = true;
+    // }
 #endif
 
     if (dbgHandle || pDbgHandle)
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
index d17c59018fa040..88cd51c01e16b9 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
@@ -131,7 +131,6 @@ which is the right helper to use to allocate an object of a given type. */
         CORINFO_HELP_ASSIGN_REF,        // universal helpers with F_CALL_CONV calling convention
         CORINFO_HELP_CHECKED_ASSIGN_REF,
         CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP,  // Do the store, and ensure that the target was not in the heap.
-        CORINFO_HELP_ENSURE_NONHEAP,             // Ensure that the target was not in the heap.
 
         CORINFO_HELP_ASSIGN_BYREF,
         CORINFO_HELP_BULK_WRITEBARRIER,
@@ -285,6 +284,8 @@ which is the right helper to use to allocate an object of a given type. */
         CORINFO_HELP_VALIDATE_INDIRECT_CALL,    // CFG: Validate function pointer
         CORINFO_HELP_DISPATCH_INDIRECT_CALL,    // CFG: Validate and dispatch to pointer
 
+        CORINFO_HELP_ENSURE_NONHEAP,            // Ensure that the target was not in the heap.
+
         CORINFO_HELP_COUNT,
     }
 }

From 294ef66c76d873a9fdfa01cb640283d4fe5a0236 Mon Sep 17 00:00:00 2001
From: EgorBo <egorbo@gmail.com>
Date: Tue, 4 Feb 2025 01:56:48 +0100
Subject: [PATCH 03/17] let's see the diffs

---
 src/coreclr/jit/compiler.h   |  2 +-
 src/coreclr/jit/importer.cpp | 40 +++++++++++++++++++++---------------
 src/coreclr/jit/lclvars.cpp  |  2 +-
 3 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index a5472a3be8de06..688cf35e39acbc 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -4901,7 +4901,7 @@ class Compiler
                              Statement**      pAfterStmt = nullptr,
                              const DebugInfo& di         = DebugInfo(),
                              BasicBlock*      block      = nullptr);
-    GenTree* impStoreStructPtr(GenTree* destAddr, GenTree* value, unsigned curLevel);
+    GenTree* impStoreStructPtr(GenTree* destAddr, GenTree* value, unsigned curLevel, GenTreeFlags indirFlags = GTF_EMPTY);
 
     GenTree* impGetNodeAddr(GenTree* val, unsigned curLevel, GenTreeFlags* pDerefFlags);
 
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index 690b334ff211a1..c055f738d9cda0 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -844,17 +844,19 @@ GenTree* Compiler::impStoreStruct(GenTree*         store,
             GenTreeFlags indirFlags = GTF_EMPTY;
             GenTree*     destAddr   = impGetNodeAddr(store, CHECK_SPILL_ALL, &indirFlags);
 
-            if (!impIsAddressInLocal(destAddr) && (!destAddr->OperIsScalarLocal() || (destAddr->AsLclVarCommon()->GetLclNum() != info.compRetBuffArg)))
+            if (!impIsAddressInLocal(destAddr) &&
+                (!destAddr->OperIsScalarLocal() || (destAddr->AsLclVarCommon()->GetLclNum() != info.compRetBuffArg)))
             {
                 unsigned tmp = lvaGrabTemp(false DEBUGARG("stack copy for value returned via return buffer"));
                 lvaSetStruct(tmp, srcCall->gtRetClsHnd, false);
-                GenTree* comma = gtNewOperNode(GT_COMMA, store->TypeGet(), gtNewStoreLclVarNode(tmp, srcCall), gtNewLclvNode(tmp, lvaGetDesc(tmp)->TypeGet()));
-                store->Data() = comma;
+                GenTree* comma       = gtNewOperNode(GT_COMMA, store->TypeGet(), gtNewStoreLclVarNode(tmp, srcCall),
+                                                     gtNewLclvNode(tmp, lvaGetDesc(tmp)->TypeGet()));
+                store->Data()        = comma;
                 comma->AsOp()->gtOp1 = impStoreStruct(comma->gtGetOp1(), curLevel, pAfterStmt, di, block);
                 return impStoreStruct(store, curLevel, pAfterStmt, di, block);
             }
 
-            NewCallArg   newArg     = NewCallArg::Primitive(destAddr).WellKnown(wellKnownArgType);
+            NewCallArg newArg = NewCallArg::Primitive(destAddr).WellKnown(wellKnownArgType);
 
             if (destAddr->OperIs(GT_LCL_ADDR))
             {
@@ -965,19 +967,21 @@ GenTree* Compiler::impStoreStruct(GenTree*         store,
             GenTreeFlags indirFlags = GTF_EMPTY;
             GenTree*     destAddr   = impGetNodeAddr(store, CHECK_SPILL_ALL, &indirFlags);
 
-            if (!impIsAddressInLocal(destAddr) && (!destAddr->OperIsScalarLocal() || (destAddr->AsLclVarCommon()->GetLclNum() != info.compRetBuffArg)))
+            if (!impIsAddressInLocal(destAddr) &&
+                (!destAddr->OperIsScalarLocal() || (destAddr->AsLclVarCommon()->GetLclNum() != info.compRetBuffArg)))
             {
                 unsigned tmp = lvaGrabTemp(false DEBUGARG("stack copy for value returned via return buffer"));
                 lvaSetStruct(tmp, call->gtRetClsHnd, false);
                 destAddr = gtNewLclVarAddrNode(tmp, TYP_BYREF);
                 // Insert address of temp into existing call
                 call->gtArgs.InsertAfterThisOrFirst(this,
-                    NewCallArg::Primitive(destAddr).WellKnown(WellKnownArg::RetBuffer));
+                                                    NewCallArg::Primitive(destAddr).WellKnown(WellKnownArg::RetBuffer));
                 // Now the store needs to copy from the new temp instead.
                 store->Data() = gtNewLclvNode(tmp, lvaGetDesc(tmp)->TypeGet());
-                call->gtType = TYP_VOID;
-                src->gtType = TYP_VOID;
-                return gtNewOperNode(GT_COMMA, TYP_VOID, src, impStoreStruct(store, CHECK_SPILL_ALL, pAfterStmt, di, block));
+                call->gtType  = TYP_VOID;
+                src->gtType   = TYP_VOID;
+                return gtNewOperNode(GT_COMMA, TYP_VOID, src,
+                                     impStoreStruct(store, CHECK_SPILL_ALL, pAfterStmt, di, block));
             }
 
             call->gtArgs.InsertAfterThisOrFirst(this,
@@ -1037,9 +1041,10 @@ GenTree* Compiler::impStoreStruct(GenTree*         store,
 // impStoreStructPtr: Store (copy) the structure from 'src' to 'destAddr'.
 //
 // Arguments:
-//    destAddr - address of the destination of the store
-//    value    - value to store
-//    curLevel - stack level for which a spill may be being done
+//    destAddr   - address of the destination of the store
+//    value      - value to store
+//    curLevel   - stack level for which a spill may be being done
+//    indirFlags - flags to be used on the store node
 //
 // Return Value:
 //    The tree that should be appended to the statement list that represents the store.
@@ -1047,11 +1052,11 @@ GenTree* Compiler::impStoreStruct(GenTree*         store,
 // Notes:
 //    Temp stores may be appended to impStmtList if spilling is necessary.
 //
-GenTree* Compiler::impStoreStructPtr(GenTree* destAddr, GenTree* value, unsigned curLevel)
+GenTree* Compiler::impStoreStructPtr(GenTree* destAddr, GenTree* value, unsigned curLevel, GenTreeFlags indirFlags)
 {
     var_types    type   = value->TypeGet();
     ClassLayout* layout = (type == TYP_STRUCT) ? value->GetLayout(this) : nullptr;
-    GenTree*     store  = gtNewStoreValueNode(type, layout, destAddr, value);
+    GenTree*     store  = gtNewStoreValueNode(type, layout, destAddr, value, indirFlags);
     store               = impStoreStruct(store, curLevel);
 
     return store;
@@ -11214,18 +11219,19 @@ bool Compiler::impReturnInstruction(int prefixFlags, OPCODE& opcode)
 
     if (info.compRetBuffArg != BAD_VAR_NUM)
     {
+        var_types retBuffType = lvaGetDesc(info.compRetBuffArg)->TypeGet();
         // Assign value to return buff (first param)
         GenTree* retBuffAddr =
-            gtNewLclvNode(info.compRetBuffArg, TYP_BYREF DEBUGARG(impCurStmtDI.GetLocation().GetOffset()));
+            gtNewLclvNode(info.compRetBuffArg, retBuffType DEBUGARG(impCurStmtDI.GetLocation().GetOffset()));
 
-        op2 = impStoreStructPtr(retBuffAddr, op2, CHECK_SPILL_ALL);
+        op2 = impStoreStructPtr(retBuffAddr, op2, CHECK_SPILL_ALL, GTF_IND_TGT_NOT_HEAP);
         impAppendTree(op2, CHECK_SPILL_NONE, impCurStmtDI);
 
         // There are cases where the address of the implicit RetBuf should be returned explicitly.
         //
         if (compMethodReturnsRetBufAddr())
         {
-            op1 = gtNewOperNode(GT_RETURN, TYP_BYREF, gtNewLclvNode(info.compRetBuffArg, TYP_BYREF));
+            op1 = gtNewOperNode(GT_RETURN, retBuffType, gtNewLclvNode(info.compRetBuffArg, retBuffType));
         }
         else
         {
diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp
index ad9b203b454e3b..5d93897fb79693 100644
--- a/src/coreclr/jit/lclvars.cpp
+++ b/src/coreclr/jit/lclvars.cpp
@@ -485,7 +485,7 @@ void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo, bool useFixedRetBuf
         info.compRetBuffArg = varDscInfo->varNum;
 
         LclVarDsc* varDsc  = varDscInfo->varDsc;
-        varDsc->lvType     = TYP_BYREF;
+        varDsc->lvType     = TYP_I_IMPL;
         varDsc->lvIsParam  = 1;
         varDsc->lvIsRegArg = 0;
 

From d1d4248fed5b4350eb8bc474b7cdf508296568de Mon Sep 17 00:00:00 2001
From: EgorBo <egorbo@gmail.com>
Date: Tue, 4 Feb 2025 01:58:42 +0100
Subject: [PATCH 04/17] formatting

---
 src/coreclr/jit/assertionprop.cpp       | 1 -
 src/coreclr/vm/reflectioninvocation.cpp | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp
index 76fa1656fa1945..ef91f916fb9563 100644
--- a/src/coreclr/jit/assertionprop.cpp
+++ b/src/coreclr/jit/assertionprop.cpp
@@ -6716,7 +6716,6 @@ Statement* Compiler::optVNAssertionPropCurStmt(BasicBlock* block, Statement* stm
     // anything in assertion gen.
     optAssertionPropagatedCurrentStmt = false;
 
-
     VNAssertionPropVisitorInfo data(this, block, stmt);
     fgWalkTreePost(stmt->GetRootNodePointer(), Compiler::optVNAssertionPropCurStmtVisitor, &data);
 
diff --git a/src/coreclr/vm/reflectioninvocation.cpp b/src/coreclr/vm/reflectioninvocation.cpp
index 6f96903a6ba281..96013583a88621 100644
--- a/src/coreclr/vm/reflectioninvocation.cpp
+++ b/src/coreclr/vm/reflectioninvocation.cpp
@@ -669,6 +669,7 @@ extern "C" void QCALLTYPE RuntimeMethodHandle_InvokeMethod(
 
     if (pProtectValueClassFrame != NULL)
         pProtectValueClassFrame->Pop(pThread);
+
     }
 
 Done:

From af9f1f149894b23094ab291130004a1245a6a8b9 Mon Sep 17 00:00:00 2001
From: EgorBo <egorbo@gmail.com>
Date: Tue, 4 Feb 2025 03:30:46 +0100
Subject: [PATCH 05/17] Enable the helper back

---
 src/coreclr/jit/flowgraph.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp
index 8a414d33d0ddf4..3de5498bdb81dc 100644
--- a/src/coreclr/jit/flowgraph.cpp
+++ b/src/coreclr/jit/flowgraph.cpp
@@ -2459,12 +2459,12 @@ PhaseStatus Compiler::fgAddInternal()
 
 #if DEBUG
     // TODO: to be enabled only under jit-stress
-    // if ((info.compRetBuffArg != BAD_VAR_NUM) && !opts.IsReadyToRun())
-    // {
-    //     GenTree* retBuffAddr = gtNewLclvNode(info.compRetBuffArg, TYP_BYREF);
-    //     fgNewStmtAtBeg(fgFirstBB, gtNewHelperCallNode(CORINFO_HELP_ENSURE_NONHEAP, TYP_VOID, retBuffAddr));
-    //     madeChanges = true;
-    // }
+    if ((info.compRetBuffArg != BAD_VAR_NUM) && !opts.IsReadyToRun())
+    {
+        GenTree* retBuffAddr = gtNewLclvNode(info.compRetBuffArg, TYP_BYREF);
+        fgNewStmtAtBeg(fgFirstBB, gtNewHelperCallNode(CORINFO_HELP_ENSURE_NONHEAP, TYP_VOID, retBuffAddr));
+        madeChanges = true;
+    }
 #endif
 
     if (dbgHandle || pDbgHandle)

From b8855c721dbda830e62850c4b261c4802c9b114a Mon Sep 17 00:00:00 2001
From: EgorBo <egorbo@gmail.com>
Date: Tue, 4 Feb 2025 12:16:20 +0100
Subject: [PATCH 06/17] clean up

---
 src/coreclr/jit/compiler.h    |  1 +
 src/coreclr/jit/flowgraph.cpp |  5 +++--
 src/coreclr/jit/importer.cpp  | 28 ++++++++++++++++++++--------
 3 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 5261bb3178e1ba..75f8567eb1143d 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -10759,6 +10759,7 @@ class Compiler
         STRESS_MODE(POISON_IMPLICIT_BYREFS)                                                     \
         STRESS_MODE(STORE_BLOCK_UNROLLING)                                                      \
         STRESS_MODE(THREE_OPT_LAYOUT)                                                           \
+        STRESS_MODE(NONHEAP_RET_BUFFER)                                                         \
         STRESS_MODE(COUNT)
 
     enum                compStressArea
diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp
index 3de5498bdb81dc..d573542ca9c751 100644
--- a/src/coreclr/jit/flowgraph.cpp
+++ b/src/coreclr/jit/flowgraph.cpp
@@ -2458,8 +2458,9 @@ PhaseStatus Compiler::fgAddInternal()
     noway_assert(!dbgHandle || !pDbgHandle);
 
 #if DEBUG
-    // TODO: to be enabled only under jit-stress
-    if ((info.compRetBuffArg != BAD_VAR_NUM) && !opts.IsReadyToRun())
+    // JitStress: Insert a helper call to ensure that the return buffer is not on the GC heap.
+    if (compStressCompile(STRESS_NONHEAP_RET_BUFFER, 50) && (info.compRetBuffArg != BAD_VAR_NUM) &&
+        !opts.IsReadyToRun())
     {
         GenTree* retBuffAddr = gtNewLclvNode(info.compRetBuffArg, TYP_BYREF);
         fgNewStmtAtBeg(fgFirstBB, gtNewHelperCallNode(CORINFO_HELP_ENSURE_NONHEAP, TYP_VOID, retBuffAddr));
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index f64301b2d07765..3db60da43000e6 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -844,15 +844,20 @@ GenTree* Compiler::impStoreStruct(GenTree*         store,
             GenTreeFlags indirFlags = GTF_EMPTY;
             GenTree*     destAddr   = impGetNodeAddr(store, CHECK_SPILL_ALL, &indirFlags);
 
+            // Make sure we don't pass something other than a local address to the return buffer arg.
+            // It is allowed to pass current's method return buffer as it is a local too.
             if (!impIsAddressInLocal(destAddr) &&
-                (!destAddr->OperIsScalarLocal() || (destAddr->AsLclVarCommon()->GetLclNum() != info.compRetBuffArg)))
+                (!destAddr->OperIsScalarLocal() ||
+                 (destAddr->AsLclVarCommon()->GetLclNum() != impInlineRoot()->info.compRetBuffArg)))
             {
                 unsigned tmp = lvaGrabTemp(false DEBUGARG("stack copy for value returned via return buffer"));
                 lvaSetStruct(tmp, srcCall->gtRetClsHnd, false);
-                GenTree* comma       = gtNewOperNode(GT_COMMA, store->TypeGet(), gtNewStoreLclVarNode(tmp, srcCall),
+
+                GenTree* spilledCall = gtNewStoreLclVarNode(tmp, srcCall);
+                GenTree* comma       = gtNewOperNode(GT_COMMA, store->TypeGet(), spilledCall,
                                                      gtNewLclvNode(tmp, lvaGetDesc(tmp)->TypeGet()));
                 store->Data()        = comma;
-                comma->AsOp()->gtOp1 = impStoreStruct(comma->gtGetOp1(), curLevel, pAfterStmt, di, block);
+                comma->AsOp()->gtOp1 = impStoreStruct(spilledCall, curLevel, pAfterStmt, di, block);
                 return impStoreStruct(store, curLevel, pAfterStmt, di, block);
             }
 
@@ -967,21 +972,28 @@ GenTree* Compiler::impStoreStruct(GenTree*         store,
             GenTreeFlags indirFlags = GTF_EMPTY;
             GenTree*     destAddr   = impGetNodeAddr(store, CHECK_SPILL_ALL, &indirFlags);
 
+            // Make sure we don't pass something other than a local address to the return buffer arg.
+            // It is allowed to pass current's method return buffer as it is a local too.
             if (!impIsAddressInLocal(destAddr) &&
-                (!destAddr->OperIsScalarLocal() || (destAddr->AsLclVarCommon()->GetLclNum() != info.compRetBuffArg)))
+                (!destAddr->OperIsScalarLocal() ||
+                 (destAddr->AsLclVarCommon()->GetLclNum() != impInlineRoot()->info.compRetBuffArg)))
             {
                 unsigned tmp = lvaGrabTemp(false DEBUGARG("stack copy for value returned via return buffer"));
                 lvaSetStruct(tmp, call->gtRetClsHnd, false);
                 destAddr = gtNewLclVarAddrNode(tmp, TYP_BYREF);
+
                 // Insert address of temp into existing call
-                call->gtArgs.InsertAfterThisOrFirst(this,
-                                                    NewCallArg::Primitive(destAddr).WellKnown(WellKnownArg::RetBuffer));
+                NewCallArg retBufArg = NewCallArg::Primitive(destAddr).WellKnown(WellKnownArg::RetBuffer);
+                call->gtArgs.InsertAfterThisOrFirst(this, retBufArg);
+
                 // Now the store needs to copy from the new temp instead.
                 store->Data() = gtNewLclvNode(tmp, lvaGetDesc(tmp)->TypeGet());
                 call->gtType  = TYP_VOID;
                 src->gtType   = TYP_VOID;
-                return gtNewOperNode(GT_COMMA, TYP_VOID, src,
-                                     impStoreStruct(store, CHECK_SPILL_ALL, pAfterStmt, di, block));
+                store         = impStoreStruct(store, CHECK_SPILL_ALL, pAfterStmt, di, block);
+
+                // Wrap with the actual call
+                return gtNewOperNode(GT_COMMA, TYP_VOID, src, store);
             }
 
             call->gtArgs.InsertAfterThisOrFirst(this,

From 0806c6666bfad1797758b461170d6a74a4bab4c1 Mon Sep 17 00:00:00 2001
From: EgorBo <egorbo@gmail.com>
Date: Tue, 4 Feb 2025 12:36:07 +0100
Subject: [PATCH 07/17] Address feedback

---
 src/coreclr/jit/importer.cpp | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index 3db60da43000e6..7530c9661dc599 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -987,13 +987,11 @@ GenTree* Compiler::impStoreStruct(GenTree*         store,
                 call->gtArgs.InsertAfterThisOrFirst(this, retBufArg);
 
                 // Now the store needs to copy from the new temp instead.
-                store->Data() = gtNewLclvNode(tmp, lvaGetDesc(tmp)->TypeGet());
-                call->gtType  = TYP_VOID;
-                src->gtType   = TYP_VOID;
-                store         = impStoreStruct(store, CHECK_SPILL_ALL, pAfterStmt, di, block);
-
-                // Wrap with the actual call
-                return gtNewOperNode(GT_COMMA, TYP_VOID, src, store);
+                call->gtType      = TYP_VOID;
+                src->gtType       = TYP_VOID;
+                var_types tmpType = lvaGetDesc(tmp)->TypeGet();
+                store->Data()     = gtNewOperNode(GT_COMMA, tmpType, src, gtNewLclvNode(tmp, tmpType));
+                return impStoreStruct(store, CHECK_SPILL_ALL, pAfterStmt, di, block);
             }
 
             call->gtArgs.InsertAfterThisOrFirst(this,

From e926f20f11358c208ae7a2772fc8239ce49941b8 Mon Sep 17 00:00:00 2001
From: EgorBo <egorbo@gmail.com>
Date: Tue, 4 Feb 2025 14:09:36 +0100
Subject: [PATCH 08/17] Bump major r2r version

---
 src/coreclr/inc/readytorun.h                               | 1 +
 src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h          | 2 +-
 src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/inc/readytorun.h b/src/coreclr/inc/readytorun.h
index 1ca6b12b594132..de4f54c342814d 100644
--- a/src/coreclr/inc/readytorun.h
+++ b/src/coreclr/inc/readytorun.h
@@ -39,6 +39,7 @@
 // R2R Version 10.0 adds support for the statics being allocated on a per type basis instead of on a per module basis, disable support for LogMethodEnter helper
 // R2R Version 10.1 adds Unbox_TypeTest helper
 // R2R Version 11 uses GCInfo v4, which encodes safe points without -1 offset and does not track return kinds in GCInfo
+// R2R Version 12 requires all return buffers to be always on the stack
 
 struct READYTORUN_CORE_HEADER
 {
diff --git a/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h b/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h
index 358807e6ab563e..305901d7626a59 100644
--- a/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h
+++ b/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h
@@ -11,7 +11,7 @@ struct ReadyToRunHeaderConstants
 {
     static const uint32_t Signature = 0x00525452; // 'RTR'
 
-    static const uint32_t CurrentMajorVersion = 11;
+    static const uint32_t CurrentMajorVersion = 12;
     static const uint32_t CurrentMinorVersion = 0;
 };
 
diff --git a/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs b/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs
index bca68c323f6523..10ed724001d54d 100644
--- a/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs
+++ b/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs
@@ -15,7 +15,7 @@ internal struct ReadyToRunHeaderConstants
     {
         public const uint Signature = 0x00525452; // 'RTR'
 
-        public const ushort CurrentMajorVersion = 11;
+        public const ushort CurrentMajorVersion = 12;
         public const ushort CurrentMinorVersion = 0;
     }
 #if READYTORUN

From ba8c9f823263ffc336fe5733d228f67a67ac5e6f Mon Sep 17 00:00:00 2001
From: EgorBo <egorbo@gmail.com>
Date: Tue, 4 Feb 2025 14:35:45 +0100
Subject: [PATCH 09/17] oops

---
 src/coreclr/inc/readytorun.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/inc/readytorun.h b/src/coreclr/inc/readytorun.h
index de4f54c342814d..b097179f9bf67c 100644
--- a/src/coreclr/inc/readytorun.h
+++ b/src/coreclr/inc/readytorun.h
@@ -19,10 +19,10 @@
 //  src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h
 // If you update this, ensure you run `git grep MINIMUM_READYTORUN_MAJOR_VERSION`
 // and handle pending work.
-#define READYTORUN_MAJOR_VERSION 11
+#define READYTORUN_MAJOR_VERSION 12
 #define READYTORUN_MINOR_VERSION 0x0000
 
-#define MINIMUM_READYTORUN_MAJOR_VERSION 11
+#define MINIMUM_READYTORUN_MAJOR_VERSION 12
 
 // R2R Version 2.1 adds the InliningInfo section
 // R2R Version 2.2 adds the ProfileDataInfo section

From e02d9ab216522894d99f29b56de4891e79ffec8b Mon Sep 17 00:00:00 2001
From: EgorBo <egorbo@gmail.com>
Date: Wed, 5 Feb 2025 02:27:23 +0100
Subject: [PATCH 10/17] Change the doc

---
 docs/design/coreclr/botr/clr-abi.md | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md
index 7da3805324eb62..0eed53cd415e58 100644
--- a/docs/design/coreclr/botr/clr-abi.md
+++ b/docs/design/coreclr/botr/clr-abi.md
@@ -108,9 +108,7 @@ Passing/returning structs according to hardware floating-point calling conventio
 
 ## Return buffers
 
-The same applies to some return buffers. See `MethodTable::IsStructRequiringStackAllocRetBuf()`. When that returns `false`, the return buffer might be on the heap, either due to reflection/remoting code paths mentioned previously or due to a JIT optimization where a call with a return buffer that then assigns to a field (on the GC heap) are changed into passing the field reference as the return buffer. Conversely, when it returns true, the JIT does not need to use a write barrier when storing to the return buffer, but it is still not guaranteed to be a compiler temp, and as such the JIT should not introduce spurious writes to the return buffer.
-
-NOTE: This optimization is now disabled for all platforms (`IsStructRequiringStackAllocRetBuf()` always returns `false`).
+Since .NET 10, return buffers must always be allocated on the stack by the caller. After the call, the caller is responsible for copying the return buffer to the final destination using write barriers if necessary. The JIT can assume that the return buffer is always on the stack and may optimize accordingly, such as by omitting write barriers when writing GC pointers to the return buffer. In addition, the buffer is allowed to be used for temporary storage within the method since its content must not be aliased or cross-thread visible.
 
 ARM64-only: When a method returns a structure that is larger than 16 bytes the caller reserves a return buffer of sufficient size and alignment to hold the result. The address of the buffer is passed as an argument to the method in `R8` (defined in the JIT as `REG_ARG_RET_BUFF`). The callee isn't required to preserve the value stored in `R8`.
 
@@ -778,9 +776,7 @@ The return value is handled as follows:
 1. Floating-point values are returned on the top of the hardware FP stack.
 2. Integers up to 32 bits long are returned in EAX.
 3. 64-bit integers are passed with EAX holding the least significant 32 bits and EDX holding the most significant 32 bits.
-4. All other cases require the use of a return buffer, through which the value is returned.
-
-In addition, there is a guarantee that if a return buffer is used a value is stored there only upon ordinary exit from the method. The buffer is not allowed to be used for temporary storage within the method and its contents will be unaltered if an exception occurs while executing the method.
+4. All other cases require the use of a return buffer, through which the value is returned. See [Return buffers](#return-buffers).
 
 # Control Flow Guard (CFG) support on Windows
 

From 65b58eced32f52a257695b8dd3ef1e27b6694e32 Mon Sep 17 00:00:00 2001
From: EgorBo <egorbo@gmail.com>
Date: Fri, 7 Feb 2025 08:20:34 +0100
Subject: [PATCH 11/17] Address feedback

---
 src/coreclr/jit/compiler.h                         |  4 ++--
 src/coreclr/jit/importer.cpp                       | 13 +++++++------
 .../ReadyToRun/GCRefMapBuilder.cs                  |  7 -------
 src/coreclr/vm/frames.cpp                          | 14 --------------
 4 files changed, 9 insertions(+), 29 deletions(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 2782dc99441e9a..02564d83c8808f 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -5228,8 +5228,8 @@ class Compiler
     static LONG jitNestingLevel;
 #endif // DEBUG
 
-    static bool impIsInvariant(const GenTree* tree);
-    static bool impIsAddressInLocal(const GenTree* tree, GenTree** lclVarTreeOut = nullptr);
+    bool impIsInvariant(const GenTree* tree);
+    bool impIsAddressInLocal(const GenTree* tree, GenTree** lclVarTreeOut = nullptr);
 
     void impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, InlineResult* inlineResult);
 
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index d4dc3fca6dcdc8..2923c9ac5563df 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -846,9 +846,7 @@ GenTree* Compiler::impStoreStruct(GenTree*         store,
 
             // Make sure we don't pass something other than a local address to the return buffer arg.
             // It is allowed to pass current's method return buffer as it is a local too.
-            if (!impIsAddressInLocal(destAddr) &&
-                (!destAddr->OperIsScalarLocal() ||
-                 (destAddr->AsLclVarCommon()->GetLclNum() != impInlineRoot()->info.compRetBuffArg)))
+            if (!impIsAddressInLocal(destAddr) && !eeIsByrefLike(srcCall->gtRetClsHnd))
             {
                 unsigned tmp = lvaGrabTemp(false DEBUGARG("stack copy for value returned via return buffer"));
                 lvaSetStruct(tmp, srcCall->gtRetClsHnd, false);
@@ -974,9 +972,7 @@ GenTree* Compiler::impStoreStruct(GenTree*         store,
 
             // Make sure we don't pass something other than a local address to the return buffer arg.
             // It is allowed to pass current's method return buffer as it is a local too.
-            if (!impIsAddressInLocal(destAddr) &&
-                (!destAddr->OperIsScalarLocal() ||
-                 (destAddr->AsLclVarCommon()->GetLclNum() != impInlineRoot()->info.compRetBuffArg)))
+            if (!impIsAddressInLocal(destAddr) && !eeIsByrefLike(call->gtRetClsHnd))
             {
                 unsigned tmp = lvaGrabTemp(false DEBUGARG("stack copy for value returned via return buffer"));
                 lvaSetStruct(tmp, call->gtRetClsHnd, false);
@@ -12688,6 +12684,11 @@ bool Compiler::impIsAddressInLocal(const GenTree* tree, GenTree** lclVarTreeOut)
         return true;
     }
 
+    if (op->OperIsScalarLocal() && (op->AsLclVarCommon()->GetLclNum() == impInlineRoot()->info.compRetBuffArg))
+    {
+        return true;
+    }
+
     return false;
 }
 
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/GCRefMapBuilder.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/GCRefMapBuilder.cs
index ba8468c37d737c..dc77206257acf9 100644
--- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/GCRefMapBuilder.cs
+++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/GCRefMapBuilder.cs
@@ -182,13 +182,6 @@ private void FakeGcScanRoots(MethodDesc method, ArgIterator argit, CORCOMPILE_GC
                 return;
             }
 
-            // Also if the method has a return buffer, then it is the first argument, and could be an interior ref,
-            // so always promote it.
-            if (argit.HasRetBuffArg())
-            {
-                frame[_transitionBlock.GetRetBuffArgOffset(argit.HasThis)] = CORCOMPILE_GCREFMAP_TOKENS.GCREFMAP_INTERIOR;
-            }
-
             //
             // Now iterate the arguments
             //
diff --git a/src/coreclr/vm/frames.cpp b/src/coreclr/vm/frames.cpp
index 4479e678241908..7ef8801a79f628 100644
--- a/src/coreclr/vm/frames.cpp
+++ b/src/coreclr/vm/frames.cpp
@@ -1376,13 +1376,6 @@ void TransitionFrame::PromoteCallerStackHelper(promote_func* fn, ScanContext* sc
             (fn)(PTR_PTR_Object(pThis), sc, CHECK_APP_DOMAIN);
     }
 
-    if (argit.HasRetBuffArg())
-    {
-        PTR_PTR_VOID pRetBuffArg = dac_cast<PTR_PTR_VOID>(pTransitionBlock + argit.GetRetBuffArgOffset());
-        LOG((LF_GC, INFO3, "    ret buf Argument promoted from" FMT_ADDR "\n", DBG_ADDR(*pRetBuffArg) ));
-        PromoteCarefully(fn, PTR_PTR_Object(pRetBuffArg), sc, GC_CALL_INTERIOR|CHECK_APP_DOMAIN);
-    }
-
     int argOffset;
     while ((argOffset = argit.GetNextOffset()) != TransitionBlock::InvalidOffset)
     {
@@ -1982,13 +1975,6 @@ void FakeGcScanRoots(MetaSig& msig, ArgIterator& argit, MethodDesc * pMD, BYTE *
         return;
     }
 
-    // Also if the method has a return buffer, then it is the first argument, and could be an interior ref,
-    // so always promote it.
-    if (argit.HasRetBuffArg())
-    {
-        FakePromote((Object **)(pFrame + argit.GetRetBuffArgOffset()), &sc, GC_CALL_INTERIOR);
-    }
-
     //
     // Now iterate the arguments
     //

From 8079e12561bbf4476959689d97851560d7beeda8 Mon Sep 17 00:00:00 2001
From: EgorBo <egorbo@gmail.com>
Date: Fri, 7 Feb 2025 23:35:52 +0100
Subject: [PATCH 12/17] Address feedback

---
 docs/design/features/tailcalls-with-helpers.md | 6 +++---
 src/coreclr/jit/importer.cpp                   | 2 +-
 src/coreclr/jit/rationalize.cpp                | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/design/features/tailcalls-with-helpers.md b/docs/design/features/tailcalls-with-helpers.md
index 69a9aafd11748d..c99af8269d21a0 100644
--- a/docs/design/features/tailcalls-with-helpers.md
+++ b/docs/design/features/tailcalls-with-helpers.md
@@ -259,9 +259,9 @@ stack, even on ARM architectures, due to its return address hijacking mechanism.
 
 When the result is returned by value the JIT will introduce a local and pass a
 pointer to it in the second argument. For ret bufs the JIT will typically
-directly pass along its own return buffer parameter to DispatchTailCalls. It is
-possible that this return buffer is pointing into GC heap, so the result is
-always tracked as a byref in the mechanism.
+directly pass along its own return buffer parameter to DispatchTailCalls. The
+return buffer is always located on the stack, so the result does not need to be
+tracked as a byref.
 
 In certain cases the target function pointer is also stored. For some targets
 this might require the JIT to perform the equivalent of `ldvirtftn` or `ldftn`
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index 2923c9ac5563df..d92b4d028f6d6e 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -976,7 +976,7 @@ GenTree* Compiler::impStoreStruct(GenTree*         store,
             {
                 unsigned tmp = lvaGrabTemp(false DEBUGARG("stack copy for value returned via return buffer"));
                 lvaSetStruct(tmp, call->gtRetClsHnd, false);
-                destAddr = gtNewLclVarAddrNode(tmp, TYP_BYREF);
+                destAddr = gtNewLclVarAddrNode(tmp, TYP_I_IMPL);
 
                 // Insert address of temp into existing call
                 NewCallArg retBufArg = NewCallArg::Primitive(destAddr).WellKnown(WellKnownArg::RetBuffer);
diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp
index 572240b6298e55..de23305c145b7f 100644
--- a/src/coreclr/jit/rationalize.cpp
+++ b/src/coreclr/jit/rationalize.cpp
@@ -157,7 +157,7 @@ void Rationalizer::RewriteNodeAsCall(GenTree**             use,
         tmpNum = comp->lvaGrabTemp(true DEBUGARG("return buffer for hwintrinsic"));
         comp->lvaSetStruct(tmpNum, sig->retTypeClass, false);
 
-        GenTree*   destAddr = comp->gtNewLclVarAddrNode(tmpNum, TYP_BYREF);
+        GenTree*   destAddr = comp->gtNewLclVarAddrNode(tmpNum, TYP_I_IMPL);
         NewCallArg newArg   = NewCallArg::Primitive(destAddr).WellKnown(WellKnownArg::RetBuffer);
 
         call->gtArgs.InsertAfterThisOrFirst(comp, newArg);

From 076481882fa40a73b05f6565e36175e98f7af796 Mon Sep 17 00:00:00 2001
From: EgorBo <egorbo@gmail.com>
Date: Sat, 8 Feb 2025 01:01:50 +0100
Subject: [PATCH 13/17] remove helper

---
 src/coreclr/inc/corinfo.h                             |  2 --
 src/coreclr/inc/jithelpers.h                          |  1 -
 src/coreclr/jit/compiler.h                            |  1 -
 src/coreclr/jit/flowgraph.cpp                         | 11 -----------
 src/coreclr/jit/utils.cpp                             |  1 -
 .../tools/Common/JitInterface/CorInfoHelpFunc.cs      |  2 --
 src/coreclr/vm/gchelpers.cpp                          | 10 ----------
 src/coreclr/vm/jitinterface.h                         |  1 -
 8 files changed, 29 deletions(-)

diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h
index 5b2eb0798cc4ff..2128b8cabe7c32 100644
--- a/src/coreclr/inc/corinfo.h
+++ b/src/coreclr/inc/corinfo.h
@@ -594,8 +594,6 @@ enum CorInfoHelpFunc
     CORINFO_HELP_VALIDATE_INDIRECT_CALL,    // CFG: Validate function pointer
     CORINFO_HELP_DISPATCH_INDIRECT_CALL,    // CFG: Validate and dispatch to pointer
 
-    CORINFO_HELP_ENSURE_NONHEAP,            // Ensure that the target was not in the heap.
-
     CORINFO_HELP_COUNT,
 };
 
diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h
index 9976016820dd90..38114a9bbfcada 100644
--- a/src/coreclr/inc/jithelpers.h
+++ b/src/coreclr/inc/jithelpers.h
@@ -341,7 +341,6 @@
     JITHELPER(CORINFO_HELP_VALIDATE_INDIRECT_CALL, NULL, METHOD__NIL)
     JITHELPER(CORINFO_HELP_DISPATCH_INDIRECT_CALL, NULL, METHOD__NIL)
 #endif
-    JITHELPER(CORINFO_HELP_ENSURE_NONHEAP, JIT_EnsureNonHeapTarget,METHOD__NIL)
 
 #undef JITHELPER
 #undef DYNAMICJITHELPER
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 0757280bd3aa89..b2abdfdc0498f7 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -10795,7 +10795,6 @@ class Compiler
         STRESS_MODE(POISON_IMPLICIT_BYREFS)                                                     \
         STRESS_MODE(STORE_BLOCK_UNROLLING)                                                      \
         STRESS_MODE(THREE_OPT_LAYOUT)                                                           \
-        STRESS_MODE(NONHEAP_RET_BUFFER)                                                         \
         STRESS_MODE(COUNT)
 
     enum                compStressArea
diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp
index 892f0919df26e5..369b7b7a30241c 100644
--- a/src/coreclr/jit/flowgraph.cpp
+++ b/src/coreclr/jit/flowgraph.cpp
@@ -2457,17 +2457,6 @@ PhaseStatus Compiler::fgAddInternal()
 
     noway_assert(!dbgHandle || !pDbgHandle);
 
-#if DEBUG
-    // JitStress: Insert a helper call to ensure that the return buffer is not on the GC heap.
-    if (compStressCompile(STRESS_NONHEAP_RET_BUFFER, 50) && (info.compRetBuffArg != BAD_VAR_NUM) &&
-        !opts.IsReadyToRun())
-    {
-        GenTree* retBuffAddr = gtNewLclvNode(info.compRetBuffArg, TYP_BYREF);
-        fgNewStmtAtBeg(fgFirstBB, gtNewHelperCallNode(CORINFO_HELP_ENSURE_NONHEAP, TYP_VOID, retBuffAddr));
-        madeChanges = true;
-    }
-#endif
-
     if (dbgHandle || pDbgHandle)
     {
         // Test the JustMyCode VM global state variable
diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp
index bb0d62af848f77..b308408a47e5d5 100644
--- a/src/coreclr/jit/utils.cpp
+++ b/src/coreclr/jit/utils.cpp
@@ -1770,7 +1770,6 @@ void HelperCallProperties::init()
                 isNoGC = true;
                 FALLTHROUGH;
             case CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP:
-            case CORINFO_HELP_ENSURE_NONHEAP:
             case CORINFO_HELP_BULK_WRITEBARRIER:
                 mutatesHeap = true;
                 break;
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
index 88cd51c01e16b9..04bca41e476671 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
@@ -284,8 +284,6 @@ which is the right helper to use to allocate an object of a given type. */
         CORINFO_HELP_VALIDATE_INDIRECT_CALL,    // CFG: Validate function pointer
         CORINFO_HELP_DISPATCH_INDIRECT_CALL,    // CFG: Validate and dispatch to pointer
 
-        CORINFO_HELP_ENSURE_NONHEAP,            // Ensure that the target was not in the heap.
-
         CORINFO_HELP_COUNT,
     }
 }
diff --git a/src/coreclr/vm/gchelpers.cpp b/src/coreclr/vm/gchelpers.cpp
index 048e840728feda..f002fbe1b59a27 100644
--- a/src/coreclr/vm/gchelpers.cpp
+++ b/src/coreclr/vm/gchelpers.cpp
@@ -1466,16 +1466,6 @@ extern "C" HCIMPL2_RAW(VOID, JIT_WriteBarrierEnsureNonHeapTarget, Object **dst,
 }
 HCIMPLEND_RAW
 
-extern "C" HCIMPL1_RAW(VOID, JIT_EnsureNonHeapTarget, void *dst)
-{
-    STATIC_CONTRACT_MODE_COOPERATIVE;
-    STATIC_CONTRACT_THROWS;
-    STATIC_CONTRACT_GC_NOTRIGGER;
-
-    _ASSERT(!GCHeapUtilities::GetGCHeap()->IsHeapPointer(dst));
-}
-HCIMPLEND_RAW
-
 // This function sets the card table with the granularity of 1 byte, to avoid ghost updates
 //    that could occur if multiple threads were trying to set different bits in the same card.
 
diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h
index 1f8986619ab303..7f1835e458a53a 100644
--- a/src/coreclr/vm/jitinterface.h
+++ b/src/coreclr/vm/jitinterface.h
@@ -235,7 +235,6 @@ extern "C" FCDECL2(VOID, JIT_CheckedWriteBarrier, Object **dst, Object *ref);
 
 extern "C" FCDECL2(VOID, JIT_WriteBarrier, Object **dst, Object *ref);
 extern "C" FCDECL2(VOID, JIT_WriteBarrierEnsureNonHeapTarget, Object **dst, Object *ref);
-extern "C" FCDECL1(VOID, JIT_EnsureNonHeapTarget, void *dst);
 
 // ARM64 JIT_WriteBarrier uses special ABI and thus is not callable directly
 // Copied write barriers must be called at a different location

From db01527a24177cf4a3a5a6550081c51a317b6ce2 Mon Sep 17 00:00:00 2001
From: EgorBo <egorbo@gmail.com>
Date: Mon, 10 Feb 2025 21:11:15 +0100
Subject: [PATCH 14/17] Address feedback

---
 src/coreclr/jit/compiler.h    |  1 +
 src/coreclr/jit/flowgraph.cpp | 43 +++++++++++++++++++++++++++++++++++
 src/coreclr/jit/gentree.cpp   |  2 +-
 src/coreclr/jit/importer.cpp  |  9 ++------
 4 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index b2abdfdc0498f7..ded13ccccf9356 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -6793,6 +6793,7 @@ class Compiler
 
 public:
     bool fgAddrCouldBeNull(GenTree* addr);
+    bool fgAddrCouldBeHeap(GenTree* addr);
     void fgAssignSetVarDef(GenTree* tree);
 
 private:
diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp
index 369b7b7a30241c..7a5c621e572a52 100644
--- a/src/coreclr/jit/flowgraph.cpp
+++ b/src/coreclr/jit/flowgraph.cpp
@@ -928,6 +928,49 @@ bool Compiler::fgAddrCouldBeNull(GenTree* addr)
     return true; // default result: addr could be null.
 }
 
+//------------------------------------------------------------------------------
+// fgAddrCouldBeHeap: Check whether the address tree may represent a heap address.
+//
+// Arguments:
+//    addr - Address to check
+//
+// Return Value:
+//    True if address could be a heap address; false otherwise (i.e. stack, native memory, etc.)
+//
+bool Compiler::fgAddrCouldBeHeap(GenTree* addr)
+{
+    const GenTree* op = addr;
+    while (op->OperIs(GT_FIELD_ADDR) && op->AsFieldAddr()->IsInstance())
+    {
+        op = op->AsFieldAddr()->GetFldObj();
+    }
+
+    if (op->OperIs(GT_LCL_ADDR))
+    {
+        return false;
+    }
+
+    if (op->OperIsScalarLocal() && (op->AsLclVarCommon()->GetLclNum() == impInlineRoot()->info.compRetBuffArg))
+    {
+        // RetBuf is known to be on the stack
+        return false;
+    }
+
+    if (op->OperIs(GT_ADD))
+    {
+        // If we have (base + offset), inspect the base. We assume someone else normalized the tree
+        // so the constant offset is always on the right.
+        GenTree* op2 = op->gtGetOp2();
+        if (op2->TypeIs(TYP_I_IMPL) && op2->IsCnsIntOrI() && !op2->IsIconHandle() &&
+            !fgIsBigOffset(op2->AsIntCon()->IconValue()))
+        {
+            return fgAddrCouldBeHeap(op->gtGetOp1());
+        }
+    }
+
+    return true;
+}
+
 //------------------------------------------------------------------------------
 // fgOptimizeDelegateConstructor: try and optimize construction of a delegate
 //
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index ea02025450b39a..f7e3cdf86aa0b2 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -18301,7 +18301,7 @@ bool GenTreeIndir::IsAddressNotOnHeap(Compiler* comp)
         return true;
     }
 
-    if (HasBase() && Base()->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR))
+    if (HasBase() && !comp->fgAddrCouldBeHeap(Base()->gtSkipReloadOrCopy()))
     {
         return true;
     }
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index d92b4d028f6d6e..29aa3a61ca4ade 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -846,7 +846,7 @@ GenTree* Compiler::impStoreStruct(GenTree*         store,
 
             // Make sure we don't pass something other than a local address to the return buffer arg.
             // It is allowed to pass current's method return buffer as it is a local too.
-            if (!impIsAddressInLocal(destAddr) && !eeIsByrefLike(srcCall->gtRetClsHnd))
+            if (fgAddrCouldBeHeap(destAddr) && !eeIsByrefLike(srcCall->gtRetClsHnd))
             {
                 unsigned tmp = lvaGrabTemp(false DEBUGARG("stack copy for value returned via return buffer"));
                 lvaSetStruct(tmp, srcCall->gtRetClsHnd, false);
@@ -972,7 +972,7 @@ GenTree* Compiler::impStoreStruct(GenTree*         store,
 
             // Make sure we don't pass something other than a local address to the return buffer arg.
             // It is allowed to pass current's method return buffer as it is a local too.
-            if (!impIsAddressInLocal(destAddr) && !eeIsByrefLike(call->gtRetClsHnd))
+            if (fgAddrCouldBeHeap(destAddr) && !eeIsByrefLike(call->gtRetClsHnd))
             {
                 unsigned tmp = lvaGrabTemp(false DEBUGARG("stack copy for value returned via return buffer"));
                 lvaSetStruct(tmp, call->gtRetClsHnd, false);
@@ -12684,11 +12684,6 @@ bool Compiler::impIsAddressInLocal(const GenTree* tree, GenTree** lclVarTreeOut)
         return true;
     }
 
-    if (op->OperIsScalarLocal() && (op->AsLclVarCommon()->GetLclNum() == impInlineRoot()->info.compRetBuffArg))
-    {
-        return true;
-    }
-
     return false;
 }
 

From 280dd4386a472b890165cc6d73041c689a4cec04 Mon Sep 17 00:00:00 2001
From: EgorBo <egorbo@gmail.com>
Date: Mon, 10 Feb 2025 22:02:11 +0100
Subject: [PATCH 15/17] FB

---
 src/coreclr/jit/flowgraph.cpp | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp
index 7a5c621e572a52..70b0e7c5667486 100644
--- a/src/coreclr/jit/flowgraph.cpp
+++ b/src/coreclr/jit/flowgraph.cpp
@@ -939,12 +939,17 @@ bool Compiler::fgAddrCouldBeNull(GenTree* addr)
 //
 bool Compiler::fgAddrCouldBeHeap(GenTree* addr)
 {
-    const GenTree* op = addr;
+    GenTree* op = addr;
     while (op->OperIs(GT_FIELD_ADDR) && op->AsFieldAddr()->IsInstance())
     {
         op = op->AsFieldAddr()->GetFldObj();
     }
 
+    target_ssize_t offset;
+    gtPeelOffsets(&op, &offset);
+
+    // Ignore the offset for locals
+
     if (op->OperIs(GT_LCL_ADDR))
     {
         return false;
@@ -956,18 +961,6 @@ bool Compiler::fgAddrCouldBeHeap(GenTree* addr)
         return false;
     }
 
-    if (op->OperIs(GT_ADD))
-    {
-        // If we have (base + offset), inspect the base. We assume someone else normalized the tree
-        // so the constant offset is always on the right.
-        GenTree* op2 = op->gtGetOp2();
-        if (op2->TypeIs(TYP_I_IMPL) && op2->IsCnsIntOrI() && !op2->IsIconHandle() &&
-            !fgIsBigOffset(op2->AsIntCon()->IconValue()))
-        {
-            return fgAddrCouldBeHeap(op->gtGetOp1());
-        }
-    }
-
     return true;
 }
 

From 4a48e641159636b339eb9a566bf11e43ae3e3f68 Mon Sep 17 00:00:00 2001
From: EgorBo <egorbo@gmail.com>
Date: Tue, 11 Feb 2025 08:28:01 +0100
Subject: [PATCH 16/17] address feedback

---
 src/coreclr/jit/importer.cpp | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index 29aa3a61ca4ade..fe09e55525f440 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -851,11 +851,20 @@ GenTree* Compiler::impStoreStruct(GenTree*         store,
                 unsigned tmp = lvaGrabTemp(false DEBUGARG("stack copy for value returned via return buffer"));
                 lvaSetStruct(tmp, srcCall->gtRetClsHnd, false);
 
+                // Make sure the address is evaluated first if needed.
+                if (store->OperIs(GT_STOREIND, GT_STORE_BLK) &&
+                    ((store->AsIndir()->Addr()->gtFlags & GTF_ALL_EFFECT) != 0))
+                {
+                    unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgMakeTemp is creating a new local variable"));
+                    impStoreToTemp(lclNum, store->AsIndir()->Addr(), curLevel, pAfterStmt, di, block);
+                    store->AsIndir()->Addr() = gtNewLclvNode(lclNum, genActualType(store->AsIndir()->Addr()));
+                }
+
                 GenTree* spilledCall = gtNewStoreLclVarNode(tmp, srcCall);
-                GenTree* comma       = gtNewOperNode(GT_COMMA, store->TypeGet(), spilledCall,
+                spilledCall          = impStoreStruct(spilledCall, curLevel, pAfterStmt, di, block);
+                store->Data()        = gtNewOperNode(GT_COMMA, store->TypeGet(), spilledCall,
                                                      gtNewLclvNode(tmp, lvaGetDesc(tmp)->TypeGet()));
-                store->Data()        = comma;
-                comma->AsOp()->gtOp1 = impStoreStruct(spilledCall, curLevel, pAfterStmt, di, block);
+
                 return impStoreStruct(store, curLevel, pAfterStmt, di, block);
             }
 

From da55ebf01ba819f8e66f718c2d18f1d52eb52f6e Mon Sep 17 00:00:00 2001
From: EgorBo <egorbo@gmail.com>
Date: Tue, 11 Feb 2025 09:58:08 +0100
Subject: [PATCH 17/17] revert to previous

---
 src/coreclr/jit/importer.cpp | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index fe09e55525f440..65c1323b0db870 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -851,15 +851,6 @@ GenTree* Compiler::impStoreStruct(GenTree*         store,
                 unsigned tmp = lvaGrabTemp(false DEBUGARG("stack copy for value returned via return buffer"));
                 lvaSetStruct(tmp, srcCall->gtRetClsHnd, false);
 
-                // Make sure the address is evaluated first if needed.
-                if (store->OperIs(GT_STOREIND, GT_STORE_BLK) &&
-                    ((store->AsIndir()->Addr()->gtFlags & GTF_ALL_EFFECT) != 0))
-                {
-                    unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgMakeTemp is creating a new local variable"));
-                    impStoreToTemp(lclNum, store->AsIndir()->Addr(), curLevel, pAfterStmt, di, block);
-                    store->AsIndir()->Addr() = gtNewLclvNode(lclNum, genActualType(store->AsIndir()->Addr()));
-                }
-
                 GenTree* spilledCall = gtNewStoreLclVarNode(tmp, srcCall);
                 spilledCall          = impStoreStruct(spilledCall, curLevel, pAfterStmt, di, block);
                 store->Data()        = gtNewOperNode(GT_COMMA, store->TypeGet(), spilledCall,