dotnet · jakobbotsch · Jul 21, 2024 · Jul 20, 2024 · Jul 20, 2024 · Jul 20, 2024
diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp
@@ -5844,6 +5844,12 @@ void emitter::emitIns_R_R_I(instruction     ins,
                 return;
             }
 
+            if ((reg1 == reg2) && (EA_SIZE(attr) == EA_PTRSIZE) && emitComp->opts.OptimizationEnabled() &&
+                OptimizePostIndexed(ins, reg1, imm, attr))
+            {
+                return;
+            }
+
             reg1 = encodingSPtoZR(reg1);
             reg2 = encodingSPtoZR(reg2);
         }
@@ -11070,6 +11076,37 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
             code |= ((code_t)imm << 12);                 // iiiiiiiii
             code |= insEncodeReg_Rn(id->idReg2());       // nnnnn
             dst += emitOutput_Instr(dst, code);
+
+            // With pre or post-indexing we may have a second GC register to
+            // update.
+            if (insOptsIndexed(id->idInsOpt()) && !id->idIsSmallDsc())
+            {
+                if (emitInsIsLoad(ins))
+                {
+                    // Load will write the destination (reg1).
+                    if (id->idGCref() != GCT_NONE)
+                    {
+                        emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+                    }
+                    else
+                    {
+                        emitGCregDeadUpd(id->idReg1(), dst);
+                    }
+                }
+
+                // We will always write reg2.
+                if (id->idGCrefReg2() != GCT_NONE)
+                {
+                    emitGCregLiveUpd(id->idGCrefReg2(), id->idReg2(), dst);
+                }
+                else
+                {
+                    emitGCregDeadUpd(id->idReg2(), dst);
+                }
+
+                goto SKIP_GC_UPDATE;
+            }
+
             break;
 
         case IF_LS_2D: // LS_2D   .Q.............. ....ssnnnnnttttt      Vt Rn
@@ -17150,6 +17187,127 @@ bool emitter::IsOptimizableLdrToMov(
     return true;
 }
 
+//-----------------------------------------------------------------------------------
+// OptimizePostIndexed: Try to fold an in-place add/sub of an immediate into the
+// preceding load/store by re-emitting it in its post-indexed addressing form.
+//
+// Arguments:
+//   ins     - INS_add or INS_sub
+//   reg     - The register that the add/sub updates in place
+//   imm     - The immediate operand of the add/sub
+//   regAttr - Attributes of 'reg'; decides the GC type recorded for the base register
+//
+// Returns:
+//   True if the previous instruction was rewritten to also perform the add/sub.
+//
+bool emitter::OptimizePostIndexed(instruction ins, regNumber reg, ssize_t imm, emitAttr regAttr)
+{
+    assert((ins == INS_add) || (ins == INS_sub));
+
+    // Nothing to do unless there is a previous instruction we may rewrite.
+    if (!emitCanPeepholeLastIns())
+    {
+        return false;
+    }
+
+    instrDesc* const  prevIns = emitLastIns;
+    const instruction memIns  = prevIns->idIns();
+
+    // It has to be a load/store in the IF_LS_2A format that is not flagged by idIsTlsGD().
+    if (!emitInsIsLoadOrStore(memIns) || (prevIns->idInsFmt() != IF_LS_2A) || prevIns->idIsTlsGD())
+    {
+        return false;
+    }
+
+    // The load/store must not itself use 'reg' as its data register.
+    const regNumber dataReg = prevIns->idReg1();
+    if (dataReg == reg)
+    {
+        return false;
+    }
+
+    // The address register (SP is stored as ZR, hence the mapping) must be 'reg'.
+    const regNumber baseReg = encodingZRtoSP(prevIns->idReg2());
+    if (baseReg != reg)
+    {
+        return false;
+    }
+
+    // Restrict the rewrite to these load/store instructions.
+    switch (memIns)
+    {
+        case INS_ldr:
+        case INS_ldur:
+        case INS_str:
+        case INS_stur:
+        case INS_ldrb:
+        case INS_ldurb:
+        case INS_strb:
+        case INS_sturb:
+        case INS_ldrh:
+        case INS_ldurh:
+        case INS_strh:
+        case INS_sturh:
+        case INS_ldrsb:
+        case INS_ldursb:
+        case INS_ldrsh:
+        case INS_ldursh:
+        case INS_ldrsw:
+        case INS_ldursw:
+            break;
+
+        default:
+            return false;
+    }
+
+    // Fold the sign into the immediate; only offsets in [-256, 255] can be represented.
+    const ssize_t delta = (ins == INS_sub) ? -imm : imm;
+    if ((delta < -256) || (delta > 255))
+    {
+        return false;
+    }
+
+    // Carry over the GC type of the data register, or else the operand size.
+    const GCtype dataGC = prevIns->idGCref();
+    emitAttr     memAttr;
+    if (dataGC == GCT_BYREF)
+    {
+        memAttr = EA_BYREF;
+    }
+    else if (dataGC == GCT_GCREF)
+    {
+        memAttr = EA_GCREF;
+    }
+    else
+    {
+        memAttr = prevIns->idOpSize();
+    }
+
+    // Everything needed from the old descriptor has been read; replace it.
+    emitRemoveLastInstruction();
+
+    instrDesc* postIdx = emitNewInstrCns(memAttr, delta);
+    postIdx->idIns(memIns);
+    postIdx->idInsFmt(IF_LS_2C);
+    postIdx->idInsOpt(INS_OPTS_POST_INDEX);
+    postIdx->idReg1(dataReg);
+    postIdx->idReg2(encodingSPtoZR(baseReg));
+
+    // Record the GC type of the updated base register, if it has one.
+    if (EA_IS_BYREF(regAttr))
+    {
+        postIdx->idGCrefReg2(GCT_BYREF);
+    }
+    else if (EA_IS_GCREF(regAttr))
+    {
+        postIdx->idGCrefReg2(GCT_GCREF);
+    }
+
+    dispIns(postIdx);
+    appendToCurIG(postIdx);
+    return true;
+}
+
 #if defined(FEATURE_SIMD)
 //-----------------------------------------------------------------------------------
 // emitStoreSimd12ToLclOffset: store SIMD12 value from dataReg to varNum+offset.

diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h
@@ -193,6 +193,8 @@ FORCEINLINE bool OptimizeLdrStr(instruction ins,
                                 int         varx     = -1,
                                 int         offs     = -1 DEBUG_ARG(bool useRsvdReg = false));
 
+bool OptimizePostIndexed(instruction ins, regNumber reg, ssize_t imm, emitAttr regAttr);
+
 emitLclVarAddr* emitGetLclVarPairLclVar2(instrDesc* id)
 {
     assert(id->idIsLclVarPair());