diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index 5755e304fae638..d4732bdf0476e3 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -7907,6 +7907,9 @@ static bool GetStoreCoalescingData(Compiler* comp, GenTreeStoreInd* ind, StoreCo
 //    |  \--* LCL_VAR   byref  V00
 //    \--* CNS_INT   long   0x200000001
 //
+// NOTE: Our memory model allows us to do this optimization; see Memory-model.md:
+//   * Adjacent non-volatile writes to the same location can be coalesced.
+//
 // Arguments:
 //    ind - the current STOREIND node
 //
@@ -7920,13 +7923,6 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind)
         return;
     }
 
-    // For now, we require the current STOREIND to have LEA (previous store may not have it)
-    // So we can easily adjust the offset, consider making it more flexible in future.
-    if (!ind->Addr()->OperIs(GT_LEA))
-    {
-        return;
-    }
-
     // TODO-ARM64-CQ: enable TYP_REF if we find a case where it's beneficial.
     // The algorithm does support TYP_REF (with null value), but it seems to be not worth
     // it on ARM64 where it's pretty efficient to do "stp xzr, xzr, [addr]" to clear two
@@ -8007,12 +8003,31 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind)
             return;
         }
 
-        // Offset has to match the size of the type. We don't support the same or overlapping offsets.
+        // At this point we know that we have two consecutive STOREINDs with the same base address,
+        // index and scale; the only thing that may differ is the constant offset.
+
+        // The same offset means that we're storing to the same location with the same width,
+        // so the previous store is dead: just remove it.
+        if (prevData.offset == currData.offset)
+        {
+            BlockRange().Remove(std::move(prevIndRange));
+            continue;
+        }
+
+        // Otherwise, the difference between the two offsets has to match the size of the type.
+        // We don't support overlapping stores.
         if (abs(prevData.offset - currData.offset) != (int)genTypeSize(prevData.targetType))
         {
             return;
         }
 
+        // For now, we require the current STOREIND to have a LEA (the previous store may not have one)
+        // so we can easily adjust the offset; consider making this more flexible in the future.
+        if (!ind->Addr()->OperIs(GT_LEA))
+        {
+            return;
+        }
+
         // Now the hardest part: decide whether it's safe to use an unaligned write.
         //
         // IND is always fine (and all IND created here from such)
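To make the new control flow easier to follow outside the JIT, here is a minimal standalone C++ sketch of the two offset checks the diff introduces. `StoreDesc`, `StoreAction`, and `classifyStores` are hypothetical names for illustration only (the real code works on `StoreCoalescingData` inside `LowerStoreIndirCoalescing`), and the sketch assumes the surrounding code has already verified that both stores share the same base address, index, scale, and target type.

```cpp
#include <cstdlib>
#include <iostream>

// Hypothetical stand-in for the JIT's StoreCoalescingData (illustration only).
struct StoreDesc
{
    int offset; // constant offset from the common base address
    int size;   // width of the store in bytes (genTypeSize of the target type)
};

enum class StoreAction
{
    RemovePrevious, // same offset, same width: the previous store is dead
    Coalesce,       // offsets differ by exactly one store width: merge into a wider store
    GiveUp          // overlapping or non-adjacent stores: leave them alone
};

// Mirrors the two checks added by the diff, in the same order.
StoreAction classifyStores(const StoreDesc& prev, const StoreDesc& curr)
{
    // Same offset means the same location with the same width, so the
    // previous store is unobservable under the memory model's coalescing
    // rule and can simply be removed.
    if (prev.offset == curr.offset)
    {
        return StoreAction::RemovePrevious;
    }

    // Otherwise the two stores must be exactly adjacent; any other distance
    // means an overlapping or gapped pair, which is not supported.
    if (std::abs(prev.offset - curr.offset) != prev.size)
    {
        return StoreAction::GiveUp;
    }

    return StoreAction::Coalesce;
}

int main()
{
    // Two 4-byte stores to the same address: the first one is dead.
    std::cout << (classifyStores({8, 4}, {8, 4}) == StoreAction::RemovePrevious) << '\n';
    // Two adjacent 4-byte stores: candidates for a single 8-byte store.
    std::cout << (classifyStores({8, 4}, {12, 4}) == StoreAction::Coalesce) << '\n';
    // Overlapping stores (offsets differ by 2, width 4): give up.
    std::cout << (classifyStores({8, 4}, {10, 4}) == StoreAction::GiveUp) << '\n';
}
```

Note the ordering: the equal-offset check has to run before the adjacency check, since an offset difference of zero would otherwise fall into the "give up" branch and the dead-store removal would never fire.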