@@ -414,8 +414,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
         }
 
         case GT_STOREIND:
-            LowerStoreIndirCommon(node->AsStoreInd());
-            break;
+            return LowerStoreIndirCommon(node->AsStoreInd());
 
         case GT_ADD:
         {
@@ -8895,7 +8894,10 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeIndir* ind)
 // Arguments:
 //    ind - the store indirection node we are lowering.
 //
-void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind)
+// Return Value:
+//    Next node to lower.
+//
+GenTree* Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind)
 {
     assert(ind->TypeGet() != TYP_STRUCT);
 
@@ -8910,28 +8912,30 @@ void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind)
 #endif
     TryCreateAddrMode(ind->Addr(), isContainable, ind);
 
-    if (!comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(ind))
+    if (comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(ind))
     {
+        return ind->gtNext;
+    }
+
 #ifndef TARGET_XARCH
-        if (ind->Data()->IsIconHandle(GTF_ICON_OBJ_HDL))
+    if (ind->Data()->IsIconHandle(GTF_ICON_OBJ_HDL))
+    {
+        const ssize_t handle = ind->Data()->AsIntCon()->IconValue();
+        if (!comp->info.compCompHnd->isObjectImmutable(reinterpret_cast<CORINFO_OBJECT_HANDLE>(handle)))
         {
-            const ssize_t handle = ind->Data()->AsIntCon()->IconValue();
-            if (!comp->info.compCompHnd->isObjectImmutable(reinterpret_cast<CORINFO_OBJECT_HANDLE>(handle)))
-            {
-                // On platforms with weaker memory model we need to make sure we use a store with the release semantic
-                // when we publish a potentially mutable object
-                // See relevant discussions https://github.com/dotnet/runtime/pull/76135#issuecomment-1257258310 and
-                // https://github.com/dotnet/runtime/pull/76112#discussion_r980639782
+            // On platforms with weaker memory model we need to make sure we use a store with the release semantic
+            // when we publish a potentially mutable object
+            // See relevant discussions https://github.com/dotnet/runtime/pull/76135#issuecomment-1257258310 and
+            // https://github.com/dotnet/runtime/pull/76112#discussion_r980639782
 
-                // This can be relaxed to "just make sure to use stlr/memory barrier" if needed
-                ind->gtFlags |= GTF_IND_VOLATILE;
-            }
+            // This can be relaxed to "just make sure to use stlr/memory barrier" if needed
+            ind->gtFlags |= GTF_IND_VOLATILE;
         }
+    }
 #endif
 
-        LowerStoreIndirCoalescing(ind);
-        LowerStoreIndir(ind);
-    }
+    LowerStoreIndirCoalescing(ind);
+    return LowerStoreIndir(ind);
 }
 
 //------------------------------------------------------------------------
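
Aside, to illustrate the release-semantics comment in the hunk above: on weakly ordered targets such as arm64, publishing a pointer to a freshly initialized, potentially mutable object with a plain store can let another thread observe the pointer before the object's fields. A minimal standalone C++ sketch of that publication pattern (names invented for the example; this is not JIT code):

#include <atomic>

struct Widget
{
    int payload;
};

std::atomic<Widget*> g_published{nullptr};

void Publish(Widget* w)
{
    w->payload = 42;
    // Release store: the field initialization above cannot be reordered past
    // the publication of the pointer, so a reader that acquires g_published
    // also observes payload == 42. A plain store gives no such guarantee on
    // weak-memory targets, which is why the patch marks the store volatile.
    g_published.store(w, std::memory_order_release);
}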
@@ -9014,7 +9018,7 @@ GenTree* Lowering::LowerIndir(GenTreeIndir* ind)
 #ifdef TARGET_ARM64
     if (comp->opts.OptimizationEnabled() && ind->OperIs(GT_IND))
     {
-        OptimizeForLdp(ind);
+        OptimizeForLdpStp(ind);
     }
 #endif
 
@@ -9029,7 +9033,7 @@ GenTree* Lowering::LowerIndir(GenTreeIndir* ind)
 // cases passing the distance check, but 82 out of these 112 extra cases were
 // then rejected due to interference. So 16 seems like a good number to balance
 // the throughput costs.
-const int LDP_REORDERING_MAX_DISTANCE = 16;
+const int LDP_STP_REORDERING_MAX_DISTANCE = 16;
 
 //------------------------------------------------------------------------
 // OptimizeForLdp: Record information about an indirection, and try to optimize
@@ -9042,7 +9046,7 @@ const int LDP_REORDERING_MAX_DISTANCE = 16;
 // Returns:
 //    True if the optimization was successful.
 //
-bool Lowering::OptimizeForLdp(GenTreeIndir* ind)
+bool Lowering::OptimizeForLdpStp(GenTreeIndir* ind)
 {
     if (!ind->TypeIs(TYP_INT, TYP_LONG, TYP_FLOAT, TYP_DOUBLE, TYP_SIMD8, TYP_SIMD16) || ind->IsVolatile())
     {
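
Aside, to illustrate what OptimizeForLdpStp is trying to enable: two same-sized indirections off the same base at adjacent offsets can be emitted as one ldp/stp on arm64 once lowering gets them adjacent in LIR. A hypothetical source-level shape (invented for the example):

struct Pair
{
    long first;  // offset +0
    long second; // offset +8
};

void CopyPair(Pair* dst, const Pair* src)
{
    // Adjacent 8-byte loads and adjacent 8-byte stores off the same bases;
    // once the indirections are back to back, arm64 codegen can form
    //     ldp x2, x3, [x1]
    //     stp x2, x3, [x0]
    // instead of four separate ldr/str instructions.
    dst->first  = src->first;
    dst->second = src->second;
}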
@@ -9060,7 +9064,7 @@ bool Lowering::OptimizeForLdp(GenTreeIndir* ind)
 
     // Every indirection takes an expected 2+ nodes, so we only expect at most
     // half the reordering distance to be candidates for the optimization.
-    int maxCount = min(m_blockIndirs.Height(), LDP_REORDERING_MAX_DISTANCE / 2);
+    int maxCount = min(m_blockIndirs.Height(), LDP_STP_REORDERING_MAX_DISTANCE / 2);
     for (int i = 0; i < maxCount; i++)
     {
         SavedIndir& prev = m_blockIndirs.TopRef(i);
@@ -9075,11 +9079,22 @@ bool Lowering::OptimizeForLdp(GenTreeIndir* ind)
             continue;
         }
 
+        if (prevIndir->gtNext == nullptr)
+        {
+            // Deleted by other optimization
+            continue;
+        }
+
+        if (prevIndir->OperIsStore() != ind->OperIsStore())
+        {
+            continue;
+        }
+
         JITDUMP("[%06u] and [%06u] are indirs off the same base with offsets +%03u and +%03u\n",
                 Compiler::dspTreeID(ind), Compiler::dspTreeID(prevIndir), (unsigned)offs, (unsigned)prev.Offset);
         if (std::abs(offs - prev.Offset) == genTypeSize(ind))
         {
-            JITDUMP("  ..and they are amenable to ldp optimization\n");
+            JITDUMP("  ..and they are amenable to ldp/stp optimization\n");
             if (TryMakeIndirsAdjacent(prevIndir, ind))
             {
                 // Do not let the previous one participate in
@@ -9115,7 +9130,7 @@ bool Lowering::OptimizeForLdp(GenTreeIndir* ind)
 bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indir)
 {
     GenTree* cur = prevIndir;
-    for (int i = 0; i < LDP_REORDERING_MAX_DISTANCE; i++)
+    for (int i = 0; i < LDP_STP_REORDERING_MAX_DISTANCE; i++)
     {
         cur = cur->gtNext;
         if (cur == indir)
@@ -9172,8 +9187,16 @@ bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indi
     INDEBUG(dumpWithMarks());
     JITDUMP("\n");
 
+    if ((prevIndir->gtLIRFlags & LIR::Flags::Mark) != 0)
+    {
+        JITDUMP("Previous indir is part of the data flow of current indir\n");
+        UnmarkTree(indir);
+        return false;
+    }
+
     m_scratchSideEffects.Clear();
 
+    bool sawData = false;
     for (GenTree* cur = prevIndir->gtNext; cur != indir; cur = cur->gtNext)
     {
         if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0)
@@ -9186,6 +9209,11 @@ bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indi
                 UnmarkTree(indir);
                 return false;
             }
+
+            if (indir->OperIsStore())
+            {
+                sawData |= cur == indir->Data();
+            }
         }
         else
         {
@@ -9197,6 +9225,13 @@ bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indi
 
     if (m_scratchSideEffects.InterferesWith(comp, indir, true))
     {
+        if (!indir->OperIsLoad())
+        {
+            JITDUMP("Have conservative interference with last store. Giving up.\n");
+            UnmarkTree(indir);
+            return false;
+        }
+
         // Try a bit harder, making use of the following facts:
         //
         // 1. We know the indir is non-faulting, so we do not need to worry
@@ -9293,8 +9328,39 @@ bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indi
         }
     }
 
-    JITDUMP("Interference checks passed. Moving nodes that are not part of data flow of [%06u]\n\n",
-            Compiler::dspTreeID(indir));
+    JITDUMP("Interference checks passed: can move unrelated nodes past second indir.\n");
+
+    if (sawData)
+    {
+        // If the data node of 'indir' is between 'prevIndir' and 'indir' then
+        // try to move the previous indir up to happen after the data
+        // computation. We will be moving all nodes unrelated to the data flow
+        // past 'indir', so we only need to check interference between
+        // 'prevIndir' and all nodes that are part of 'indir's dataflow.
+        m_scratchSideEffects.Clear();
+        m_scratchSideEffects.AddNode(comp, prevIndir);
+
+        for (GenTree* cur = prevIndir->gtNext;; cur = cur->gtNext)
+        {
+            if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0)
+            {
+                if (m_scratchSideEffects.InterferesWith(comp, cur, true))
+                {
+                    JITDUMP("Cannot move prev indir [%06u] up past [%06u] to get it past the data computation\n",
+                            Compiler::dspTreeID(prevIndir), Compiler::dspTreeID(cur));
+                    UnmarkTree(indir);
+                    return false;
+                }
+            }
+
+            if (cur == indir->Data())
+            {
+                break;
+            }
+        }
+    }
+
+    JITDUMP("Moving nodes that are not part of data flow of [%06u]\n\n", Compiler::dspTreeID(indir));
 
     GenTree* previous = prevIndir;
     for (GenTree* node = prevIndir->gtNext;;)
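
Aside: the sawData path above covers the case where the data computation of the second store sits between the two indirections. A hypothetical shape (invented for the example):

void StorePair(long* p, long a, long b)
{
    p[0] = a;       // prevIndir
    long t = a + b; // data computation of the second store lies between the indirs
    p[1] = t;       // indir; lowering tries to move the first store to after 't'
}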
@@ -9317,6 +9383,22 @@ bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indi
         node = next;
     }
 
+    if (sawData)
+    {
+        // For some reason LSRA is not able to reuse a constant if both LIR
+        // temps are live simultaneously, so skip moving in those cases and
+        // expect LSRA to reuse the constant instead.
+        if (indir->Data()->OperIs(GT_CNS_INT, GT_CNS_DBL) && GenTree::Compare(indir->Data(), prevIndir->Data()))
+        {
+            JITDUMP("Not moving previous indir since we are expecting constant reuse for the data\n");
+        }
+        else
+        {
+            BlockRange().Remove(prevIndir);
+            BlockRange().InsertAfter(indir->Data(), prevIndir);
+        }
+    }
+
     JITDUMP("Result:\n\n");
     INDEBUG(dumpWithMarks());
     JITDUMP("\n");
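
Aside, on the constant-reuse special case in the last hunk: when both stores write the same constant, moving the first store down would keep two equal constant temps live at the same time, which defeats LSRA's constant reuse, so the order is left alone. A hypothetical example (invented, with the kind of arm64 output one would hope for):

struct Point
{
    long x; // offset +0
    long y; // offset +8
};

void Reset(Point* p)
{
    // Both stores use the same constant; keeping them in place lets the
    // register allocator materialize 42 once and the stores can still pair:
    //     mov x1, #42
    //     stp x1, x1, [x0]
    p->x = 42;
    p->y = 42;
}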