@@ -298,19 +298,52 @@ IdealLoopTree* PhaseIdealLoop::insert_outer_loop(IdealLoopTree* loop, LoopNode*
298298 return outer_ilt;
299299}
300300
301- // Create a skeleton strip mined outer loop: a Loop head before the
302- // inner strip mined loop, a safepoint and an exit condition guarded
303- // by an opaque node after the inner strip mined loop with a backedge
304- // to the loop head. The inner strip mined loop is left as it is. Only
305- // once loop optimizations are over, do we adjust the inner loop exit
306- // condition to limit its number of iterations, set the outer loop
307- // exit condition and add Phis to the outer loop head. Some loop
308- // optimizations that operate on the inner strip mined loop need to be
309- // aware of the outer strip mined loop: loop unswitching needs to
310- // clone the outer loop as well as the inner, unrolling needs to only
311- // clone the inner loop etc. No optimizations need to change the outer
312- // strip mined loop as it is only a skeleton.
313- IdealLoopTree* PhaseIdealLoop::create_outer_strip_mined_loop (BoolNode *test, Node *cmp, Node *init_control,
301+ // Create a skeleton strip mined outer loop: an OuterStripMinedLoop head before the inner strip mined CountedLoop, a
302+ // SafePoint on exit of the inner CountedLoopEnd and an OuterStripMinedLoopEnd test that can't constant fold until loop
303+ // optimizations are over. The inner strip mined loop is left as it is. Only once loop optimizations are over, do we
304+ // adjust the inner loop exit condition to limit its number of iterations, set the outer loop exit condition and add
305+ // Phis to the outer loop head. Some loop optimizations that operate on the inner strip mined loop need to be aware of
306+ // the outer strip mined loop: loop unswitching needs to clone the outer loop as well as the inner, unrolling needs to
307+ // only clone the inner loop etc. No optimizations need to change the outer strip mined loop as it is only a skeleton.
308+ //
309+ // Schematically:
310+ //
311+ // OuterStripMinedLoop -------|
312+ // | |
313+ // CountedLoop ----------- | |
314+ // \- Phi (iv) -| | |
315+ // / \ | | |
316+ // CmpI AddI --| | |
317+ // \ | |
318+ // Bool | |
319+ // \ | |
320+ // CountedLoopEnd | |
321+ // / \ | |
322+ // IfFalse IfTrue--------| |
323+ // | |
324+ // SafePoint |
325+ // | |
326+ // OuterStripMinedLoopEnd |
327+ // / \ |
328+ // IfFalse IfTrue-----------|
329+ // |
330+ //
331+ //
332+ // As loop optimizations transform the inner loop, the outer strip mined loop stays mostly unchanged. The only exception
333+ // is nodes referenced from the SafePoint and sunk from the inner loop: they end up in the outer strip mined loop.
334+ //
335+ // Not adding Phis to the outer loop head from the beginning, and only adding them after loop optimizations does not
336+ // conform to C2's IR rules: any variable or memory slice that is mutated in a loop should have a Phi. The main
337+ // motivation for such a design that doesn't conform to C2's IR rules is to allow existing loop optimizations to be
338+ // mostly unaffected by the outer strip mined loop: the only extra step needed in most cases is to step over the
339+ // OuterStripMinedLoop. The main drawback is that once loop optimizations are over, an extra step is needed to finish
340+ // constructing the outer loop. This is handled by OuterStripMinedLoopNode::adjust_strip_mined_loop().
341+ //
342+ // Adding Phis to the outer loop is largely straightforward: there needs to be one Phi in the outer loop for every Phi
343+ // in the inner loop. Things may be more complicated for sunk Store nodes: there may not be any inner loop Phi left
344+ // after sinking for a particular memory slice but the outer loop needs a Phi. See
345+ // OuterStripMinedLoopNode::handle_sunk_stores_when_finishing_construction()
346+ IdealLoopTree* PhaseIdealLoop::create_outer_strip_mined_loop (Node* init_control,
314347 IdealLoopTree* loop, float cl_prob, float le_fcnt,
315348 Node*& entry_control, Node*& iffalse) {
316349 Node* outer_test = intcon (0 );
@@ -2255,9 +2288,8 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_
22552288 is_deleteable_safept (sfpt);
22562289 IdealLoopTree* outer_ilt = nullptr ;
22572290 if (strip_mine_loop) {
2258- outer_ilt = create_outer_strip_mined_loop (test, cmp, init_control, loop,
2259- cl_prob, le->_fcnt , entry_control,
2260- iffalse);
2291+ outer_ilt = create_outer_strip_mined_loop (init_control, loop, cl_prob, le->_fcnt ,
2292+ entry_control, iffalse);
22612293 }
22622294
22632295 // Now setup a new CountedLoopNode to replace the existing LoopNode
@@ -2870,10 +2902,11 @@ BaseCountedLoopNode* BaseCountedLoopNode::make(Node* entry, Node* backedge, Basi
28702902 return new LongCountedLoopNode (entry, backedge);
28712903}
28722904
2873- void OuterStripMinedLoopNode::fix_sunk_stores (CountedLoopEndNode* inner_cle, LoopNode* inner_cl, PhaseIterGVN* igvn,
2874- PhaseIdealLoop* iloop) {
2875- Node* cle_out = inner_cle->proj_out (false );
2876- Node* cle_tail = inner_cle->proj_out (true );
2905+ void OuterStripMinedLoopNode::fix_sunk_stores_when_back_to_counted_loop (PhaseIterGVN* igvn,
2906+ PhaseIdealLoop* iloop) const {
2907+ CountedLoopNode* inner_cl = inner_counted_loop ();
2908+ IfFalseNode* cle_out = inner_loop_exit ();
2909+
28772910 if (cle_out->outcnt () > 1 ) {
28782911 // Look for chains of stores that were sunk
28792912 // out of the inner loop and are in the outer loop
@@ -2988,11 +3021,90 @@ void OuterStripMinedLoopNode::fix_sunk_stores(CountedLoopEndNode* inner_cle, Loo
29883021 }
29893022}
29903023
3024+ // The outer strip mined loop is initially only partially constructed. In particular Phis are omitted.
3025+ // See comment above: PhaseIdealLoop::create_outer_strip_mined_loop()
3026+ // We're now in the process of finishing the construction of the outer loop. For each Phi in the inner loop, a Phi in
3027+ // the outer loop was just now created. However, Sunk Stores cause an extra challenge:
3028+ // 1) If all Stores in the inner loop were sunk for a particular memory slice, there's no Phi left for that memory slice
3029+ // in the inner loop anymore, and hence we did not yet add a Phi for the outer loop. So an extra Phi must now be
3030+ // added for each chain of sunk Stores for a particular memory slice.
3031+ // 2) If some Stores were sunk and some left in the inner loop, a Phi was already created in the outer loop but
3032+ // its backedge input wasn't wired correctly to the last Store of the chain: the backedge input was set to the
3033+ // backedge of the inner loop Phi instead, but it needs to be the last Store of the chain in the outer loop. We now
3034+ // have to fix that too.
3035+ void OuterStripMinedLoopNode::handle_sunk_stores_when_finishing_construction (PhaseIterGVN* igvn) {
3036+ IfFalseNode* cle_exit_proj = inner_loop_exit ();
3037+
3038+ // Find Sunk stores: Sunk stores are pinned on the loop exit projection of the inner loop. Indeed, because Sunk Stores
3039+ // modify the memory state captured by the SafePoint in the outer strip mined loop, they must be above it. The
3040+ // SafePoint's control input is the loop exit projection. It's also the only control out of the inner loop above the
3041+ // SafePoint.
3042+ #ifdef ASSERT
3043+ int stores_in_outer_loop_cnt = 0 ;
3044+ for (DUIterator_Fast imax, i = cle_exit_proj->fast_outs (imax); i < imax; i++) {
3045+ Node* u = cle_exit_proj->fast_out (i);
3046+ if (u->is_Store ()) {
3047+ stores_in_outer_loop_cnt++;
3048+ }
3049+ }
3050+ #endif
3051+
3052+ // Sunk stores are reachable from the memory state of the outer loop safepoint
3053+ SafePointNode* safepoint = outer_safepoint ();
3054+ MergeMemNode* mm = safepoint->in (TypeFunc::Memory)->isa_MergeMem ();
3055+ if (mm == nullptr ) {
3056+ // There is no MergeMem, which should only happen if there was no memory node
3057+ // sunk out of the loop.
3058+ assert (stores_in_outer_loop_cnt == 0 , " inconsistent" );
3059+ return ;
3060+ }
3061+ DEBUG_ONLY (int stores_in_outer_loop_cnt2 = 0 );
3062+ for (MergeMemStream mms (mm); mms.next_non_empty ();) {
3063+ Node* mem = mms.memory ();
3064+ // Traverse up the chain of stores to find the first store pinned
3065+ // at the loop exit projection.
3066+ Node* last = mem;
3067+ Node* first = nullptr ;
3068+ while (mem->is_Store () && mem->in (0 ) == cle_exit_proj) {
3069+ DEBUG_ONLY (stores_in_outer_loop_cnt2++);
3070+ first = mem;
3071+ mem = mem->in (MemNode::Memory);
3072+ }
3073+ if (first != nullptr ) {
3074+ // Found a chain of Stores that were sunk
3075+ // Do we already have a memory Phi for that slice on the outer loop? If that is the case, that Phi was created
3076+ // by cloning an inner loop Phi. The inner loop Phi should have mem, the memory state of the first Store out of
3077+ // the inner loop, as input on the backedge. So does the outer loop Phi given it's a clone.
3078+ Node* phi = nullptr ;
3079+ for (DUIterator_Fast imax, i = mem->fast_outs (imax); i < imax; i++) {
3080+ Node* u = mem->fast_out (i);
3081+ if (u->is_Phi () && u->in (0 ) == this && u->in (LoopBackControl) == mem) {
3082+ assert (phi == nullptr , " there should be only one" );
3083+ phi = u;
3084+ PRODUCT_ONLY (break );
3085+ }
3086+ }
3087+ if (phi == nullptr ) {
3088+ // No outer loop Phi? create one
3089+ phi = PhiNode::make (this , last);
3090+ phi->set_req (EntryControl, mem);
3091+ phi = igvn->transform (phi);
3092+ igvn->replace_input_of (first, MemNode::Memory, phi);
3093+ } else {
3094+ // Fix memory state along the backedge: it should be the last sunk Store of the chain
3095+ igvn->replace_input_of (phi, LoopBackControl, last);
3096+ }
3097+ }
3098+ }
3099+ assert (stores_in_outer_loop_cnt == stores_in_outer_loop_cnt2, " inconsistent" );
3100+ }
3101+
29913102void OuterStripMinedLoopNode::adjust_strip_mined_loop (PhaseIterGVN* igvn) {
3103+ verify_strip_mined (1 );
29923104 // Look for the outer & inner strip mined loop, reduce number of
29933105 // iterations of the inner loop, set exit condition of outer loop,
29943106 // construct required phi nodes for outer loop.
2995- CountedLoopNode* inner_cl = unique_ctrl_out ()-> as_CountedLoop ();
3107+ CountedLoopNode* inner_cl = inner_counted_loop ();
29963108 assert (inner_cl->is_strip_mined (), " inner loop should be strip mined" );
29973109 if (LoopStripMiningIter == 0 ) {
29983110 remove_outer_loop_and_safepoint (igvn);
@@ -3010,7 +3122,7 @@ void OuterStripMinedLoopNode::adjust_strip_mined_loop(PhaseIterGVN* igvn) {
30103122 inner_cl->clear_strip_mined ();
30113123 return ;
30123124 }
3013- CountedLoopEndNode* inner_cle = inner_cl-> loopexit ();
3125+ CountedLoopEndNode* inner_cle = inner_counted_loop_end ();
30143126
30153127 int stride = inner_cl->stride_con ();
30163128 // For a min int stride, LoopStripMiningIter * stride overflows the int range for all values of LoopStripMiningIter
@@ -3091,8 +3203,9 @@ void OuterStripMinedLoopNode::adjust_strip_mined_loop(PhaseIterGVN* igvn) {
30913203 }
30923204
30933205 Node* iv_phi = nullptr ;
3094- // Make a clone of each phi in the inner loop
3095- // for the outer loop
3206+ // Make a clone of each phi in the inner loop for the outer loop
3207+ // When Stores were Sunk, after this step, a Phi may still be missing or its backedge incorrectly wired. See
3208+ // handle_sunk_stores_when_finishing_construction()
30963209 for (uint i = 0 ; i < inner_cl->outcnt (); i++) {
30973210 Node* u = inner_cl->raw_out (i);
30983211 if (u->is_Phi ()) {
@@ -3111,6 +3224,8 @@ void OuterStripMinedLoopNode::adjust_strip_mined_loop(PhaseIterGVN* igvn) {
31113224 }
31123225 }
31133226
3227+ handle_sunk_stores_when_finishing_construction (igvn);
3228+
31143229 if (iv_phi != nullptr ) {
31153230 // Now adjust the inner loop's exit condition
31163231 Node* limit = inner_cl->limit ();
@@ -3166,7 +3281,7 @@ void OuterStripMinedLoopNode::transform_to_counted_loop(PhaseIterGVN* igvn, Phas
31663281 CountedLoopEndNode* inner_cle = inner_cl->loopexit ();
31673282 Node* safepoint = outer_safepoint ();
31683283
3169- fix_sunk_stores (inner_cle, inner_cl, igvn, iloop);
3284+ fix_sunk_stores_when_back_to_counted_loop ( igvn, iloop);
31703285
31713286 // make counted loop exit test always fail
31723287 ConINode* zero = igvn->intcon (0 );
0 commit comments