@@ -614,13 +614,16 @@ void PhaseIdealLoop::peeled_dom_test_elim(IdealLoopTree* loop, Node_List& old_ne
614614// after peel and predicate move
615615//
616616// stmt1
617+ // |
618+ // v
619+ // loop predicate
617620// /
618621// /
619622// clone / orig
620623// /
621624// / +----------+
622625// / | |
623- // / loop predicate |
626+ // / | |
624627// / | |
625628// v v |
626629// TOP-->loop clone loop<----+ |
@@ -647,7 +650,10 @@ void PhaseIdealLoop::peeled_dom_test_elim(IdealLoopTree* loop, Node_List& old_ne
647650//
648651// final graph
649652//
650- // stmt1
653+ // stmt1
654+ // |
655+ // v
656+ // loop predicate
651657// |
652658// v
653659// stmt2 clone
@@ -660,7 +666,7 @@ void PhaseIdealLoop::peeled_dom_test_elim(IdealLoopTree* loop, Node_List& old_ne
660666// false true
661667// | |
662668// | v
663- // | loop predicate
669+ // | initialized skeleton predicates
664670// | |
665671// | v
666672// | loop<----+
@@ -714,7 +720,9 @@ void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) {
714720
715721 // Step 1: Clone the loop body. The clone becomes the peeled iteration.
716722 // The pre-loop illegally has 2 control users (old & new loops).
717- clone_loop (loop, old_new, dom_depth (head->skip_strip_mined ()), ControlAroundStripMined);
723+ const uint idx_before_clone = Compile::current ()->unique ();
724+ LoopNode* outer_loop_head = head->skip_strip_mined ();
725+ clone_loop (loop, old_new, dom_depth (outer_loop_head), ControlAroundStripMined);
718726
719727 // Step 2: Make the old-loop fall-in edges point to the peeled iteration.
720728 // Do this by making the old-loop fall-in edges act as if they came
@@ -723,8 +731,8 @@ void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) {
723731 // the pre-loop with only 1 user (the new peeled iteration), but the
724732 // peeled-loop backedge has 2 users.
725733 Node* new_entry = old_new[head->in (LoopNode::LoopBackControl)->_idx ];
726- _igvn.hash_delete (head-> skip_strip_mined () );
727- head-> skip_strip_mined () ->set_req (LoopNode::EntryControl, new_entry);
734+ _igvn.hash_delete (outer_loop_head );
735+ outer_loop_head ->set_req (LoopNode::EntryControl, new_entry);
728736 for (DUIterator_Fast jmax, j = head->fast_outs (jmax); j < jmax; j++) {
729737 Node* old = head->fast_out (j);
730738 if (old->in (0 ) == loop->_head && old->req () == 3 && old->is_Phi ()) {
@@ -753,16 +761,33 @@ void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) {
753761
754762 // Step 4: Correct dom-depth info. Set to loop-head depth.
755763
756- int dd = dom_depth (head-> skip_strip_mined () );
757- set_idom (head-> skip_strip_mined (), head-> skip_strip_mined ()-> in (LoopNode::EntryControl), dd );
764+ int dd_outer_loop_head = dom_depth (outer_loop_head );
765+ set_idom (outer_loop_head, outer_loop_head-> in (LoopNode::EntryControl), dd_outer_loop_head );
758766 for (uint j3 = 0 ; j3 < loop->_body .size (); j3++) {
759767 Node *old = loop->_body .at (j3);
760768 Node *nnn = old_new[old->_idx ];
761769 if (!has_ctrl (nnn)) {
762- set_idom (nnn, idom (nnn), dd -1 );
770+ set_idom (nnn, idom (nnn), dd_outer_loop_head -1 );
763771 }
764772 }
765773
774+ // Step 5: skeleton_predicates instantiation
775+ if (counted_loop && UseLoopPredicate) {
776+ CountedLoopNode *cl_head = head->as_CountedLoop ();
777+ Node* init = cl_head->init_trip ();
778+ Node* stride = cl_head->stride ();
779+ IdealLoopTree* outer_loop = get_loop (outer_loop_head);
780+ Predicates predicates (new_head->in (LoopNode::EntryControl));
781+ initialize_skeleton_predicates_for_peeled_loop (predicates.predicate (),
782+ outer_loop_head, dd_outer_loop_head,
783+ init, stride, outer_loop,
784+ idx_before_clone, old_new);
785+ initialize_skeleton_predicates_for_peeled_loop (predicates.profile_predicate (),
786+ outer_loop_head, dd_outer_loop_head,
787+ init, stride, outer_loop,
788+ idx_before_clone, old_new);
789+ }
790+
766791 // Now force out all loop-invariant dominating tests. The optimizer
767792 // finds some, but we _know_ they are all useless.
768793 peeled_dom_test_elim (loop,old_new);
@@ -1318,12 +1343,12 @@ void PhaseIdealLoop::copy_skeleton_predicates_to_main_loop_helper(Node* predicat
13181343 // Clone the skeleton predicate twice and initialize one with the initial
13191344 // value of the loop induction variable. Leave the other predicate
13201345 // to be initialized when increasing the stride during loop unrolling.
1321- prev_proj = clone_skeleton_predicate_for_main_or_post_loop (iff, opaque_init, NULL , predicate, uncommon_proj,
1322- current_proj, outer_loop, prev_proj);
1346+ prev_proj = clone_skeleton_predicate_and_initialize (iff, opaque_init, NULL , predicate, uncommon_proj,
1347+ current_proj, outer_loop, prev_proj);
13231348 assert (skeleton_predicate_has_opaque (prev_proj->in (0 )->as_If ()), " " );
13241349
1325- prev_proj = clone_skeleton_predicate_for_main_or_post_loop (iff, init, stride, predicate, uncommon_proj,
1326- current_proj, outer_loop, prev_proj);
1350+ prev_proj = clone_skeleton_predicate_and_initialize (iff, init, stride, predicate, uncommon_proj,
1351+ current_proj, outer_loop, prev_proj);
13271352 assert (!skeleton_predicate_has_opaque (prev_proj->in (0 )->as_If ()), " " );
13281353
13291354 // Rewire any control inputs from the cloned skeleton predicates down to the main and post loop for data nodes that are part of the
@@ -1476,8 +1501,8 @@ Node* PhaseIdealLoop::clone_skeleton_predicate_bool(Node* iff, Node* new_init, N
14761501
14771502// Clone a skeleton predicate and initialize it: new_init and new_stride are set as new inputs. Used for the main loop, the post loop
14781503// and the loop remaining after peeling. Since the predicates cannot fail at runtime, Halt nodes are inserted instead of uncommon traps.
1479- Node* PhaseIdealLoop::clone_skeleton_predicate_for_main_or_post_loop (Node* iff, Node* new_init, Node* new_stride, Node* predicate, Node* uncommon_proj,
1480- Node* control, IdealLoopTree* outer_loop, Node* input_proj) {
1504+ Node* PhaseIdealLoop::clone_skeleton_predicate_and_initialize (Node* iff, Node* new_init, Node* new_stride, Node* predicate, Node* uncommon_proj,
1505+ Node* control, IdealLoopTree* outer_loop, Node* input_proj) {
14811506 Node* result = clone_skeleton_predicate_bool (iff, new_init, new_stride, control);
14821507 Node* proj = predicate->clone ();
14831508 Node* other_proj = uncommon_proj->clone ();
@@ -2007,8 +2032,8 @@ void PhaseIdealLoop::update_main_loop_skeleton_predicates(Node* ctrl, CountedLoo
20072032 _igvn.replace_input_of (iff, 1 , iff->in (1 )->in (2 ));
20082033 } else {
20092034 // Add back predicates updated for the new stride.
2010- prev_proj = clone_skeleton_predicate_for_main_or_post_loop (iff, init, max_value, entry, proj, ctrl, outer_loop,
2011- prev_proj);
2035+ prev_proj = clone_skeleton_predicate_and_initialize (iff, init, max_value, entry, proj, ctrl, outer_loop,
2036+ prev_proj);
20122037 assert (!skeleton_predicate_has_opaque (prev_proj->in (0 )->as_If ()), " unexpected" );
20132038 }
20142039 }
@@ -2036,8 +2061,8 @@ void PhaseIdealLoop::copy_skeleton_predicates_to_post_loop(LoopNode* main_loop_h
20362061 break ;
20372062 }
20382063 if (iff->in (1 )->Opcode () == Op_Opaque4 && skeleton_predicate_has_opaque (iff)) {
2039- prev_proj = clone_skeleton_predicate_for_main_or_post_loop (iff, init, stride, ctrl, proj, post_loop_entry,
2040- post_loop, prev_proj);
2064+ prev_proj = clone_skeleton_predicate_and_initialize (iff, init, stride, ctrl, proj, post_loop_entry,
2065+ post_loop, prev_proj);
20412066 assert (!skeleton_predicate_has_opaque (prev_proj->in (0 )->as_If ()), " unexpected" );
20422067 }
20432068 ctrl = ctrl->in (0 )->in (0 );
@@ -2048,6 +2073,52 @@ void PhaseIdealLoop::copy_skeleton_predicates_to_post_loop(LoopNode* main_loop_h
20482073 }
20492074}
20502075
// Creates initialized copies of the skeleton predicates for the loop that remains after peeling.
//
// Starting at 'predicate', the predicate chain is walked with next_predicate(). For every visited
// predicate whose If condition is an Opaque4 node, clone_skeleton_predicate_and_initialize() is
// called with 'init' and 'stride'; the projection it returns is fed into the next call, so the
// copies form a chain. Non-CFG users of the visited predicate that existed before the clone and
// have a clone recorded in 'old_new' get their control input moved to the newest copy. At the end
// the last projection becomes the entry control and the immediate dominator of 'outer_loop_head'.
//
//   predicate          - projection at which the walk starts; nullptr makes this a no-op
//   outer_loop_head    - loop head whose EntryControl input is read first and replaced at the end
//   dd_outer_loop_head - dominator depth handed to set_idom() for 'outer_loop_head'
//   init, stride       - values passed on to clone_skeleton_predicate_and_initialize()
//   outer_loop         - loop tree passed on to clone_skeleton_predicate_and_initialize()
//   idx_before_clone   - node index bound: smaller indices are treated as "old" nodes,
//                        equal or larger ones as nodes created by the clone
//   old_new            - lookup from an old node's _idx to its clone
2076+ void PhaseIdealLoop::initialize_skeleton_predicates_for_peeled_loop (ProjNode* predicate,
2077+ LoopNode* outer_loop_head,
2078+ int dd_outer_loop_head,
2079+ Node* init,
2080+ Node* stride,
2081+ IdealLoopTree* outer_loop,
2082+ const uint idx_before_clone,
2083+ const Node_List &old_new) {
// No predicate projection was supplied: leave the graph untouched.
2084+ if (predicate == nullptr ) {
2085+ return ;
2086+ }
// 'control' stays fixed at the original loop entry; 'input_proj' tracks the most recently
// created projection and starts out as that same entry.
2087+ Node* control = outer_loop_head->in (LoopNode::EntryControl);
2088+ Node* input_proj = control;
2089+
// The projection passed in is not inspected itself; the walk begins with its successor.
// NOTE(review): presumably the skeleton predicates follow the regular predicate in the chain -
// confirm against next_predicate().
2090+ predicate = next_predicate (predicate);
2091+ while (predicate != nullptr ) {
2092+ IfNode* iff = predicate->in (0 )->as_If ();
// Only Ifs guarded by an Opaque4 condition are processed; the assert expects each of them to
// satisfy skeleton_predicate_has_opaque().
2093+ if (iff->in (1 )->Opcode () == Op_Opaque4) {
2094+ assert (skeleton_predicate_has_opaque (iff), " unexpected" );
// The If's other projection, i.e. the one with index 1 - _con.
2095+ ProjNode* uncommon_proj = iff->proj_out (1 - predicate->as_Proj ()->_con );
2096+ input_proj = clone_skeleton_predicate_and_initialize (iff, init, stride, predicate, uncommon_proj, control, outer_loop, input_proj);
2097+
2098+ // Rewire any control inputs from the old skeleton predicates above the peeled iteration down to the initialized
2099+ // skeleton predicates above the peeled loop.
2100+ for (DUIterator i = predicate->outs (); predicate->has_out (i); i++) {
2101+ Node* dependent = predicate->out (i);
2102+ Node* new_node = old_new[dependent->_idx ];
2103+
2104+ if (!dependent->is_CFG () &&
2105+ dependent->_idx < idx_before_clone && // old node
2106+ new_node != nullptr && // cloned
2107+ new_node->_idx >= idx_before_clone) { // for peeling
2108+ // The old nodes from the peeled loop still point to the predicate above the peeled loop.
2109+ // We need to rewire the dependencies to the newly initialized skeleton predicates.
2110+ _igvn.replace_input_of (dependent, 0 , input_proj);
2111+ --i; // correct for just deleted predicate->out(i)
2112+ }
2113+ }
2114+ }
2115+ predicate = next_predicate (predicate);
2116+ }
2117+
// Make the last created projection the loop entry and its immediate dominator. If nothing was
// cloned, 'input_proj' is still the original entry control.
2118+ _igvn.replace_input_of (outer_loop_head, LoopNode::EntryControl, input_proj);
2119+ set_idom (outer_loop_head, input_proj, dd_outer_loop_head);
2120+ }
2121+
20512122// ------------------------------do_unroll--------------------------------------
20522123// Unroll the loop body one step - make each trip do 2 iterations.
20532124void PhaseIdealLoop::do_unroll (IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip) {
0 commit comments