Skip to content

Commit 199832a

Browse files
committed
8283466: C2: missing skeleton predicates in peeled loop
Reviewed-by: roland, chagedorn
1 parent a82417f commit 199832a

File tree

4 files changed

+248
-37
lines changed

4 files changed

+248
-37
lines changed

src/hotspot/share/opto/loopPredicate.cpp

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -491,24 +491,25 @@ Node* PhaseIdealLoop::skip_loop_predicates(Node* entry) {
491491
}
492492

493493
// Skip all loop predicates found when walking up from 'entry' and return the control node above them.
// In this diff view the lines following a gutter marker ending in '-' are the body removed by the commit;
// the lines following a '+' marker are its replacement, which delegates the walk to the Predicates helper.
Node* PhaseIdealLoop::skip_all_loop_predicates(Node* entry) {
494-
Node* predicate = NULL;
495-
predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
496-
if (predicate != NULL) {
497-
entry = skip_loop_predicates(entry);
498-
}
499-
if (UseProfiledLoopPredicate) {
500-
predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
501-
if (predicate != NULL) { // right pattern that can be used by loop predication
502-
entry = skip_loop_predicates(entry);
503-
}
504-
}
505-
if (UseLoopPredicate) {
506-
predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
507-
if (predicate != NULL) { // right pattern that can be used by loop predication
508-
entry = skip_loop_predicates(entry);
494+
Predicates predicates(entry); // the constructor performs the limit-check / profile / loop predicate probing
495+
return predicates.skip_all(); // first control node above all predicates that were found
496+
}
497+
498+
//--------------------------next_predicate---------------------------------
499+
// Find next related predicate, useful for iterating over all related predicates.
// 'predicate' is one projection of an If. The candidate is the node feeding that If's control input.
// It is returned only when it is itself a projection of an If whose opposite projection has the same
// control user (region or call) as the opposite projection of predicate's If. Otherwise returns nullptr.
500+
ProjNode* PhaseIdealLoop::next_predicate(ProjNode* predicate) {
501+
IfNode* iff = predicate->in(0)->as_If(); // the If that 'predicate' is a projection of
502+
ProjNode* uncommon_proj = iff->proj_out(1 - predicate->_con); // the other projection of the same If
503+
Node* rgn = uncommon_proj->unique_ctrl_out(); // control user of that other projection
504+
assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
505+
Node* next = iff->in(0); // control input of the If, i.e. the candidate predicate projection above it
506+
if (next != nullptr && next->is_Proj() && next->in(0)->is_If()) {
507+
uncommon_proj = next->in(0)->as_If()->proj_out(1 - next->as_Proj()->_con); // other projection of the candidate's If
508+
if (uncommon_proj->unique_ctrl_out() == rgn) { // lead into same region
509+
return next->as_Proj();
509510
}
510511
}
511-
return entry;
512+
return nullptr; // nothing above shares the same region/call: end of the related predicates
512513
}
513514

514515
//--------------------------find_predicate_insertion_point-------------------
@@ -522,6 +523,28 @@ ProjNode* PhaseIdealLoop::find_predicate_insertion_point(Node* start_c, Deoptimi
522523
return NULL;
523524
}
524525

526+
//--------------------------Predicates::Predicates--------------------------
527+
// Given a loop entry, find all predicates above the loop.
// Probes in a fixed order: loop limit check, then profile predicate (only if UseProfiledLoopPredicate),
// then loop predicate (only if UseLoopPredicate). Whenever a probe finds something, 'entry' is advanced
// with skip_loop_predicates() so that the next probe starts above what was just found.
// The final value of 'entry' is stored as the control node above all predicates that were found.
528+
PhaseIdealLoop::Predicates::Predicates(Node* entry) {
529+
_loop_limit_check = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
530+
if (_loop_limit_check != nullptr) {
531+
entry = skip_loop_predicates(entry); // continue the search above the loop limit check
532+
}
533+
if (UseProfiledLoopPredicate) { // flag off: _profile_predicate is never assigned here
534+
_profile_predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
535+
if (_profile_predicate != nullptr) {
536+
entry = skip_loop_predicates(entry); // continue the search above the profile predicate
537+
}
538+
}
539+
if (UseLoopPredicate) { // flag off: _predicate is never assigned here
540+
_predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
541+
if (_predicate != nullptr) {
542+
entry = skip_loop_predicates(entry); // continue above the loop predicate
543+
}
544+
}
545+
_entry_to_all_predicates = entry; // equals the original 'entry' when nothing was found
546+
}
547+
525548
//--------------------------find_predicate------------------------------------
526549
// Find a predicate
527550
Node* PhaseIdealLoop::find_predicate(Node* entry) {

src/hotspot/share/opto/loopTransform.cpp

Lines changed: 90 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -614,13 +614,16 @@ void PhaseIdealLoop::peeled_dom_test_elim(IdealLoopTree* loop, Node_List& old_ne
614614
// after peel and predicate move
615615
//
616616
// stmt1
617+
// |
618+
// v
619+
// loop predicate
617620
// /
618621
// /
619622
// clone / orig
620623
// /
621624
// / +----------+
622625
// / | |
623-
// / loop predicate |
626+
// / | |
624627
// / | |
625628
// v v |
626629
// TOP-->loop clone loop<----+ |
@@ -647,7 +650,10 @@ void PhaseIdealLoop::peeled_dom_test_elim(IdealLoopTree* loop, Node_List& old_ne
647650
//
648651
// final graph
649652
//
650-
// stmt1
653+
// stmt1
654+
// |
655+
// v
656+
// loop predicate
651657
// |
652658
// v
653659
// stmt2 clone
@@ -660,7 +666,7 @@ void PhaseIdealLoop::peeled_dom_test_elim(IdealLoopTree* loop, Node_List& old_ne
660666
// false true
661667
// | |
662668
// | v
663-
// | loop predicate
669+
// | initialized skeleton predicates
664670
// | |
665671
// | v
666672
// | loop<----+
@@ -714,7 +720,9 @@ void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) {
714720

715721
// Step 1: Clone the loop body. The clone becomes the peeled iteration.
716722
// The pre-loop illegally has 2 control users (old & new loops).
717-
clone_loop(loop, old_new, dom_depth(head->skip_strip_mined()), ControlAroundStripMined);
723+
const uint idx_before_clone = Compile::current()->unique();
724+
LoopNode* outer_loop_head = head->skip_strip_mined();
725+
clone_loop(loop, old_new, dom_depth(outer_loop_head), ControlAroundStripMined);
718726

719727
// Step 2: Make the old-loop fall-in edges point to the peeled iteration.
720728
// Do this by making the old-loop fall-in edges act as if they came
@@ -723,8 +731,8 @@ void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) {
723731
// the pre-loop with only 1 user (the new peeled iteration), but the
724732
// peeled-loop backedge has 2 users.
725733
Node* new_entry = old_new[head->in(LoopNode::LoopBackControl)->_idx];
726-
_igvn.hash_delete(head->skip_strip_mined());
727-
head->skip_strip_mined()->set_req(LoopNode::EntryControl, new_entry);
734+
_igvn.hash_delete(outer_loop_head);
735+
outer_loop_head->set_req(LoopNode::EntryControl, new_entry);
728736
for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
729737
Node* old = head->fast_out(j);
730738
if (old->in(0) == loop->_head && old->req() == 3 && old->is_Phi()) {
@@ -753,16 +761,33 @@ void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) {
753761

754762
// Step 4: Correct dom-depth info. Set to loop-head depth.
755763

756-
int dd = dom_depth(head->skip_strip_mined());
757-
set_idom(head->skip_strip_mined(), head->skip_strip_mined()->in(LoopNode::EntryControl), dd);
764+
int dd_outer_loop_head = dom_depth(outer_loop_head);
765+
set_idom(outer_loop_head, outer_loop_head->in(LoopNode::EntryControl), dd_outer_loop_head);
758766
for (uint j3 = 0; j3 < loop->_body.size(); j3++) {
759767
Node *old = loop->_body.at(j3);
760768
Node *nnn = old_new[old->_idx];
761769
if (!has_ctrl(nnn)) {
762-
set_idom(nnn, idom(nnn), dd-1);
770+
set_idom(nnn, idom(nnn), dd_outer_loop_head-1);
763771
}
764772
}
765773

774+
// Step 5: skeleton_predicates instantiation
775+
if (counted_loop && UseLoopPredicate) {
776+
CountedLoopNode *cl_head = head->as_CountedLoop();
777+
Node* init = cl_head->init_trip();
778+
Node* stride = cl_head->stride();
779+
IdealLoopTree* outer_loop = get_loop(outer_loop_head);
780+
Predicates predicates(new_head->in(LoopNode::EntryControl));
781+
initialize_skeleton_predicates_for_peeled_loop(predicates.predicate(),
782+
outer_loop_head, dd_outer_loop_head,
783+
init, stride, outer_loop,
784+
idx_before_clone, old_new);
785+
initialize_skeleton_predicates_for_peeled_loop(predicates.profile_predicate(),
786+
outer_loop_head, dd_outer_loop_head,
787+
init, stride, outer_loop,
788+
idx_before_clone, old_new);
789+
}
790+
766791
// Now force out all loop-invariant dominating tests. The optimizer
767792
// finds some, but we _know_ they are all useless.
768793
peeled_dom_test_elim(loop,old_new);
@@ -1318,12 +1343,12 @@ void PhaseIdealLoop::copy_skeleton_predicates_to_main_loop_helper(Node* predicat
13181343
// Clone the skeleton predicate twice and initialize one with the initial
13191344
// value of the loop induction variable. Leave the other predicate
13201345
// to be initialized when increasing the stride during loop unrolling.
1321-
prev_proj = clone_skeleton_predicate_for_main_or_post_loop(iff, opaque_init, NULL, predicate, uncommon_proj,
1322-
current_proj, outer_loop, prev_proj);
1346+
prev_proj = clone_skeleton_predicate_and_initialize(iff, opaque_init, NULL, predicate, uncommon_proj,
1347+
current_proj, outer_loop, prev_proj);
13231348
assert(skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), "");
13241349

1325-
prev_proj = clone_skeleton_predicate_for_main_or_post_loop(iff, init, stride, predicate, uncommon_proj,
1326-
current_proj, outer_loop, prev_proj);
1350+
prev_proj = clone_skeleton_predicate_and_initialize(iff, init, stride, predicate, uncommon_proj,
1351+
current_proj, outer_loop, prev_proj);
13271352
assert(!skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), "");
13281353

13291354
// Rewire any control inputs from the cloned skeleton predicates down to the main and post loop for data nodes that are part of the
@@ -1476,8 +1501,8 @@ Node* PhaseIdealLoop::clone_skeleton_predicate_bool(Node* iff, Node* new_init, N
14761501

14771502
// Clone a skeleton predicate and initialize it (used for the main, post and peeled loops). new_init and new_stride are set as new inputs. Since the predicates cannot fail at runtime,
14781503
// Halt nodes are inserted instead of uncommon traps.
1479-
Node* PhaseIdealLoop::clone_skeleton_predicate_for_main_or_post_loop(Node* iff, Node* new_init, Node* new_stride, Node* predicate, Node* uncommon_proj,
1480-
Node* control, IdealLoopTree* outer_loop, Node* input_proj) {
1504+
Node* PhaseIdealLoop::clone_skeleton_predicate_and_initialize(Node* iff, Node* new_init, Node* new_stride, Node* predicate, Node* uncommon_proj,
1505+
Node* control, IdealLoopTree* outer_loop, Node* input_proj) {
14811506
Node* result = clone_skeleton_predicate_bool(iff, new_init, new_stride, control);
14821507
Node* proj = predicate->clone();
14831508
Node* other_proj = uncommon_proj->clone();
@@ -2007,8 +2032,8 @@ void PhaseIdealLoop::update_main_loop_skeleton_predicates(Node* ctrl, CountedLoo
20072032
_igvn.replace_input_of(iff, 1, iff->in(1)->in(2));
20082033
} else {
20092034
// Add back predicates updated for the new stride.
2010-
prev_proj = clone_skeleton_predicate_for_main_or_post_loop(iff, init, max_value, entry, proj, ctrl, outer_loop,
2011-
prev_proj);
2035+
prev_proj = clone_skeleton_predicate_and_initialize(iff, init, max_value, entry, proj, ctrl, outer_loop,
2036+
prev_proj);
20122037
assert(!skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), "unexpected");
20132038
}
20142039
}
@@ -2036,8 +2061,8 @@ void PhaseIdealLoop::copy_skeleton_predicates_to_post_loop(LoopNode* main_loop_h
20362061
break;
20372062
}
20382063
if (iff->in(1)->Opcode() == Op_Opaque4 && skeleton_predicate_has_opaque(iff)) {
2039-
prev_proj = clone_skeleton_predicate_for_main_or_post_loop(iff, init, stride, ctrl, proj, post_loop_entry,
2040-
post_loop, prev_proj);
2064+
prev_proj = clone_skeleton_predicate_and_initialize(iff, init, stride, ctrl, proj, post_loop_entry,
2065+
post_loop, prev_proj);
20412066
assert(!skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()), "unexpected");
20422067
}
20432068
ctrl = ctrl->in(0)->in(0);
@@ -2048,6 +2073,52 @@ void PhaseIdealLoop::copy_skeleton_predicates_to_post_loop(LoopNode* main_loop_h
20482073
}
20492074
}
20502075

2076+
// Create initialized copies of the skeleton predicates for the loop that remains after peeling.
// Walks upward from 'predicate' with next_predicate(). For every If on the way whose condition input is an
// Opaque4 node, a clone initialized with 'init' and 'stride' is created by
// clone_skeleton_predicate_and_initialize() and chained onto the previous clone. Afterwards the last clone
// (or the unchanged entry control if nothing was cloned) becomes the entry control and idom of
// 'outer_loop_head'.
//
//   predicate          - starting predicate projection; nullptr makes the call a no-op
//   outer_loop_head    - loop head whose EntryControl input is read and then replaced
//   dd_outer_loop_head - dom depth handed to set_idom() for outer_loop_head
//   init, stride       - values passed to the clone as its new init and new stride
//   outer_loop         - loop tree passed through to clone_skeleton_predicate_and_initialize()
//   idx_before_clone   - node index boundary; a smaller _idx is treated as "old node", a greater or equal
//                        _idx as "created for peeling"
//   old_new            - indexed by old node _idx, yields the node's clone (or nullptr)
void PhaseIdealLoop::initialize_skeleton_predicates_for_peeled_loop(ProjNode* predicate,
2077+
LoopNode* outer_loop_head,
2078+
int dd_outer_loop_head,
2079+
Node* init,
2080+
Node* stride,
2081+
IdealLoopTree* outer_loop,
2082+
const uint idx_before_clone,
2083+
const Node_List &old_new) {
2084+
if (predicate == nullptr) {
2085+
return;
2086+
}
2087+
Node* control = outer_loop_head->in(LoopNode::EntryControl); // current entry of the loop head; passed unchanged to every clone call
2088+
Node* input_proj = control; // tail of the chain of clones built so far; starts at the current entry
2089+
2090+
predicate = next_predicate(predicate); // the passed-in projection itself is not examined; start with the one above it
2091+
while (predicate != nullptr) {
2092+
IfNode* iff = predicate->in(0)->as_If();
2093+
if (iff->in(1)->Opcode() == Op_Opaque4) { // only Ifs whose condition is an Opaque4 are cloned
2094+
assert(skeleton_predicate_has_opaque(iff), "unexpected");
2095+
ProjNode* uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con); // the other projection of iff
2096+
input_proj = clone_skeleton_predicate_and_initialize(iff, init, stride, predicate, uncommon_proj, control, outer_loop, input_proj);
2097+
2098+
// Rewire any control inputs from the old skeleton predicates above the peeled iteration down to the initialized
2099+
// skeleton predicates above the peeled loop.
2100+
for (DUIterator i = predicate->outs(); predicate->has_out(i); i++) {
2101+
Node* dependent = predicate->out(i);
2102+
Node* new_node = old_new[dependent->_idx];
2103+
2104+
if (!dependent->is_CFG() &&
2105+
dependent->_idx < idx_before_clone && // old node
2106+
new_node != nullptr && // cloned
2107+
new_node->_idx >= idx_before_clone) { // for peeling
2108+
// The old nodes from the peeled loop still point to the predicate above the peeled loop.
2109+
// We need to rewire the dependencies to the newly initialized skeleton predicates.
2110+
_igvn.replace_input_of(dependent, 0, input_proj);
2111+
--i; // correct for just deleted predicate->out(i)
2112+
}
2113+
}
2114+
}
2115+
predicate = next_predicate(predicate); // move on to the next related predicate above
2116+
}
2117+
2118+
_igvn.replace_input_of(outer_loop_head, LoopNode::EntryControl, input_proj); // loop is now entered through the last clone
2119+
set_idom(outer_loop_head, input_proj, dd_outer_loop_head); // keep the dominator info in line with the new entry
2120+
}
2121+
20512122
//------------------------------do_unroll--------------------------------------
20522123
// Unroll the loop body one step - make each trip do 2 iterations.
20532124
void PhaseIdealLoop::do_unroll(IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip) {

src/hotspot/share/opto/loopnode.hpp

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -927,13 +927,16 @@ class PhaseIdealLoop : public PhaseTransform {
927927
void copy_skeleton_predicates_to_main_loop(CountedLoopNode* pre_head, Node* init, Node* stride, IdealLoopTree* outer_loop, LoopNode* outer_main_head,
928928
uint dd_main_head, const uint idx_before_pre_post, const uint idx_after_post_before_pre,
929929
Node* zero_trip_guard_proj_main, Node* zero_trip_guard_proj_post, const Node_List &old_new);
930-
Node* clone_skeleton_predicate_for_main_or_post_loop(Node* iff, Node* new_init, Node* new_stride, Node* predicate, Node* uncommon_proj, Node* control,
931-
IdealLoopTree* outer_loop, Node* input_proj);
930+
Node* clone_skeleton_predicate_and_initialize(Node* iff, Node* new_init, Node* new_stride, Node* predicate, Node* uncommon_proj, Node* control,
931+
IdealLoopTree* outer_loop, Node* input_proj);
932932
Node* clone_skeleton_predicate_bool(Node* iff, Node* new_init, Node* new_stride, Node* control);
933933
static bool skeleton_predicate_has_opaque(IfNode* iff);
934934
static void get_skeleton_predicates(Node* predicate, Unique_Node_List& list, bool get_opaque = false);
935935
void update_main_loop_skeleton_predicates(Node* ctrl, CountedLoopNode* loop_head, Node* init, int stride_con);
936936
void copy_skeleton_predicates_to_post_loop(LoopNode* main_loop_head, CountedLoopNode* post_loop_head, Node* init, Node* stride);
937+
void initialize_skeleton_predicates_for_peeled_loop(ProjNode* predicate, LoopNode* outer_loop_head, int dd_outer_loop_head,
938+
Node* init, Node* stride, IdealLoopTree* outer_loop,
939+
const uint idx_before_clone, const Node_List& old_new);
937940
void insert_loop_limit_check(ProjNode* limit_check_proj, Node* cmp_limit, Node* bol);
938941
#ifdef ASSERT
939942
bool only_has_infinite_loops();
@@ -1328,9 +1331,43 @@ class PhaseIdealLoop : public PhaseTransform {
13281331

13291332
static Node* skip_all_loop_predicates(Node* entry);
13301333
static Node* skip_loop_predicates(Node* entry);
1334+
static ProjNode* next_predicate(ProjNode* predicate);
13311335

13321336
// Find a good location to insert a predicate
13331337
static ProjNode* find_predicate_insertion_point(Node* start_c, Deoptimization::DeoptReason reason);
1338+
1339+
// Result of one search for the predicates above a loop entry. The constructor does the search;
// the accessors only return what it recorded. Every field is nullptr until the constructor assigns it.
// NOTE(review): the accessors do not modify the object and could be const-qualified; L663 lacks a space after '*'.
class Predicates {
1340+
public:
1341+
// Given a loop entry, find all predicates above the loop
1342+
Predicates(Node* entry);
1343+
1344+
// Proj of empty loop limit check predicate (nullptr if none was recorded)
1345+
ProjNode* loop_limit_check() {
1346+
return _loop_limit_check;
1347+
}
1348+
1349+
// Proj of empty profile predicate (nullptr if none was recorded)
1350+
ProjNode* profile_predicate() {
1351+
return _profile_predicate;
1352+
}
1353+
1354+
// Proj of empty predicate (nullptr if none was recorded)
1355+
ProjNode* predicate() {
1356+
return _predicate;
1357+
}
1358+
1359+
// First control node above all predicates
1360+
Node* skip_all() {
1361+
return _entry_to_all_predicates;
1362+
}
1363+
1364+
private:
1365+
ProjNode*_loop_limit_check = nullptr;
1366+
ProjNode* _profile_predicate = nullptr;
1367+
ProjNode* _predicate = nullptr;
1368+
Node* _entry_to_all_predicates = nullptr;
1369+
};
1370+
13341371
// Find a predicate
13351372
static Node* find_predicate(Node* entry);
13361373
// Construct a range check for a predicate if

0 commit comments

Comments
 (0)