# HG changeset patch # User thartmann # Date 1529403942 -7200 # Node ID fd430e35242733c8511d67df5c74e1f84f3c0b38 # Parent 7e1087eb6760a84a45d26a0ad9768e9cd26f4478 8205033: [REDO] Induction variable of over-unrolled loop conflicts with range checks Summary: Update skeleton predicates before main loop during unrolling to remove dead code. Reviewed-by: kvn, roland diff -r 7e1087eb6760 -r fd430e352427 src/hotspot/share/opto/loopPredicate.cpp --- a/src/hotspot/share/opto/loopPredicate.cpp Tue Jun 19 12:22:02 2018 +0200 +++ b/src/hotspot/share/opto/loopPredicate.cpp Tue Jun 19 12:25:42 2018 +0200 @@ -91,7 +91,7 @@ // // // We will create a region to guard the uct call if there is no one there. -// The true projecttion (if_cont) of the new_iff is returned. +// The true projection (if_cont) of the new_iff is returned. // This code is also used to clone predicates to cloned loops. ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry, Deoptimization::DeoptReason reason, @@ -1212,10 +1212,10 @@ // After pre/main/post loops are created, we'll put a copy of some -// range checks between the pre and main loop to validate the initial -// value of the induction variable for the main loop. Make a copy of -// the predicates here with an opaque node as a place holder for the -// initial value. +// range checks between the pre and main loop to validate the value +// of the main loop induction variable. Make a copy of the predicates +// here with an opaque node as a place holder for the value (will be +// updated by PhaseIdealLoop::update_skeleton_predicate()). ProjNode* PhaseIdealLoop::insert_skeleton_predicate(IfNode* iff, IdealLoopTree *loop, ProjNode* proj, ProjNode *predicate_proj, ProjNode* upper_bound_proj, diff -r 7e1087eb6760 -r fd430e352427 src/hotspot/share/opto/loopTransform.cpp --- a/src/hotspot/share/opto/loopTransform.cpp Tue Jun 19 12:22:02 2018 +0200 +++ b/src/hotspot/share/opto/loopTransform.cpp Tue Jun 19 12:25:42 2018 +0200 @@ -1059,8 +1059,7 @@ // loop is never executed). When that happens, range check // CastII/ConvI2L nodes cause some data paths to die. For consistency, // the control paths must die too but the range checks were removed by -// predication. The range checks that we add here guarantee that they -// do. +// predication. The range checks that we add here guarantee that they do. void PhaseIdealLoop::duplicate_predicates_helper(Node* predicate, Node* castii, IdealLoopTree* outer_loop, LoopNode* outer_main_head, uint dd_main_head) { if (predicate != NULL) { @@ -1069,108 +1068,126 @@ Node* rgn = uncommon_proj->unique_ctrl_out(); assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct"); assert(iff->in(1)->in(1)->Opcode() == Op_Opaque1, "unexpected predicate shape"); - predicate = predicate->in(0)->in(0); + predicate = iff->in(0); Node* current_proj = outer_main_head->in(LoopNode::EntryControl); Node* prev_proj = current_proj; while (predicate != NULL && predicate->is_Proj() && predicate->in(0)->is_If()) { - uncommon_proj = predicate->in(0)->as_If()->proj_out(1 - predicate->as_Proj()->_con); + iff = predicate->in(0)->as_If(); + uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con); if (uncommon_proj->unique_ctrl_out() != rgn) break; - iff = predicate->in(0)->as_If(); if (iff->in(1)->Opcode() == Op_Opaque4) { - Node_Stack to_clone(2); - to_clone.push(iff->in(1), 1); - uint current = C->unique(); - Node* result = NULL; - // Look for the opaque node to replace with the init value - // and clone everything in between. We keep the Opaque4 node - // so the duplicated predicates are eliminated once loop - // opts are over: they are here only to keep the IR graph - // consistent. - do { - Node* n = to_clone.node(); - uint i = to_clone.index(); - Node* m = n->in(i); - int op = m->Opcode(); - if (m->is_Bool() || - m->is_Cmp() || - op == Op_AndL || - op == Op_OrL || - op == Op_RShiftL || - op == Op_LShiftL || - op == Op_AddL || - op == Op_AddI || - op == Op_MulL || - op == Op_MulI || - op == Op_SubL || - op == Op_SubI || - op == Op_ConvI2L) { - to_clone.push(m, 1); - continue; - } - if (op == Op_Opaque1) { - if (n->_idx < current) { - n = n->clone(); - } - n->set_req(i, castii); - register_new_node(n, current_proj); - to_clone.set_node(n); - } - for (;;) { - Node* cur = to_clone.node(); - uint j = to_clone.index(); - if (j+1 < cur->req()) { - to_clone.set_index(j+1); - break; - } - to_clone.pop(); - if (to_clone.size() == 0) { - result = cur; - break; - } - Node* next = to_clone.node(); - j = to_clone.index(); - if (cur->_idx >= current) { - if (next->_idx < current) { - next = next->clone(); - register_new_node(next, current_proj); - to_clone.set_node(next); - } - assert(next->in(j) != cur, "input should have been cloned"); - next->set_req(j, cur); - } - } - } while (result == NULL); - assert(result->_idx >= current, "new node expected"); - - Node* proj = predicate->clone(); - Node* other_proj = uncommon_proj->clone(); - Node* new_iff = iff->clone(); - new_iff->set_req(1, result); - proj->set_req(0, new_iff); - other_proj->set_req(0, new_iff); - Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr); - register_new_node(frame, C->start()); - // It's impossible for the predicate to fail at runtime. Use - // an Halt node. - Node* halt = new HaltNode(other_proj, frame); - C->root()->add_req(halt); - new_iff->set_req(0, prev_proj); - - register_control(new_iff, outer_loop->_parent, prev_proj); - register_control(proj, outer_loop->_parent, new_iff); - register_control(other_proj, _ltree_root, new_iff); - register_control(halt, _ltree_root, other_proj); - - prev_proj = proj; + // Clone the predicate twice and initialize one with the initial + // value of the loop induction variable. Leave the other predicate + // to be initialized when increasing the stride during loop unrolling. + prev_proj = update_skeleton_predicate(iff, castii, predicate, uncommon_proj, current_proj, outer_loop, prev_proj); + Node* value = new Opaque1Node(C, castii); + register_new_node(value, current_proj); + prev_proj = update_skeleton_predicate(iff, value, predicate, uncommon_proj, current_proj, outer_loop, prev_proj); + // Remove the skeleton predicate from the pre-loop + _igvn.replace_input_of(iff, 1, _igvn.intcon(1)); } predicate = predicate->in(0)->in(0); } - if (prev_proj != current_proj) { - _igvn.replace_input_of(outer_main_head, LoopNode::EntryControl, prev_proj); - set_idom(outer_main_head, prev_proj, dd_main_head); + _igvn.replace_input_of(outer_main_head, LoopNode::EntryControl, prev_proj); + set_idom(outer_main_head, prev_proj, dd_main_head); + } +} + +Node* PhaseIdealLoop::update_skeleton_predicate(Node* iff, Node* value, Node* predicate, Node* uncommon_proj, + Node* current_proj, IdealLoopTree* outer_loop, Node* prev_proj) { + bool clone = (outer_loop != NULL); // Clone the predicate? + Node_Stack to_clone(2); + to_clone.push(iff->in(1), 1); + uint current = C->unique(); + Node* result = NULL; + // Look for the opaque node to replace with the new value + // and clone everything in between. We keep the Opaque4 node + // so the duplicated predicates are eliminated once loop + // opts are over: they are here only to keep the IR graph + // consistent. + do { + Node* n = to_clone.node(); + uint i = to_clone.index(); + Node* m = n->in(i); + int op = m->Opcode(); + if (m->is_Bool() || + m->is_Cmp() || + op == Op_AndL || + op == Op_OrL || + op == Op_RShiftL || + op == Op_LShiftL || + op == Op_AddL || + op == Op_AddI || + op == Op_MulL || + op == Op_MulI || + op == Op_SubL || + op == Op_SubI || + op == Op_ConvI2L) { + to_clone.push(m, 1); + continue; } + if (op == Op_Opaque1) { + if (!clone) { + // Update the input of the Opaque1Node and exit + _igvn.replace_input_of(m, 1, value); + return prev_proj; + } + if (n->_idx < current) { + n = n->clone(); + } + n->set_req(i, value); + register_new_node(n, current_proj); + to_clone.set_node(n); + } + for (;;) { + Node* cur = to_clone.node(); + uint j = to_clone.index(); + if (j+1 < cur->req()) { + to_clone.set_index(j+1); + break; + } + to_clone.pop(); + if (to_clone.size() == 0) { + result = cur; + break; + } + Node* next = to_clone.node(); + j = to_clone.index(); + if (clone && cur->_idx >= current) { + if (next->_idx < current) { + next = next->clone(); + register_new_node(next, current_proj); + to_clone.set_node(next); + } + assert(next->in(j) != cur, "input should have been cloned"); + next->set_req(j, cur); + } + } + } while (result == NULL); + if (!clone) { + return NULL; } + assert(result->_idx >= current, "new node expected"); + + Node* proj = predicate->clone(); + Node* other_proj = uncommon_proj->clone(); + Node* new_iff = iff->clone(); + new_iff->set_req(1, result); + proj->set_req(0, new_iff); + other_proj->set_req(0, new_iff); + Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr); + register_new_node(frame, C->start()); + // It's impossible for the predicate to fail at runtime. Use an Halt node. + Node* halt = new HaltNode(other_proj, frame); + C->root()->add_req(halt); + new_iff->set_req(0, prev_proj); + + register_control(new_iff, outer_loop->_parent, prev_proj); + register_control(proj, outer_loop->_parent, new_iff); + register_control(other_proj, _ltree_root, new_iff); + register_control(halt, _ltree_root, other_proj); + return proj; } void PhaseIdealLoop::duplicate_predicates(CountedLoopNode* pre_head, Node* castii, IdealLoopTree* outer_loop, @@ -1682,6 +1699,30 @@ assert(old_trip_count > 1 && (!adjust_min_trip || stride_p <= (1<<3)*loop_head->unrolled_count()), "sanity"); + if (UseLoopPredicate) { + // Search for skeleton predicates and update them according to the new stride + Node* entry = ctrl; + while (entry != NULL && entry->is_Proj() && entry->in(0)->is_If()) { + IfNode* iff = entry->in(0)->as_If(); + ProjNode* proj = iff->proj_out(1 - entry->as_Proj()->_con); + if (proj->unique_ctrl_out()->Opcode() != Op_Halt) { + break; + } + if (iff->in(1)->Opcode() == Op_Opaque4) { + // Compute the value of the loop induction variable at the end of the + // first iteration of the unrolled loop: init + new_stride_con - init_inc + int init_inc = stride_con/loop_head->unrolled_count(); + assert(init_inc != 0, "invalid loop increment"); + int new_stride_con = stride_con * 2; + Node* max_value = _igvn.intcon(new_stride_con - init_inc); + max_value = new AddINode(init, max_value); + register_new_node(max_value, get_ctrl(iff->in(1))); + update_skeleton_predicate(iff, max_value); + } + entry = entry->in(0)->in(0); + } + } + // Adjust loop limit to keep valid iterations number after unroll. // Use (limit - stride) instead of (((limit - init)/stride) & (-2))*stride // which may overflow. diff -r 7e1087eb6760 -r fd430e352427 src/hotspot/share/opto/loopnode.hpp --- a/src/hotspot/share/opto/loopnode.hpp Tue Jun 19 12:22:02 2018 +0200 +++ b/src/hotspot/share/opto/loopnode.hpp Tue Jun 19 12:25:42 2018 +0200 @@ -744,6 +744,8 @@ LoopNode* outer_main_head, uint dd_main_head); void duplicate_predicates(CountedLoopNode* pre_head, Node* castii, IdealLoopTree* outer_loop, LoopNode* outer_main_head, uint dd_main_head); + Node* update_skeleton_predicate(Node* iff, Node* value, Node* predicate = NULL, Node* uncommon_proj = NULL, + Node* current_proj = NULL, IdealLoopTree* outer_loop = NULL, Node* prev_proj = NULL); public: diff -r 7e1087eb6760 -r fd430e352427 test/hotspot/jtreg/compiler/loopopts/IterationSplitPredicateInconsistency.java --- a/test/hotspot/jtreg/compiler/loopopts/IterationSplitPredicateInconsistency.java Tue Jun 19 12:22:02 2018 +0200 +++ b/test/hotspot/jtreg/compiler/loopopts/IterationSplitPredicateInconsistency.java Tue Jun 19 12:25:42 2018 +0200 @@ -23,11 +23,12 @@ /** * @test - * @bug 8193130 + * @bug 8193130 8203915 * @summary Bad graph when unrolled loop bounds conflicts with range checks * * @run main/othervm IterationSplitPredicateInconsistency * @run main/othervm -XX:-UseLoopPredicate IterationSplitPredicateInconsistency + * @run main/othervm -XX:LoopStripMiningIter=0 IterationSplitPredicateInconsistency * */ diff -r 7e1087eb6760 -r fd430e352427 test/hotspot/jtreg/compiler/loopopts/TestOverunrolling.java --- a/test/hotspot/jtreg/compiler/loopopts/TestOverunrolling.java Tue Jun 19 12:22:02 2018 +0200 +++ b/test/hotspot/jtreg/compiler/loopopts/TestOverunrolling.java Tue Jun 19 12:25:42 2018 +0200 @@ -23,11 +23,12 @@ /* * @test - * @bug 8159016 8202949 + * @bug 8159016 8202949 8203915 * @summary Tests correct dominator information after over-unrolling a loop. * @requires vm.gc == "Parallel" | vm.gc == "null" * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions * -Xcomp -XX:-TieredCompilation -XX:-UseSwitchProfiling + * -XX:-UseCountedLoopSafepoints -XX:LoopUnrollLimit=250 * -XX:-UseG1GC -XX:+UseParallelGC compiler.loopopts.TestOverunrolling */ @@ -81,11 +82,74 @@ } } + // Similar to test2 but we cannot statically determine the upper bound of + // the inner for loop and can therefore not prevent over-unrolling. + public static void test3(int[] array) { + int[] iArr = new int[8]; + for (int i = 0; i < array.length; i++) { + for (int j = 5; j < i; j++) { + int k = 1; + do { + iArr[j] = 0; + switch (k) { + case 1: + lFld = 0; + break; + case 10: + dFld = 0; + break; + } + } while (++k < 1); + } + } + } + + // Similar to test3 but with negative stride and constant outer loop limit + public static void test4(int[] array, boolean store) { + int[] iArr = new int[8]; + for (int i = -8; i < 8; i++) { + for (int j = 5; j > i; j--) { + int k = 1; + do { + if (store) { + iArr[j] = 0; + } + switch (k) { + case 1: + lFld = 0; + break; + case 10: + dFld = 0; + break; + } + } while (++k < 1); + } + } + } + + // The inner for-loop is over-unrolled and vectorized resulting in + // a crash in the matcher because the memory input to a vector is top. + public static int test5(int[] array) { + int result = 0; + int[] iArr = new int[8]; + for (int i = 0; i < array.length; i++) { + for (int j = 5; j < i; j++) { + iArr[j] += array[j]; + result += array[j]; + } + } + return result; + } + public static void main(String args[]) { for (int i = 0; i < 42; ++i) { test1(i); } test2(); + int[] array = new int[8]; + test3(array); + test4(array, false); + test5(array); } }