# HG changeset patch # User roland # Date 1521775279 25200 # Node ID bde392011cd8b25bea671bd31e0e81c33ec38057 # Parent a3f1db30ab85c3021db2f705ed52533a7ebd409d 8193130: Bad graph when unrolled loop bounds conflicts with range checks Reviewed-by: kvn, thartmann diff -r a3f1db30ab85 -r bde392011cd8 src/hotspot/share/opto/compile.cpp --- a/src/hotspot/share/opto/compile.cpp Thu Mar 22 16:39:02 2018 -0700 +++ b/src/hotspot/share/opto/compile.cpp Thu Mar 22 20:21:19 2018 -0700 @@ -397,13 +397,20 @@ remove_range_check_cast(cast); } } - // Remove useless expensive node + // Remove useless expensive nodes for (int i = C->expensive_count()-1; i >= 0; i--) { Node* n = C->expensive_node(i); if (!useful.member(n)) { remove_expensive_node(n); } } + // Remove useless Opaque4 nodes + for (int i = opaque4_count() - 1; i >= 0; i--) { + Node* opaq = opaque4_node(i); + if (!useful.member(opaq)) { + remove_opaque4_node(opaq); + } + } // clean up the late inline lists remove_useless_late_inlines(&_string_late_inlines, useful); remove_useless_late_inlines(&_boxing_late_inlines, useful); @@ -1179,6 +1186,7 @@ _predicate_opaqs = new(comp_arena()) GrowableArray(comp_arena(), 8, 0, NULL); _expensive_nodes = new(comp_arena()) GrowableArray(comp_arena(), 8, 0, NULL); _range_check_casts = new(comp_arena()) GrowableArray(comp_arena(), 8, 0, NULL); + _opaque4_nodes = new(comp_arena()) GrowableArray(comp_arena(), 8, 0, NULL); register_library_intrinsics(); } @@ -1957,6 +1965,22 @@ assert(range_check_cast_count() == 0, "should be empty"); } +void Compile::add_opaque4_node(Node* n) { + assert(n->Opcode() == Op_Opaque4, "Opaque4 only"); + assert(!_opaque4_nodes->contains(n), "duplicate entry in Opaque4 list"); + _opaque4_nodes->append(n); +} + +// Remove all Opaque4 nodes. +void Compile::remove_opaque4_nodes(PhaseIterGVN &igvn) { + for (int i = opaque4_count(); i > 0; i--) { + Node* opaq = opaque4_node(i-1); + assert(opaq->Opcode() == Op_Opaque4, "Opaque4 only"); + igvn.replace_node(opaq, opaq->in(2)); + } + assert(opaque4_count() == 0, "should be empty"); +} + // StringOpts and late inlining of string methods void Compile::inline_string_calls(bool parse_time) { { @@ -2332,6 +2356,11 @@ } } + if (opaque4_count() > 0) { + C->remove_opaque4_nodes(igvn); + igvn.optimize(); + } + DEBUG_ONLY( _modified_nodes = NULL; ) } // (End scope of igvn; run destructor if necessary for asserts.) @@ -3332,6 +3361,20 @@ } break; } + case Op_CmpUL: { + if (!Matcher::has_match_rule(Op_CmpUL)) { + // We don't support unsigned long comparisons. Set 'max_idx_expr' + // to max_julong if < 0 to make the signed comparison fail. + ConINode* sign_pos = new ConINode(TypeInt::make(BitsPerLong - 1)); + Node* sign_bit_mask = new RShiftLNode(n->in(1), sign_pos); + Node* orl = new OrLNode(n->in(1), sign_bit_mask); + ConLNode* remove_sign_mask = new ConLNode(TypeLong::make(max_jlong)); + Node* andl = new AndLNode(orl, remove_sign_mask); + Node* cmp = new CmpLNode(andl, n->in(2)); + n->subsume_by(cmp, this); + } + break; + } default: assert( !n->is_Call(), "" ); assert( !n->is_Mem(), "" ); diff -r a3f1db30ab85 -r bde392011cd8 src/hotspot/share/opto/compile.hpp --- a/src/hotspot/share/opto/compile.hpp Thu Mar 22 16:39:02 2018 -0700 +++ b/src/hotspot/share/opto/compile.hpp Thu Mar 22 20:21:19 2018 -0700 @@ -416,6 +416,7 @@ GrowableArray* _predicate_opaqs; // List of Opaque1 nodes for the loop predicates. GrowableArray* _expensive_nodes; // List of nodes that are expensive to compute and that we'd better not let the GVN freely common GrowableArray* _range_check_casts; // List of CastII nodes with a range check dependency + GrowableArray* _opaque4_nodes; // List of Opaque4 nodes that have a default value ConnectionGraph* _congraph; #ifndef PRODUCT IdealGraphPrinter* _printer; @@ -810,6 +811,16 @@ // Remove all range check dependent CastIINodes. void remove_range_check_casts(PhaseIterGVN &igvn); + void add_opaque4_node(Node* n); + void remove_opaque4_node(Node* n) { + if (_opaque4_nodes->contains(n)) { + _opaque4_nodes->remove(n); + } + } + Node* opaque4_node(int idx) const { return _opaque4_nodes->at(idx); } + int opaque4_count() const { return _opaque4_nodes->length(); } + void remove_opaque4_nodes(PhaseIterGVN &igvn); + // remove the opaque nodes that protect the predicates so that the unused checks and // uncommon traps will be eliminated from the graph. void cleanup_loop_predicates(PhaseIterGVN &igvn); diff -r a3f1db30ab85 -r bde392011cd8 src/hotspot/share/opto/loopPredicate.cpp --- a/src/hotspot/share/opto/loopPredicate.cpp Thu Mar 22 16:39:02 2018 -0700 +++ b/src/hotspot/share/opto/loopPredicate.cpp Thu Mar 22 20:21:19 2018 -0700 @@ -632,7 +632,7 @@ int scale, Node* offset, Node* init, Node* limit, jint stride, Node* range, bool upper, bool &overflow) { - jint con_limit = limit->is_Con() ? limit->get_int() : 0; + jint con_limit = (limit != NULL && limit->is_Con()) ? limit->get_int() : 0; jint con_init = init->is_Con() ? init->get_int() : 0; jint con_offset = offset->is_Con() ? offset->get_int() : 0; @@ -751,26 +751,7 @@ // Integer expressions may overflow, do long comparison range = new ConvI2LNode(range); register_new_node(range, ctrl); - if (!Matcher::has_match_rule(Op_CmpUL)) { - // We don't support unsigned long comparisons. Set 'max_idx_expr' - // to max_julong if < 0 to make the signed comparison fail. - ConINode* sign_pos = _igvn.intcon(BitsPerLong - 1); - set_ctrl(sign_pos, C->root()); - Node* sign_bit_mask = new RShiftLNode(max_idx_expr, sign_pos); - register_new_node(sign_bit_mask, ctrl); - // OR with sign bit to set all bits to 1 if negative (otherwise no change) - max_idx_expr = new OrLNode(max_idx_expr, sign_bit_mask); - register_new_node(max_idx_expr, ctrl); - // AND with 0x7ff... to unset the sign bit - ConLNode* remove_sign_mask = _igvn.longcon(max_jlong); - set_ctrl(remove_sign_mask, C->root()); - max_idx_expr = new AndLNode(max_idx_expr, remove_sign_mask); - register_new_node(max_idx_expr, ctrl); - - cmp = new CmpLNode(max_idx_expr, range); - } else { - cmp = new CmpULNode(max_idx_expr, range); - } + cmp = new CmpULNode(max_idx_expr, range); } else { cmp = new CmpUNode(max_idx_expr, range); } @@ -785,6 +766,29 @@ return bol; } +// After pre/main/post loops are created, we'll put a copy of some +// range checks between the pre and main loop to validate the initial +// value of the induction variable for the main loop. Make a copy of +// the predicates here with an opaque node as a place holder for the +// initial value. +ProjNode* PhaseIdealLoop::insert_skeleton_predicate(IfNode* iff, IdealLoopTree *loop, + ProjNode* proj, ProjNode *predicate_proj, + ProjNode* upper_bound_proj, + int scale, Node* offset, + Node* init, Node* limit, jint stride, + Node* rng, bool &overflow) { + assert(proj->_con && predicate_proj->_con, "not a range check?"); + Node* opaque_init = new Opaque1Node(C, init); + register_new_node(opaque_init, upper_bound_proj); + BoolNode* bol = rc_predicate(loop, upper_bound_proj, scale, offset, opaque_init, limit, stride, rng, (stride > 0) != (scale > 0), overflow); + Node* opaque_bol = new Opaque4Node(C, bol, _igvn.intcon(1)); // This will go away once loop opts are over + register_new_node(opaque_bol, upper_bound_proj); + ProjNode* new_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate, overflow ? Op_If : iff->Opcode()); + _igvn.replace_input_of(new_proj->in(0), 1, opaque_bol); + assert(opaque_init->outcnt() > 0, "should be used"); + return new_proj; +} + //------------------------------ loop_predication_impl-------------------------- // Insert loop predicates for null checks and range checks bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) { @@ -980,6 +984,10 @@ // any dependent nodes onto the upper bound test. new_predicate_proj = upper_bound_proj; + if (iff->is_RangeCheck()) { + new_predicate_proj = insert_skeleton_predicate(iff, loop, proj, predicate_proj, upper_bound_proj, scale, offset, init, limit, stride, rng, overflow); + } + #ifndef PRODUCT if (TraceLoopOpts && !TraceLoopPredicate) { tty->print("Predicate RC "); diff -r a3f1db30ab85 -r bde392011cd8 src/hotspot/share/opto/loopTransform.cpp --- a/src/hotspot/share/opto/loopTransform.cpp Thu Mar 22 16:39:02 2018 -0700 +++ b/src/hotspot/share/opto/loopTransform.cpp Thu Mar 22 20:21:19 2018 -0700 @@ -982,7 +982,7 @@ return n; } -bool PhaseIdealLoop::cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop) { +Node* PhaseIdealLoop::cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop) { Node* castii = new CastIINode(incr, TypeInt::INT, true); castii->set_req(0, ctrl); register_new_node(castii, ctrl); @@ -990,10 +990,138 @@ Node* n = incr->fast_out(i); if (n->is_Phi() && n->in(0) == loop) { int nrep = n->replace_edge(incr, castii); - return true; + return castii; } } - return false; + return NULL; +} + +// Make a copy of the skeleton range check predicates before the main +// loop and set the initial value of loop as input. After unrolling, +// the range of values for the induction variable in the main loop can +// fall outside the allowed range of values by the array access (main +// loop is never executed). When that happens, range check +// CastII/ConvI2L nodes cause some data paths to die. For consistency, +// the control paths must die too but the range checks were removed by +// predication. The range checks that we add here guarantee that they +// do. +void PhaseIdealLoop::duplicate_predicates(CountedLoopNode* pre_head, Node* min_taken, Node* castii, + IdealLoopTree* outer_loop, LoopNode* outer_main_head, + uint dd_main_head) { + if (UseLoopPredicate) { + Node* entry = pre_head->in(LoopNode::EntryControl); + Node* predicate = NULL; + predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check); + if (predicate != NULL) { + entry = entry->in(0)->in(0); + } + predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate); + if (predicate != NULL) { + IfNode* iff = entry->in(0)->as_If(); + ProjNode* uncommon_proj = iff->proj_out(1 - entry->as_Proj()->_con); + Node* rgn = uncommon_proj->unique_ctrl_out(); + assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct"); + assert(iff->in(1)->in(1)->Opcode() == Op_Opaque1, "unexpected predicate shape"); + entry = entry->in(0)->in(0); + Node* prev_proj = min_taken; + while (entry != NULL && entry->is_Proj() && entry->in(0)->is_If()) { + uncommon_proj = entry->in(0)->as_If()->proj_out(1 - entry->as_Proj()->_con); + if (uncommon_proj->unique_ctrl_out() != rgn) + break; + iff = entry->in(0)->as_If(); + if (iff->in(1)->Opcode() == Op_Opaque4) { + Node_Stack to_clone(2); + to_clone.push(iff->in(1), 1); + uint current = C->unique(); + Node* result = NULL; + // Look for the opaque node to replace with the init value + // and clone everything in between. We keep the Opaque4 node + // so the duplicated predicates are eliminated once loop + // opts are over: they are here only to keep the IR graph + // consistent. + do { + Node* n = to_clone.node(); + uint i = to_clone.index(); + Node* m = n->in(i); + int op = m->Opcode(); + if (m->is_Bool() || + m->is_Cmp() || + op == Op_AndL || + op == Op_OrL || + op == Op_RShiftL || + op == Op_LShiftL || + op == Op_AddL || + op == Op_AddI || + op == Op_MulL || + op == Op_MulI || + op == Op_SubL || + op == Op_SubI || + op == Op_ConvI2L) { + to_clone.push(m, 1); + continue; + } + if (op == Op_Opaque1) { + if (n->_idx < current) { + n = n->clone(); + } + n->set_req(i, castii); + register_new_node(n, min_taken); + to_clone.set_node(n); + } + for (;;) { + Node* cur = to_clone.node(); + uint j = to_clone.index(); + if (j+1 < cur->req()) { + to_clone.set_index(j+1); + break; + } + to_clone.pop(); + if (to_clone.size() == 0) { + result = cur; + break; + } + Node* next = to_clone.node(); + j = to_clone.index(); + if (cur->_idx >= current) { + if (next->_idx < current) { + next = next->clone(); + register_new_node(next, min_taken); + to_clone.set_node(next); + } + assert(next->in(j) != cur, "input should have been cloned"); + next->set_req(j, cur); + } + } + } while (result == NULL); + assert(result->_idx >= current, "new node expected"); + + Node* proj = entry->clone(); + Node* other_proj = uncommon_proj->clone(); + Node* new_iff = iff->clone(); + new_iff->set_req(1, result); + proj->set_req(0, new_iff); + other_proj->set_req(0, new_iff); + Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr); + register_new_node(frame, C->start()); + // It's impossible for the predicate to fail at runtime. Use + // an Halt node. + Node* halt = new HaltNode(other_proj, frame); + C->root()->add_req(halt); + new_iff->set_req(0, prev_proj); + + register_control(new_iff, outer_loop->_parent, prev_proj); + register_control(proj, outer_loop->_parent, new_iff); + register_control(other_proj, _ltree_root, new_iff); + register_control(halt, _ltree_root, other_proj); + + prev_proj = proj; + } + entry = entry->in(0)->in(0); + } + _igvn.replace_input_of(outer_main_head, LoopNode::EntryControl, prev_proj); + set_idom(outer_main_head, prev_proj, dd_main_head); + } + } } //------------------------------insert_pre_post_loops-------------------------- @@ -1137,8 +1265,9 @@ // dependencies. // CastII for the main loop: - bool inserted = cast_incr_before_loop( pre_incr, min_taken, main_head ); - assert(inserted, "no castII inserted"); + Node* castii = cast_incr_before_loop( pre_incr, min_taken, main_head ); + assert(castii != NULL, "no castII inserted"); + duplicate_predicates(pre_head, min_taken, castii, outer_loop, outer_main_head, dd_main_head); // Step B4: Shorten the pre-loop to run only 1 iteration (for now). // RCE and alignment may change this later. @@ -1403,8 +1532,8 @@ } // CastII for the new post loop: - bool inserted = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head); - assert(inserted, "no castII inserted"); + Node* castii = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head); + assert(castii != NULL, "no castII inserted"); return new_main_exit; } @@ -1467,7 +1596,7 @@ if (!is_canonical_loop_entry(loop_head)) { return; } - opaq = ctrl->in(0)->in(1)->in(1)->in(2); + opaq = loop_head->skip_predicates()->in(0)->in(1)->in(1)->in(2); // Zero-trip test uses an 'opaque' node which is not shared. assert(opaq->outcnt() == 1 && opaq->in(1) == limit, ""); } @@ -2034,6 +2163,34 @@ return false; } +// Same as PhaseIdealLoop::duplicate_predicates() but for range checks +// eliminated by iteration splitting. +Node* PhaseIdealLoop::add_range_check_predicate(IdealLoopTree* loop, CountedLoopNode* cl, + Node* predicate_proj, int scale_con, Node* offset, + Node* limit, jint stride_con) { + bool overflow = false; + BoolNode* bol = rc_predicate(loop, predicate_proj, scale_con, offset, cl->init_trip(), NULL, stride_con, limit, (stride_con > 0) != (scale_con > 0), overflow); + Node* opaque_bol = new Opaque4Node(C, bol, _igvn.intcon(1)); + register_new_node(opaque_bol, predicate_proj); + IfNode* new_iff = NULL; + if (overflow) { + new_iff = new IfNode(predicate_proj, bol, PROB_MAX, COUNT_UNKNOWN); + } else { + new_iff = new RangeCheckNode(predicate_proj, bol, PROB_MAX, COUNT_UNKNOWN); + } + register_control(new_iff, loop->_parent, predicate_proj); + Node* iffalse = new IfFalseNode(new_iff); + register_control(iffalse, _ltree_root, new_iff); + ProjNode* iftrue = new IfTrueNode(new_iff); + register_control(iftrue, loop->_parent, new_iff); + Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr); + register_new_node(frame, C->start()); + Node* halt = new HaltNode(iffalse, frame); + register_control(halt, _ltree_root, iffalse); + C->root()->add_req(halt); + return iftrue; +} + //------------------------------do_range_check--------------------------------- // Eliminate range-checks and other trip-counter vs loop-invariant tests. int PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) { @@ -2069,7 +2226,7 @@ } // Need to find the main-loop zero-trip guard - Node *ctrl = cl->skip_strip_mined()->in(LoopNode::EntryControl); + Node *ctrl = cl->skip_predicates(); Node *iffm = ctrl->in(0); Node *opqzm = iffm->in(1)->in(1)->in(2); assert(opqzm->in(1) == main_limit, "do not understand situation"); @@ -2124,6 +2281,8 @@ // the loop is in canonical form to multiversion. closed_range_checks = 0; + Node* predicate_proj = cl->skip_strip_mined()->in(LoopNode::EntryControl); + assert(predicate_proj->is_Proj() && predicate_proj->in(0)->is_If(), "if projection only"); // Check loop body for tests of trip-counter plus loop-invariant vs loop-variant. for( uint i = 0; i < loop->_body.size(); i++ ) { Node *iff = loop->_body[i]; @@ -2168,7 +2327,7 @@ // 'limit' maybe pinned below the zero trip test (probably from a // previous round of rce), in which case, it can't be used in the // zero trip test expression which must occur before the zero test's if. - if( limit_c == ctrl ) { + if (is_dominator(ctrl, limit_c)) { continue; // Don't rce this check but continue looking for other candidates. } @@ -2186,7 +2345,7 @@ // As above for the 'limit', the 'offset' maybe pinned below the // zero trip test. - if( offset_c == ctrl ) { + if (is_dominator(ctrl, offset_c)) { continue; // Don't rce this check but continue looking for other candidates. } #ifdef ASSERT @@ -2209,6 +2368,7 @@ add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit ); // (0-offset)/scale could be outside of loop iterations range. conditional_rc = true; + predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, offset, limit, stride_con); } else { if (PrintOpto) { tty->print_cr("missed RCE opportunity"); @@ -2278,6 +2438,10 @@ } // End of is IF } + if (predicate_proj != cl->skip_strip_mined()->in(LoopNode::EntryControl)) { + _igvn.replace_input_of(cl->skip_strip_mined(), LoopNode::EntryControl, predicate_proj); + set_idom(cl->skip_strip_mined(), predicate_proj, dom_depth(cl->skip_strip_mined())); + } // Update loop limits if (conditional_rc) { @@ -2540,7 +2704,7 @@ #ifdef ASSERT static CountedLoopNode* locate_pre_from_main(CountedLoopNode *cl) { - Node *ctrl = cl->skip_strip_mined()->in(LoopNode::EntryControl); + Node *ctrl = cl->skip_predicates(); assert(ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, ""); Node *iffm = ctrl->in(0); assert(iffm->Opcode() == Op_If, ""); @@ -2579,7 +2743,7 @@ } assert(locate_pre_from_main(main_head) == cl, "bad main loop"); - Node* main_iff = main_head->skip_strip_mined()->in(LoopNode::EntryControl)->in(0); + Node* main_iff = main_head->skip_predicates()->in(0); // Remove the Opaque1Node of the pre loop and make it execute all iterations phase->_igvn.replace_input_of(pre_cmp, 2, pre_cmp->in(2)->in(2)); @@ -2640,7 +2804,7 @@ } if (needs_guard) { // Check for an obvious zero trip guard. - Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->skip_strip_mined()->in(LoopNode::EntryControl)); + Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->skip_predicates()); if (inctrl->Opcode() == Op_IfTrue || inctrl->Opcode() == Op_IfFalse) { bool maybe_swapped = (inctrl->Opcode() == Op_IfFalse); // The test should look like just the backedge of a CountedLoop diff -r a3f1db30ab85 -r bde392011cd8 src/hotspot/share/opto/loopnode.cpp --- a/src/hotspot/share/opto/loopnode.cpp Thu Mar 22 16:39:02 2018 -0700 +++ b/src/hotspot/share/opto/loopnode.cpp Thu Mar 22 20:21:19 2018 -0700 @@ -1158,9 +1158,9 @@ return NULL; } -LoopNode* CountedLoopNode::skip_strip_mined(int expect_opaq) { +LoopNode* CountedLoopNode::skip_strip_mined(int expect_skeleton) { if (is_strip_mined()) { - verify_strip_mined(expect_opaq); + verify_strip_mined(expect_skeleton); return in(EntryControl)->as_Loop(); } return this; @@ -1252,6 +1252,20 @@ return l->outer_safepoint(); } +Node* CountedLoopNode::skip_predicates() { + if (is_main_loop()) { + Node* ctrl = skip_strip_mined()->in(LoopNode::EntryControl); + while (ctrl != NULL && ctrl->is_Proj() && ctrl->in(0)->is_If() && + ctrl->in(0)->as_If()->proj_out(1-ctrl->as_Proj()->_con)->outcnt() == 1 && + ctrl->in(0)->as_If()->proj_out(1-ctrl->as_Proj()->_con)->unique_out()->Opcode() == Op_Halt) { + ctrl = ctrl->in(0)->in(0); + } + + return ctrl; + } + return in(LoopNode::EntryControl); +} + void OuterStripMinedLoopNode::adjust_strip_mined_loop(PhaseIterGVN* igvn) { // Look for the outer & inner strip mined loop, reduce number of // iterations of the inner loop, set exit condition of outer loop, @@ -3770,7 +3784,8 @@ if (!cl->is_main_loop() && !cl->is_post_loop()) { return false; } - Node* ctrl = cl->skip_strip_mined()->in(LoopNode::EntryControl); + Node* ctrl = cl->skip_predicates(); + if (ctrl == NULL || (!ctrl->is_IfTrue() && !ctrl->is_IfFalse())) { return false; } diff -r a3f1db30ab85 -r bde392011cd8 src/hotspot/share/opto/loopnode.hpp --- a/src/hotspot/share/opto/loopnode.hpp Thu Mar 22 16:39:02 2018 -0700 +++ b/src/hotspot/share/opto/loopnode.hpp Thu Mar 22 20:21:19 2018 -0700 @@ -138,7 +138,7 @@ #endif void verify_strip_mined(int expect_skeleton) const; - virtual LoopNode* skip_strip_mined(int expect_opaq = 1) { return this; } + virtual LoopNode* skip_strip_mined(int expect_skeleton = 1) { return this; } virtual IfTrueNode* outer_loop_tail() const { ShouldNotReachHere(); return NULL; } virtual OuterStripMinedLoopEndNode* outer_loop_end() const { ShouldNotReachHere(); return NULL; } virtual IfFalseNode* outer_loop_exit() const { ShouldNotReachHere(); return NULL; } @@ -298,6 +298,11 @@ virtual IfFalseNode* outer_loop_exit() const; virtual SafePointNode* outer_safepoint() const; + // If this is a main loop in a pre/main/post loop nest, walk over + // the predicates that were inserted by + // duplicate_predicates()/add_range_check_predicate() + Node* skip_predicates(); + #ifndef PRODUCT virtual void dump_spec(outputStream *st) const; #endif @@ -724,7 +729,10 @@ return ctrl; } - bool cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop); + Node* cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop); + void duplicate_predicates(CountedLoopNode* pre_head, Node *min_taken, Node* castii, + IdealLoopTree* outer_loop, LoopNode* outer_main_head, + uint dd_main_head); public: @@ -1067,6 +1075,15 @@ // Implementation of the loop predication to promote checks outside the loop bool loop_predication_impl(IdealLoopTree *loop); + ProjNode* insert_skeleton_predicate(IfNode* iff, IdealLoopTree *loop, + ProjNode* proj, ProjNode *predicate_proj, + ProjNode* upper_bound_proj, + int scale, Node* offset, + Node* init, Node* limit, jint stride, + Node* rng, bool& overflow); + Node* add_range_check_predicate(IdealLoopTree* loop, CountedLoopNode* cl, + Node* predicate_proj, int scale_con, Node* offset, + Node* limit, jint stride_con); // Helper function to collect predicate for eliminating the useless ones void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1); diff -r a3f1db30ab85 -r bde392011cd8 src/hotspot/share/opto/macro.cpp --- a/src/hotspot/share/opto/macro.cpp Thu Mar 22 16:39:02 2018 -0700 +++ b/src/hotspot/share/opto/macro.cpp Thu Mar 22 20:21:19 2018 -0700 @@ -2668,8 +2668,7 @@ assert(n->Opcode() == Op_LoopLimit || n->Opcode() == Op_Opaque1 || n->Opcode() == Op_Opaque2 || - n->Opcode() == Op_Opaque3 || - n->Opcode() == Op_Opaque4, "unknown node type in macro list"); + n->Opcode() == Op_Opaque3, "unknown node type in macro list"); } assert(success == (C->macro_count() < old_macro_count), "elimination reduces macro count"); progress = progress || success; @@ -2734,9 +2733,6 @@ _igvn.replace_node(n, repl); success = true; #endif - } else if (n->Opcode() == Op_Opaque4) { - _igvn.replace_node(n, n->in(2)); - success = true; } else if (n->Opcode() == Op_OuterStripMinedLoop) { n->as_OuterStripMinedLoop()->adjust_strip_mined_loop(&_igvn); C->remove_macro_node(n); diff -r a3f1db30ab85 -r bde392011cd8 src/hotspot/share/opto/node.cpp --- a/src/hotspot/share/opto/node.cpp Thu Mar 22 16:39:02 2018 -0700 +++ b/src/hotspot/share/opto/node.cpp Thu Mar 22 20:21:19 2018 -0700 @@ -504,6 +504,9 @@ if (cast != NULL && cast->has_range_check()) { C->add_range_check_cast(cast); } + if (n->Opcode() == Op_Opaque4) { + C->add_opaque4_node(n); + } n->set_idx(C->next_unique()); // Get new unique index as well debug_only( n->verify_construction() ); @@ -612,6 +615,9 @@ if (cast != NULL && cast->has_range_check()) { compile->remove_range_check_cast(cast); } + if (Opcode() == Op_Opaque4) { + compile->remove_opaque4_node(this); + } if (is_SafePoint()) { as_SafePoint()->delete_replaced_nodes(); @@ -1352,6 +1358,9 @@ if (cast != NULL && cast->has_range_check()) { igvn->C->remove_range_check_cast(cast); } + if (dead->Opcode() == Op_Opaque4) { + igvn->C->remove_range_check_cast(dead); + } igvn->C->record_dead_node(dead->_idx); // Kill all inputs to the dead guy for (uint i=0; i < dead->req(); i++) { diff -r a3f1db30ab85 -r bde392011cd8 src/hotspot/share/opto/opaquenode.cpp --- a/src/hotspot/share/opto/opaquenode.cpp Thu Mar 22 16:39:02 2018 -0700 +++ b/src/hotspot/share/opto/opaquenode.cpp Thu Mar 22 20:21:19 2018 -0700 @@ -60,10 +60,6 @@ return (&n == this); // Always fail except on self } -Node* Opaque4Node::Identity(PhaseGVN* phase) { - return phase->C->major_progress() ? this : in(2); -} - const Type* Opaque4Node::Value(PhaseGVN* phase) const { return phase->type(in(1)); } diff -r a3f1db30ab85 -r bde392011cd8 src/hotspot/share/opto/opaquenode.hpp --- a/src/hotspot/share/opto/opaquenode.hpp Thu Mar 22 16:39:02 2018 -0700 +++ b/src/hotspot/share/opto/opaquenode.hpp Thu Mar 22 20:21:19 2018 -0700 @@ -35,14 +35,14 @@ virtual uint hash() const ; // { return NO_HASH; } virtual uint cmp( const Node &n ) const; public: - Opaque1Node( Compile* C, Node *n ) : Node(0,n) { + Opaque1Node(Compile* C, Node *n) : Node(NULL, n) { // Put it on the Macro nodes list to removed during macro nodes expansion. init_flags(Flag_is_macro); C->add_macro_node(this); } // Special version for the pre-loop to hold the original loop limit // which is consumed by range check elimination. - Opaque1Node( Compile* C, Node *n, Node* orig_limit ) : Node(0,n,orig_limit) { + Opaque1Node(Compile* C, Node *n, Node* orig_limit) : Node(NULL, n, orig_limit) { // Put it on the Macro nodes list to removed during macro nodes expansion. init_flags(Flag_is_macro); C->add_macro_node(this); @@ -87,25 +87,23 @@ bool rtm_opt() const { return (_opt == RTM_OPT); } }; -// Used by GraphKit::must_be_not_null(): input 1 is a check that we -// know implicitly is always true or false but the compiler has no way -// to prove. If during optimizations, that check becomes true or -// false, the Opaque4 node is replaced by that constant true or -// false. Input 2 is the constant value we know the test takes. After -// loop optimizations, we replace input 1 by input 2 so the control -// that depends on that test can be removed and there's no overhead at -// runtime. +// Input 1 is a check that we know implicitly is always true or false +// but the compiler has no way to prove. If during optimizations, that +// check becomes true or false, the Opaque4 node is replaced by that +// constant true or false. Input 2 is the constant value we know the +// test takes. After loop optimizations, we replace input 1 by input 2 +// so the control that depends on that test can be removed and there's +// no overhead at runtime. Used for instance by +// GraphKit::must_be_not_null(). class Opaque4Node : public Node { public: - Opaque4Node(Compile* C, Node *tst, Node* final_tst) : Node(0, tst, final_tst) { - // Put it on the Macro nodes list to removed during macro nodes expansion. - init_flags(Flag_is_macro); - C->add_macro_node(this); + Opaque4Node(Compile* C, Node *tst, Node* final_tst) : Node(NULL, tst, final_tst) { + // Put it on the Opaque4 nodes list to be removed after all optimizations + C->add_opaque4_node(this); } virtual int Opcode() const; virtual const Type *bottom_type() const { return TypeInt::BOOL; } virtual const Type* Value(PhaseGVN* phase) const; - virtual Node* Identity(PhaseGVN* phase); }; diff -r a3f1db30ab85 -r bde392011cd8 src/hotspot/share/opto/phaseX.cpp --- a/src/hotspot/share/opto/phaseX.cpp Thu Mar 22 16:39:02 2018 -0700 +++ b/src/hotspot/share/opto/phaseX.cpp Thu Mar 22 20:21:19 2018 -0700 @@ -1421,6 +1421,9 @@ if (cast != NULL && cast->has_range_check()) { C->remove_range_check_cast(cast); } + if (dead->Opcode() == Op_Opaque4) { + C->remove_opaque4_node(dead); + } } } // while (_stack.is_nonempty()) } diff -r a3f1db30ab85 -r bde392011cd8 src/hotspot/share/opto/superword.cpp --- a/src/hotspot/share/opto/superword.cpp Thu Mar 22 16:39:02 2018 -0700 +++ b/src/hotspot/share/opto/superword.cpp Thu Mar 22 20:21:19 2018 -0700 @@ -3328,7 +3328,7 @@ return NULL; } - Node* p_f = cl->skip_strip_mined()->in(LoopNode::EntryControl)->in(0)->in(0); + Node* p_f = cl->skip_predicates()->in(0)->in(0); if (!p_f->is_IfFalse()) return NULL; if (!p_f->in(0)->is_CountedLoopEnd()) return NULL; CountedLoopEndNode* pre_end = p_f->in(0)->as_CountedLoopEnd(); diff -r a3f1db30ab85 -r bde392011cd8 test/hotspot/jtreg/compiler/loopopts/IterationSplitPredicateInconsistency.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/hotspot/jtreg/compiler/loopopts/IterationSplitPredicateInconsistency.java Thu Mar 22 20:21:19 2018 -0700 @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2018, Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @bug 8193130 + * @summary Bad graph when unrolled loop bounds conflicts with range checks + * + * @run main/othervm IterationSplitPredicateInconsistency + * @run main/othervm -XX:-UseLoopPredicate IterationSplitPredicateInconsistency + * + */ + +public class IterationSplitPredicateInconsistency { + static volatile int barrier; + + // Pollute profile so loop appears to run for a large number of iterations + static boolean test1_helper(int start, int stop, double[] array1, double[] array2, int exit) { + for (int i = start; i < stop; i++) { + array1[i] = array2[i]; + if (i == exit) { + return true; + } + barrier = 0x42; + } + return false; + } + + static double[] test1(int start, double[] array2, int exit) { + double[] array1 = new double[10]; + // Predication moves range checks out of loop and + // pre/main/post loops are created. The main loop is unrolled + // several times to the point where it's never executed but + // compiler can't tell from the loop bounds alone. The lower + // bound of the loop is negative and would cause range checks + // (that were removed from the loop body) to fail. + if (test1_helper(start, 5, array1, array2, exit)) { + return null; + } + return array1; + } + + // Same as above with other combinations of increasing/decreasing + // loops, positive/negative stride + static boolean test2_helper(int start, int stop, double[] array1, double[] array2, int exit) { + for (int i = start-1; i >= stop; i--) { + array1[i] = array2[i]; + if (i == exit) { + return true; + } + barrier = 0x42; + } + return false; + } + + static double[] test2(int start, double[] array2, int exit) { + double[] array1 = new double[10]; + if (test2_helper(start, 0, array1, array2, exit)) { + return null; + } + return array1; + } + + static boolean test3_helper(int start, int stop, double[] array1, double[] array2, int exit) { + for (int i = start; i < stop; i++) { + array1[stop-i-1] = array2[stop-i-1]; + if (i == exit) { + return true; + } + barrier = 0x42; + } + return false; + } + + static double[] test3(int start, double[] array2, int exit) { + double[] array1 = new double[5]; + if (test3_helper(start, 5, array1, array2, exit)) { + return null; + } + return array1; + } + + static boolean test4_helper(int start, int stop, int from, double[] array1, double[] array2, int exit) { + for (int i = start-1; i >= stop; i--) { + array1[from-i-1] = array2[from-i-1]; + if (i == exit) { + return true; + } + barrier = 0x42; + } + return false; + } + + static double[] test4(int start, double[] array2, int exit) { + double[] array1 = new double[5]; + if (test4_helper(start, 0, 5, array1, array2, exit)) { + return null; + } + return array1; + } + + public static void main(String[] args) { + double[] array2 = new double[10]; + double[] array3 = new double[1000]; + for (int i = 0; i < 20_000; i++) { + test1_helper(0, 1000, array3, array3, 998); + test1(0, array2, 999); + test1(0, array2, 4); + test2_helper(1000, 0, array3, array3, 1); + test2(5, array2, 999); + test2(5, array2, 1); + test3_helper(0, 1000, array3, array3, 998); + test3(0, array2, 999); + test3(0, array2, 4); + test4_helper(1000, 0, 1000, array3, array3, 1); + test4(5, array2, 999); + test4(5, array2, 1); + } + } +}