8193130: Bad graph when unrolled loop bounds conflicts with range checks
authorroland
Thu, 22 Mar 2018 20:21:19 -0700
changeset 49487 bde392011cd8
parent 49486 a3f1db30ab85
child 49488 1f9dd2360b17
child 49590 66c32f2a7f10
8193130: Bad graph when unrolled loop bounds conflicts with range checks Reviewed-by: kvn, thartmann
src/hotspot/share/opto/compile.cpp
src/hotspot/share/opto/compile.hpp
src/hotspot/share/opto/loopPredicate.cpp
src/hotspot/share/opto/loopTransform.cpp
src/hotspot/share/opto/loopnode.cpp
src/hotspot/share/opto/loopnode.hpp
src/hotspot/share/opto/macro.cpp
src/hotspot/share/opto/node.cpp
src/hotspot/share/opto/opaquenode.cpp
src/hotspot/share/opto/opaquenode.hpp
src/hotspot/share/opto/phaseX.cpp
src/hotspot/share/opto/superword.cpp
test/hotspot/jtreg/compiler/loopopts/IterationSplitPredicateInconsistency.java
--- a/src/hotspot/share/opto/compile.cpp	Thu Mar 22 16:39:02 2018 -0700
+++ b/src/hotspot/share/opto/compile.cpp	Thu Mar 22 20:21:19 2018 -0700
@@ -397,13 +397,20 @@
       remove_range_check_cast(cast);
     }
   }
-  // Remove useless expensive node
+  // Remove useless expensive nodes
   for (int i = C->expensive_count()-1; i >= 0; i--) {
     Node* n = C->expensive_node(i);
     if (!useful.member(n)) {
       remove_expensive_node(n);
     }
   }
+  // Remove useless Opaque4 nodes
+  for (int i = opaque4_count() - 1; i >= 0; i--) {
+    Node* opaq = opaque4_node(i);
+    if (!useful.member(opaq)) {
+      remove_opaque4_node(opaq);
+    }
+  }
   // clean up the late inline lists
   remove_useless_late_inlines(&_string_late_inlines, useful);
   remove_useless_late_inlines(&_boxing_late_inlines, useful);
@@ -1179,6 +1186,7 @@
   _predicate_opaqs = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
   _expensive_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
   _range_check_casts = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
+  _opaque4_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
   register_library_intrinsics();
 }
 
@@ -1957,6 +1965,22 @@
   assert(range_check_cast_count() == 0, "should be empty");
 }
 
+void Compile::add_opaque4_node(Node* n) {
+  assert(n->Opcode() == Op_Opaque4, "Opaque4 only");
+  assert(!_opaque4_nodes->contains(n), "duplicate entry in Opaque4 list");
+  _opaque4_nodes->append(n);
+}
+
+// Remove all Opaque4 nodes.
+void Compile::remove_opaque4_nodes(PhaseIterGVN &igvn) {
+  for (int i = opaque4_count(); i > 0; i--) {
+    Node* opaq = opaque4_node(i-1);
+    assert(opaq->Opcode() == Op_Opaque4, "Opaque4 only");
+    igvn.replace_node(opaq, opaq->in(2));
+  }
+  assert(opaque4_count() == 0, "should be empty");
+}
+
 // StringOpts and late inlining of string methods
 void Compile::inline_string_calls(bool parse_time) {
   {
@@ -2332,6 +2356,11 @@
     }
   }
 
+  if (opaque4_count() > 0) {
+    C->remove_opaque4_nodes(igvn);
+    igvn.optimize();
+  }
+
   DEBUG_ONLY( _modified_nodes = NULL; )
  } // (End scope of igvn; run destructor if necessary for asserts.)
 
@@ -3332,6 +3361,20 @@
     }
     break;
   }
+  case Op_CmpUL: {
+    if (!Matcher::has_match_rule(Op_CmpUL)) {
+      // We don't support unsigned long comparisons. Set 'max_idx_expr'
+      // to max_julong if < 0 to make the signed comparison fail.
+      ConINode* sign_pos = new ConINode(TypeInt::make(BitsPerLong - 1));
+      Node* sign_bit_mask = new RShiftLNode(n->in(1), sign_pos);
+      Node* orl = new OrLNode(n->in(1), sign_bit_mask);
+      ConLNode* remove_sign_mask = new ConLNode(TypeLong::make(max_jlong));
+      Node* andl = new AndLNode(orl, remove_sign_mask);
+      Node* cmp = new CmpLNode(andl, n->in(2));
+      n->subsume_by(cmp, this);
+    }
+    break;
+  }
   default:
     assert( !n->is_Call(), "" );
     assert( !n->is_Mem(), "" );
--- a/src/hotspot/share/opto/compile.hpp	Thu Mar 22 16:39:02 2018 -0700
+++ b/src/hotspot/share/opto/compile.hpp	Thu Mar 22 20:21:19 2018 -0700
@@ -416,6 +416,7 @@
   GrowableArray<Node*>* _predicate_opaqs;       // List of Opaque1 nodes for the loop predicates.
   GrowableArray<Node*>* _expensive_nodes;       // List of nodes that are expensive to compute and that we'd better not let the GVN freely common
   GrowableArray<Node*>* _range_check_casts;     // List of CastII nodes with a range check dependency
+  GrowableArray<Node*>* _opaque4_nodes;         // List of Opaque4 nodes that have a default value
   ConnectionGraph*      _congraph;
 #ifndef PRODUCT
   IdealGraphPrinter*    _printer;
@@ -810,6 +811,16 @@
   // Remove all range check dependent CastIINodes.
   void  remove_range_check_casts(PhaseIterGVN &igvn);
 
+  void add_opaque4_node(Node* n);
+  void remove_opaque4_node(Node* n) {
+    if (_opaque4_nodes->contains(n)) {
+      _opaque4_nodes->remove(n);
+    }
+  }
+  Node* opaque4_node(int idx) const { return _opaque4_nodes->at(idx);  }
+  int   opaque4_count()       const { return _opaque4_nodes->length(); }
+  void  remove_opaque4_nodes(PhaseIterGVN &igvn);
+
   // remove the opaque nodes that protect the predicates so that the unused checks and
   // uncommon traps will be eliminated from the graph.
   void cleanup_loop_predicates(PhaseIterGVN &igvn);
--- a/src/hotspot/share/opto/loopPredicate.cpp	Thu Mar 22 16:39:02 2018 -0700
+++ b/src/hotspot/share/opto/loopPredicate.cpp	Thu Mar 22 20:21:19 2018 -0700
@@ -632,7 +632,7 @@
                                        int scale, Node* offset,
                                        Node* init, Node* limit, jint stride,
                                        Node* range, bool upper, bool &overflow) {
-  jint con_limit  = limit->is_Con()  ? limit->get_int()  : 0;
+  jint con_limit  = (limit != NULL && limit->is_Con())  ? limit->get_int()  : 0;
   jint con_init   = init->is_Con()   ? init->get_int()   : 0;
   jint con_offset = offset->is_Con() ? offset->get_int() : 0;
 
@@ -751,26 +751,7 @@
     // Integer expressions may overflow, do long comparison
     range = new ConvI2LNode(range);
     register_new_node(range, ctrl);
-    if (!Matcher::has_match_rule(Op_CmpUL)) {
-      // We don't support unsigned long comparisons. Set 'max_idx_expr'
-      // to max_julong if < 0 to make the signed comparison fail.
-      ConINode* sign_pos = _igvn.intcon(BitsPerLong - 1);
-      set_ctrl(sign_pos, C->root());
-      Node* sign_bit_mask = new RShiftLNode(max_idx_expr, sign_pos);
-      register_new_node(sign_bit_mask, ctrl);
-      // OR with sign bit to set all bits to 1 if negative (otherwise no change)
-      max_idx_expr = new OrLNode(max_idx_expr, sign_bit_mask);
-      register_new_node(max_idx_expr, ctrl);
-      // AND with 0x7ff... to unset the sign bit
-      ConLNode* remove_sign_mask = _igvn.longcon(max_jlong);
-      set_ctrl(remove_sign_mask, C->root());
-      max_idx_expr = new AndLNode(max_idx_expr, remove_sign_mask);
-      register_new_node(max_idx_expr, ctrl);
-
-      cmp = new CmpLNode(max_idx_expr, range);
-    } else {
-      cmp = new CmpULNode(max_idx_expr, range);
-    }
+    cmp = new CmpULNode(max_idx_expr, range);
   } else {
     cmp = new CmpUNode(max_idx_expr, range);
   }
@@ -785,6 +766,29 @@
   return bol;
 }
 
+// After pre/main/post loops are created, we'll put a copy of some
+// range checks between the pre and main loop to validate the initial
+// value of the induction variable for the main loop. Make a copy of
+// the predicates here with an opaque node as a place holder for the
+// initial value.
+ProjNode* PhaseIdealLoop::insert_skeleton_predicate(IfNode* iff, IdealLoopTree *loop,
+                                                    ProjNode* proj, ProjNode *predicate_proj,
+                                                    ProjNode* upper_bound_proj,
+                                                    int scale, Node* offset,
+                                                    Node* init, Node* limit, jint stride,
+                                                    Node* rng, bool &overflow) {
+  assert(proj->_con && predicate_proj->_con, "not a range check?");
+  Node* opaque_init = new Opaque1Node(C, init);
+  register_new_node(opaque_init, upper_bound_proj);
+  BoolNode* bol = rc_predicate(loop, upper_bound_proj, scale, offset, opaque_init, limit, stride, rng, (stride > 0) != (scale > 0), overflow);
+  Node* opaque_bol = new Opaque4Node(C, bol, _igvn.intcon(1)); // This will go away once loop opts are over
+  register_new_node(opaque_bol, upper_bound_proj);
+  ProjNode* new_proj = create_new_if_for_predicate(predicate_proj, NULL, Deoptimization::Reason_predicate, overflow ? Op_If : iff->Opcode());
+  _igvn.replace_input_of(new_proj->in(0), 1, opaque_bol);
+  assert(opaque_init->outcnt() > 0, "should be used");
+  return new_proj;
+}
+
 //------------------------------ loop_predication_impl--------------------------
 // Insert loop predicates for null checks and range checks
 bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) {
@@ -980,6 +984,10 @@
       // any dependent nodes onto the upper bound test.
       new_predicate_proj = upper_bound_proj;
 
+      if (iff->is_RangeCheck()) {
+        new_predicate_proj = insert_skeleton_predicate(iff, loop, proj, predicate_proj, upper_bound_proj, scale, offset, init, limit, stride, rng, overflow);
+      }
+
 #ifndef PRODUCT
       if (TraceLoopOpts && !TraceLoopPredicate) {
         tty->print("Predicate RC ");
--- a/src/hotspot/share/opto/loopTransform.cpp	Thu Mar 22 16:39:02 2018 -0700
+++ b/src/hotspot/share/opto/loopTransform.cpp	Thu Mar 22 20:21:19 2018 -0700
@@ -982,7 +982,7 @@
   return n;
 }
 
-bool PhaseIdealLoop::cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop) {
+Node* PhaseIdealLoop::cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop) {
   Node* castii = new CastIINode(incr, TypeInt::INT, true);
   castii->set_req(0, ctrl);
   register_new_node(castii, ctrl);
@@ -990,10 +990,138 @@
     Node* n = incr->fast_out(i);
     if (n->is_Phi() && n->in(0) == loop) {
       int nrep = n->replace_edge(incr, castii);
-      return true;
+      return castii;
     }
   }
-  return false;
+  return NULL;
+}
+
+// Make a copy of the skeleton range check predicates before the main
+// loop and set the initial value of loop as input. After unrolling,
+// the range of values for the induction variable in the main loop can
+// fall outside the allowed range of values by the array access (main
+// loop is never executed). When that happens, range check
+// CastII/ConvI2L nodes cause some data paths to die. For consistency,
+// the control paths must die too but the range checks were removed by
+// predication. The range checks that we add here guarantee that they
+// do.
+void PhaseIdealLoop::duplicate_predicates(CountedLoopNode* pre_head, Node* min_taken, Node* castii,
+                                          IdealLoopTree* outer_loop, LoopNode* outer_main_head,
+                                          uint dd_main_head) {
+  if (UseLoopPredicate) {
+    Node* entry = pre_head->in(LoopNode::EntryControl);
+    Node* predicate = NULL;
+    predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
+    if (predicate != NULL) {
+      entry = entry->in(0)->in(0);
+    }
+    predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
+    if (predicate != NULL) {
+      IfNode* iff = entry->in(0)->as_If();
+      ProjNode* uncommon_proj = iff->proj_out(1 - entry->as_Proj()->_con);
+      Node* rgn = uncommon_proj->unique_ctrl_out();
+      assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
+      assert(iff->in(1)->in(1)->Opcode() == Op_Opaque1, "unexpected predicate shape");
+      entry = entry->in(0)->in(0);
+      Node* prev_proj = min_taken;
+      while (entry != NULL && entry->is_Proj() && entry->in(0)->is_If()) {
+        uncommon_proj = entry->in(0)->as_If()->proj_out(1 - entry->as_Proj()->_con);
+        if (uncommon_proj->unique_ctrl_out() != rgn)
+          break;
+        iff = entry->in(0)->as_If();
+        if (iff->in(1)->Opcode() == Op_Opaque4) {
+          Node_Stack to_clone(2);
+          to_clone.push(iff->in(1), 1);
+          uint current = C->unique();
+          Node* result = NULL;
+          // Look for the opaque node to replace with the init value
+          // and clone everything in between. We keep the Opaque4 node
+          // so the duplicated predicates are eliminated once loop
+          // opts are over: they are here only to keep the IR graph
+          // consistent.
+          do {
+            Node* n = to_clone.node();
+            uint i = to_clone.index();
+            Node* m = n->in(i);
+            int op = m->Opcode();
+            if (m->is_Bool() ||
+                m->is_Cmp() ||
+                op == Op_AndL ||
+                op == Op_OrL ||
+                op == Op_RShiftL ||
+                op == Op_LShiftL ||
+                op == Op_AddL ||
+                op == Op_AddI ||
+                op == Op_MulL ||
+                op == Op_MulI ||
+                op == Op_SubL ||
+                op == Op_SubI ||
+                op == Op_ConvI2L) {
+              to_clone.push(m, 1);
+              continue;
+            }
+            if (op == Op_Opaque1) {
+              if (n->_idx < current) {
+                n = n->clone();
+              }
+              n->set_req(i, castii);
+              register_new_node(n, min_taken);
+              to_clone.set_node(n);
+            }
+            for (;;) {
+              Node* cur = to_clone.node();
+              uint j = to_clone.index();
+              if (j+1 < cur->req()) {
+                to_clone.set_index(j+1);
+                break;
+              }
+              to_clone.pop();
+              if (to_clone.size() == 0) {
+                result = cur;
+                break;
+              }
+              Node* next = to_clone.node();
+              j = to_clone.index();
+              if (cur->_idx >= current) {
+                if (next->_idx < current) {
+                  next = next->clone();
+                  register_new_node(next, min_taken);
+                  to_clone.set_node(next);
+                }
+                assert(next->in(j) != cur, "input should have been cloned");
+                next->set_req(j, cur);
+              }
+            }
+          } while (result == NULL);
+          assert(result->_idx >= current, "new node expected");
+
+          Node* proj = entry->clone();
+          Node* other_proj = uncommon_proj->clone();
+          Node* new_iff = iff->clone();
+          new_iff->set_req(1, result);
+          proj->set_req(0, new_iff);
+          other_proj->set_req(0, new_iff);
+          Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
+          register_new_node(frame, C->start());
+          // It's impossible for the predicate to fail at runtime. Use
+          // an Halt node.
+          Node* halt = new HaltNode(other_proj, frame);
+          C->root()->add_req(halt);
+          new_iff->set_req(0, prev_proj);
+
+          register_control(new_iff, outer_loop->_parent, prev_proj);
+          register_control(proj, outer_loop->_parent, new_iff);
+          register_control(other_proj, _ltree_root, new_iff);
+          register_control(halt, _ltree_root, other_proj);
+
+          prev_proj = proj;
+        }
+        entry = entry->in(0)->in(0);
+      }
+      _igvn.replace_input_of(outer_main_head, LoopNode::EntryControl, prev_proj);
+      set_idom(outer_main_head, prev_proj, dd_main_head);
+    }
+  }
 }
 
 //------------------------------insert_pre_post_loops--------------------------
@@ -1137,8 +1265,9 @@
   // dependencies.
 
   // CastII for the main loop:
-  bool inserted = cast_incr_before_loop( pre_incr, min_taken, main_head );
-  assert(inserted, "no castII inserted");
+  Node* castii = cast_incr_before_loop( pre_incr, min_taken, main_head );
+  assert(castii != NULL, "no castII inserted");
+  duplicate_predicates(pre_head, min_taken, castii, outer_loop, outer_main_head, dd_main_head);
 
   // Step B4: Shorten the pre-loop to run only 1 iteration (for now).
   // RCE and alignment may change this later.
@@ -1403,8 +1532,8 @@
   }
 
   // CastII for the new post loop:
-  bool inserted = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head);
-  assert(inserted, "no castII inserted");
+  Node* castii = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head);
+  assert(castii != NULL, "no castII inserted");
 
   return new_main_exit;
 }
@@ -1467,7 +1596,7 @@
     if (!is_canonical_loop_entry(loop_head)) {
       return;
     }
-    opaq = ctrl->in(0)->in(1)->in(1)->in(2);
+    opaq = loop_head->skip_predicates()->in(0)->in(1)->in(1)->in(2);
     // Zero-trip test uses an 'opaque' node which is not shared.
     assert(opaq->outcnt() == 1 && opaq->in(1) == limit, "");
   }
@@ -2034,6 +2163,34 @@
   return false;
 }
 
+// Same as PhaseIdealLoop::duplicate_predicates() but for range checks
+// eliminated by iteration splitting.
+Node* PhaseIdealLoop::add_range_check_predicate(IdealLoopTree* loop, CountedLoopNode* cl,
+                                                Node* predicate_proj, int scale_con, Node* offset,
+                                                Node* limit, jint stride_con) {
+  bool overflow = false;
+  BoolNode* bol = rc_predicate(loop, predicate_proj, scale_con, offset, cl->init_trip(), NULL, stride_con, limit, (stride_con > 0) != (scale_con > 0), overflow);
+  Node* opaque_bol = new Opaque4Node(C, bol, _igvn.intcon(1));
+  register_new_node(opaque_bol, predicate_proj);
+  IfNode* new_iff = NULL;
+  if (overflow) {
+    new_iff = new IfNode(predicate_proj, bol, PROB_MAX, COUNT_UNKNOWN);
+  } else {
+    new_iff = new RangeCheckNode(predicate_proj, bol, PROB_MAX, COUNT_UNKNOWN);
+  }
+  register_control(new_iff, loop->_parent, predicate_proj);
+  Node* iffalse = new IfFalseNode(new_iff);
+  register_control(iffalse, _ltree_root, new_iff);
+  ProjNode* iftrue = new IfTrueNode(new_iff);
+  register_control(iftrue, loop->_parent, new_iff);
+  Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
+  register_new_node(frame, C->start());
+  Node* halt = new HaltNode(iffalse, frame);
+  register_control(halt, _ltree_root, iffalse);
+  C->root()->add_req(halt);
+  return iftrue;
+}
+
 //------------------------------do_range_check---------------------------------
 // Eliminate range-checks and other trip-counter vs loop-invariant tests.
 int PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) {
@@ -2069,7 +2226,7 @@
   }
 
   // Need to find the main-loop zero-trip guard
-  Node *ctrl  = cl->skip_strip_mined()->in(LoopNode::EntryControl);
+  Node *ctrl  = cl->skip_predicates();
   Node *iffm = ctrl->in(0);
   Node *opqzm = iffm->in(1)->in(1)->in(2);
   assert(opqzm->in(1) == main_limit, "do not understand situation");
@@ -2124,6 +2281,8 @@
   // the loop is in canonical form to multiversion.
   closed_range_checks = 0;
 
+  Node* predicate_proj = cl->skip_strip_mined()->in(LoopNode::EntryControl);
+  assert(predicate_proj->is_Proj() && predicate_proj->in(0)->is_If(), "if projection only");
   // Check loop body for tests of trip-counter plus loop-invariant vs loop-variant.
   for( uint i = 0; i < loop->_body.size(); i++ ) {
     Node *iff = loop->_body[i];
@@ -2168,7 +2327,7 @@
       // 'limit' maybe pinned below the zero trip test (probably from a
       // previous round of rce), in which case, it can't be used in the
       // zero trip test expression which must occur before the zero test's if.
-      if( limit_c == ctrl ) {
+      if (is_dominator(ctrl, limit_c)) {
         continue;  // Don't rce this check but continue looking for other candidates.
       }
 
@@ -2186,7 +2345,7 @@
 
       // As above for the 'limit', the 'offset' maybe pinned below the
       // zero trip test.
-      if( offset_c == ctrl ) {
+      if (is_dominator(ctrl, offset_c)) {
         continue; // Don't rce this check but continue looking for other candidates.
       }
 #ifdef ASSERT
@@ -2209,6 +2368,7 @@
           add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
           // (0-offset)/scale could be outside of loop iterations range.
           conditional_rc = true;
+          predicate_proj = add_range_check_predicate(loop, cl, predicate_proj, scale_con, offset, limit, stride_con);
         } else {
           if (PrintOpto) {
             tty->print_cr("missed RCE opportunity");
@@ -2278,6 +2438,10 @@
     } // End of is IF
 
   }
+  if (predicate_proj != cl->skip_strip_mined()->in(LoopNode::EntryControl)) {
+    _igvn.replace_input_of(cl->skip_strip_mined(), LoopNode::EntryControl, predicate_proj);
+    set_idom(cl->skip_strip_mined(), predicate_proj, dom_depth(cl->skip_strip_mined()));
+  }
 
   // Update loop limits
   if (conditional_rc) {
@@ -2540,7 +2704,7 @@
 
 #ifdef ASSERT
 static CountedLoopNode* locate_pre_from_main(CountedLoopNode *cl) {
-  Node *ctrl  = cl->skip_strip_mined()->in(LoopNode::EntryControl);
+  Node *ctrl  = cl->skip_predicates();
   assert(ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "");
   Node *iffm = ctrl->in(0);
   assert(iffm->Opcode() == Op_If, "");
@@ -2579,7 +2743,7 @@
   }
 
   assert(locate_pre_from_main(main_head) == cl, "bad main loop");
-  Node* main_iff = main_head->skip_strip_mined()->in(LoopNode::EntryControl)->in(0);
+  Node* main_iff = main_head->skip_predicates()->in(0);
 
   // Remove the Opaque1Node of the pre loop and make it execute all iterations
   phase->_igvn.replace_input_of(pre_cmp, 2, pre_cmp->in(2)->in(2));
@@ -2640,7 +2804,7 @@
   }
   if (needs_guard) {
     // Check for an obvious zero trip guard.
-    Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->skip_strip_mined()->in(LoopNode::EntryControl));
+    Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->skip_predicates());
     if (inctrl->Opcode() == Op_IfTrue || inctrl->Opcode() == Op_IfFalse) {
       bool maybe_swapped = (inctrl->Opcode() == Op_IfFalse);
       // The test should look like just the backedge of a CountedLoop
--- a/src/hotspot/share/opto/loopnode.cpp	Thu Mar 22 16:39:02 2018 -0700
+++ b/src/hotspot/share/opto/loopnode.cpp	Thu Mar 22 20:21:19 2018 -0700
@@ -1158,9 +1158,9 @@
   return NULL;
 }
 
-LoopNode* CountedLoopNode::skip_strip_mined(int expect_opaq) {
+LoopNode* CountedLoopNode::skip_strip_mined(int expect_skeleton) {
   if (is_strip_mined()) {
-    verify_strip_mined(expect_opaq);
+    verify_strip_mined(expect_skeleton);
     return in(EntryControl)->as_Loop();
   }
   return this;
@@ -1252,6 +1252,20 @@
   return l->outer_safepoint();
 }
 
+Node* CountedLoopNode::skip_predicates() {
+  if (is_main_loop()) {
+    Node* ctrl = skip_strip_mined()->in(LoopNode::EntryControl);
+    while (ctrl != NULL && ctrl->is_Proj() && ctrl->in(0)->is_If() &&
+           ctrl->in(0)->as_If()->proj_out(1-ctrl->as_Proj()->_con)->outcnt() == 1 &&
+           ctrl->in(0)->as_If()->proj_out(1-ctrl->as_Proj()->_con)->unique_out()->Opcode() == Op_Halt) {
+      ctrl = ctrl->in(0)->in(0);
+    }
+
+    return ctrl;
+  }
+  return in(LoopNode::EntryControl);
+}
+
 void OuterStripMinedLoopNode::adjust_strip_mined_loop(PhaseIterGVN* igvn) {
   // Look for the outer & inner strip mined loop, reduce number of
   // iterations of the inner loop, set exit condition of outer loop,
@@ -3770,7 +3784,8 @@
   if (!cl->is_main_loop() && !cl->is_post_loop()) {
     return false;
   }
-  Node* ctrl = cl->skip_strip_mined()->in(LoopNode::EntryControl);
+  Node* ctrl = cl->skip_predicates();
+
   if (ctrl == NULL || (!ctrl->is_IfTrue() && !ctrl->is_IfFalse())) {
     return false;
   }
--- a/src/hotspot/share/opto/loopnode.hpp	Thu Mar 22 16:39:02 2018 -0700
+++ b/src/hotspot/share/opto/loopnode.hpp	Thu Mar 22 20:21:19 2018 -0700
@@ -138,7 +138,7 @@
 #endif
 
   void verify_strip_mined(int expect_skeleton) const;
-  virtual LoopNode* skip_strip_mined(int expect_opaq = 1) { return this; }
+  virtual LoopNode* skip_strip_mined(int expect_skeleton = 1) { return this; }
   virtual IfTrueNode* outer_loop_tail() const { ShouldNotReachHere(); return NULL; }
   virtual OuterStripMinedLoopEndNode* outer_loop_end() const { ShouldNotReachHere(); return NULL; }
   virtual IfFalseNode* outer_loop_exit() const { ShouldNotReachHere(); return NULL; }
@@ -298,6 +298,11 @@
   virtual IfFalseNode* outer_loop_exit() const;
   virtual SafePointNode* outer_safepoint() const;
 
+  // If this is a main loop in a pre/main/post loop nest, walk over
+  // the predicates that were inserted by
+  // duplicate_predicates()/add_range_check_predicate()
+  Node* skip_predicates();
+
 #ifndef PRODUCT
   virtual void dump_spec(outputStream *st) const;
 #endif
@@ -724,7 +729,10 @@
     return ctrl;
   }
 
-  bool cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop);
+  Node* cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop);
+  void duplicate_predicates(CountedLoopNode* pre_head, Node *min_taken, Node* castii,
+                            IdealLoopTree* outer_loop, LoopNode* outer_main_head,
+                            uint dd_main_head);
 
 public:
 
@@ -1067,6 +1075,15 @@
 
   // Implementation of the loop predication to promote checks outside the loop
   bool loop_predication_impl(IdealLoopTree *loop);
+  ProjNode* insert_skeleton_predicate(IfNode* iff, IdealLoopTree *loop,
+                                      ProjNode* proj, ProjNode *predicate_proj,
+                                      ProjNode* upper_bound_proj,
+                                      int scale, Node* offset,
+                                      Node* init, Node* limit, jint stride,
+                                      Node* rng, bool& overflow);
+  Node* add_range_check_predicate(IdealLoopTree* loop, CountedLoopNode* cl,
+                                  Node* predicate_proj, int scale_con, Node* offset,
+                                  Node* limit, jint stride_con);
 
   // Helper function to collect predicate for eliminating the useless ones
   void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1);
--- a/src/hotspot/share/opto/macro.cpp	Thu Mar 22 16:39:02 2018 -0700
+++ b/src/hotspot/share/opto/macro.cpp	Thu Mar 22 20:21:19 2018 -0700
@@ -2668,8 +2668,7 @@
         assert(n->Opcode() == Op_LoopLimit ||
                n->Opcode() == Op_Opaque1   ||
                n->Opcode() == Op_Opaque2   ||
-               n->Opcode() == Op_Opaque3   ||
-               n->Opcode() == Op_Opaque4, "unknown node type in macro list");
+               n->Opcode() == Op_Opaque3, "unknown node type in macro list");
       }
       assert(success == (C->macro_count() < old_macro_count), "elimination reduces macro count");
       progress = progress || success;
@@ -2734,9 +2733,6 @@
         _igvn.replace_node(n, repl);
         success = true;
 #endif
-      } else if (n->Opcode() == Op_Opaque4) {
-        _igvn.replace_node(n, n->in(2));
-        success = true;
       } else if (n->Opcode() == Op_OuterStripMinedLoop) {
         n->as_OuterStripMinedLoop()->adjust_strip_mined_loop(&_igvn);
         C->remove_macro_node(n);
--- a/src/hotspot/share/opto/node.cpp	Thu Mar 22 16:39:02 2018 -0700
+++ b/src/hotspot/share/opto/node.cpp	Thu Mar 22 20:21:19 2018 -0700
@@ -504,6 +504,9 @@
   if (cast != NULL && cast->has_range_check()) {
     C->add_range_check_cast(cast);
   }
+  if (n->Opcode() == Op_Opaque4) {
+    C->add_opaque4_node(n);
+  }
 
   n->set_idx(C->next_unique()); // Get new unique index as well
   debug_only( n->verify_construction() );
@@ -612,6 +615,9 @@
   if (cast != NULL && cast->has_range_check()) {
     compile->remove_range_check_cast(cast);
   }
+  if (Opcode() == Op_Opaque4) {
+    compile->remove_opaque4_node(this);
+  }
 
   if (is_SafePoint()) {
     as_SafePoint()->delete_replaced_nodes();
@@ -1352,6 +1358,9 @@
       if (cast != NULL && cast->has_range_check()) {
         igvn->C->remove_range_check_cast(cast);
       }
+      if (dead->Opcode() == Op_Opaque4) {
+        igvn->C->remove_range_check_cast(dead);
+      }
       igvn->C->record_dead_node(dead->_idx);
       // Kill all inputs to the dead guy
       for (uint i=0; i < dead->req(); i++) {
--- a/src/hotspot/share/opto/opaquenode.cpp	Thu Mar 22 16:39:02 2018 -0700
+++ b/src/hotspot/share/opto/opaquenode.cpp	Thu Mar 22 20:21:19 2018 -0700
@@ -60,10 +60,6 @@
   return (&n == this);          // Always fail except on self
 }
 
-Node* Opaque4Node::Identity(PhaseGVN* phase) {
-  return phase->C->major_progress() ? this : in(2);
-}
-
 const Type* Opaque4Node::Value(PhaseGVN* phase) const {
   return phase->type(in(1));
 }
--- a/src/hotspot/share/opto/opaquenode.hpp	Thu Mar 22 16:39:02 2018 -0700
+++ b/src/hotspot/share/opto/opaquenode.hpp	Thu Mar 22 20:21:19 2018 -0700
@@ -35,14 +35,14 @@
   virtual uint hash() const ;                  // { return NO_HASH; }
   virtual uint cmp( const Node &n ) const;
   public:
-  Opaque1Node( Compile* C, Node *n ) : Node(0,n) {
+  Opaque1Node(Compile* C, Node *n) : Node(NULL, n) {
     // Put it on the Macro nodes list to removed during macro nodes expansion.
     init_flags(Flag_is_macro);
     C->add_macro_node(this);
   }
   // Special version for the pre-loop to hold the original loop limit
   // which is consumed by range check elimination.
-  Opaque1Node( Compile* C, Node *n, Node* orig_limit ) : Node(0,n,orig_limit) {
+  Opaque1Node(Compile* C, Node *n, Node* orig_limit) : Node(NULL, n, orig_limit) {
     // Put it on the Macro nodes list to removed during macro nodes expansion.
     init_flags(Flag_is_macro);
     C->add_macro_node(this);
@@ -87,25 +87,23 @@
   bool rtm_opt() const { return (_opt == RTM_OPT); }
 };
 
-// Used by GraphKit::must_be_not_null(): input 1 is a check that we
-// know implicitly is always true or false but the compiler has no way
-// to prove. If during optimizations, that check becomes true or
-// false, the Opaque4 node is replaced by that constant true or
-// false. Input 2 is the constant value we know the test takes. After
-// loop optimizations, we replace input 1 by input 2 so the control
-// that depends on that test can be removed and there's no overhead at
-// runtime.
+// Input 1 is a check that we know implicitly is always true or false
+// but the compiler has no way to prove. If during optimizations, that
+// check becomes true or false, the Opaque4 node is replaced by that
+// constant true or false. Input 2 is the constant value we know the
+// test takes. After loop optimizations, we replace input 1 by input 2
+// so the control that depends on that test can be removed and there's
+// no overhead at runtime. Used for instance by
+// GraphKit::must_be_not_null().
 class Opaque4Node : public Node {
   public:
-  Opaque4Node(Compile* C, Node *tst, Node* final_tst) : Node(0, tst, final_tst) {
-    // Put it on the Macro nodes list to removed during macro nodes expansion.
-    init_flags(Flag_is_macro);
-    C->add_macro_node(this);
+  Opaque4Node(Compile* C, Node *tst, Node* final_tst) : Node(NULL, tst, final_tst) {
+    // Put it on the Opaque4 nodes list to be removed after all optimizations
+    C->add_opaque4_node(this);
   }
   virtual int Opcode() const;
   virtual const Type *bottom_type() const { return TypeInt::BOOL; }
   virtual const Type* Value(PhaseGVN* phase) const;
-  virtual Node* Identity(PhaseGVN* phase);
 };
 
 
--- a/src/hotspot/share/opto/phaseX.cpp	Thu Mar 22 16:39:02 2018 -0700
+++ b/src/hotspot/share/opto/phaseX.cpp	Thu Mar 22 20:21:19 2018 -0700
@@ -1421,6 +1421,9 @@
       if (cast != NULL && cast->has_range_check()) {
         C->remove_range_check_cast(cast);
       }
+      if (dead->Opcode() == Op_Opaque4) {
+        C->remove_opaque4_node(dead);
+      }
     }
   } // while (_stack.is_nonempty())
 }
--- a/src/hotspot/share/opto/superword.cpp	Thu Mar 22 16:39:02 2018 -0700
+++ b/src/hotspot/share/opto/superword.cpp	Thu Mar 22 20:21:19 2018 -0700
@@ -3328,7 +3328,7 @@
     return NULL;
   }
 
-  Node* p_f = cl->skip_strip_mined()->in(LoopNode::EntryControl)->in(0)->in(0);
+  Node* p_f = cl->skip_predicates()->in(0)->in(0);
   if (!p_f->is_IfFalse()) return NULL;
   if (!p_f->in(0)->is_CountedLoopEnd()) return NULL;
   CountedLoopEndNode* pre_end = p_f->in(0)->as_CountedLoopEnd();
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/compiler/loopopts/IterationSplitPredicateInconsistency.java	Thu Mar 22 20:21:19 2018 -0700
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2018, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8193130
+ * @summary Bad graph when unrolled loop bounds conflicts with range checks
+ *
+ * @run main/othervm IterationSplitPredicateInconsistency
+ * @run main/othervm -XX:-UseLoopPredicate IterationSplitPredicateInconsistency
+ *
+ */
+
+public class IterationSplitPredicateInconsistency {
+    static volatile int barrier;
+
+    // Pollute profile so loop appears to run for a large number of iterations
+    static boolean test1_helper(int start, int stop, double[] array1, double[] array2, int exit) {
+        for (int i = start; i < stop; i++) {
+            array1[i] = array2[i];
+            if (i == exit) {
+                return true;
+            }
+            barrier = 0x42;
+        }
+        return false;
+    }
+
+    static double[] test1(int start, double[] array2, int exit) {
+        double[] array1 = new double[10];
+        // Predication moves range checks out of loop and
+        // pre/main/post loops are created. The main loop is unrolled
+        // several times to the point where it's never executed but
+        // compiler can't tell from the loop bounds alone. The lower
+        // bound of the loop is negative and would cause range checks
+        // (that were removed from the loop body) to fail.
+        if (test1_helper(start, 5, array1, array2, exit)) {
+            return null;
+        }
+        return array1;
+    }
+
+    // Same as above with other combinations of increasing/decreasing
+    // loops, positive/negative stride
+    static boolean test2_helper(int start, int stop, double[] array1, double[] array2, int exit) {
+        for (int i = start-1; i >= stop; i--) {
+            array1[i] = array2[i];
+            if (i == exit) {
+                return true;
+            }
+            barrier = 0x42;
+        }
+        return false;
+    }
+
+    static double[] test2(int start, double[] array2, int exit) {
+        double[] array1 = new double[10];
+        if (test2_helper(start, 0, array1, array2, exit)) {
+            return null;
+        }
+        return array1;
+    }
+
+    static boolean test3_helper(int start, int stop, double[] array1, double[] array2, int exit) {
+        for (int i = start; i < stop; i++) {
+            array1[stop-i-1] = array2[stop-i-1];
+            if (i == exit) {
+                return true;
+            }
+            barrier = 0x42;
+        }
+        return false;
+    }
+
+    static double[] test3(int start, double[] array2, int exit) {
+        double[] array1 = new double[5];
+        if (test3_helper(start, 5, array1, array2, exit)) {
+            return null;
+        }
+        return array1;
+    }
+
+    static boolean test4_helper(int start, int stop, int from, double[] array1, double[] array2, int exit) {
+        for (int i = start-1; i >= stop; i--) {
+            array1[from-i-1] = array2[from-i-1];
+            if (i == exit) {
+                return true;
+            }
+            barrier = 0x42;
+        }
+        return false;
+    }
+
+    static double[] test4(int start, double[] array2, int exit) {
+        double[] array1 = new double[5];
+        if (test4_helper(start, 0, 5, array1, array2, exit)) {
+            return null;
+        }
+        return array1;
+    }
+
+    public static void main(String[] args) {
+        double[] array2 = new double[10];
+        double[] array3 = new double[1000];
+        for (int i = 0; i < 20_000; i++) {
+            test1_helper(0, 1000, array3, array3, 998);
+            test1(0, array2, 999);
+            test1(0, array2, 4);
+            test2_helper(1000, 0, array3, array3, 1);
+            test2(5, array2, 999);
+            test2(5, array2, 1);
+            test3_helper(0, 1000, array3, array3, 998);
+            test3(0, array2, 999);
+            test3(0, array2, 4);
+            test4_helper(1000, 0, 1000, array3, array3, 1);
+            test4(5, array2, 999);
+            test4(5, array2, 1);
+        }
+    }
+}