8205034: [BACKOUT] Induction variable of over-unrolled loop conflicts with range checks
authorthartmann
Thu, 14 Jun 2018 11:22:04 +0200
changeset 50561 5756e8eecb17
parent 50560 dafb2cc6ba32
child 50562 3903ab54107e
8205034: [BACKOUT] Induction variable of over-unrolled loop conflicts with range checks Summary: Backout fix for JDK-8203915 because it causes SIGILL failures. Reviewed-by: shade
src/hotspot/share/opto/loopPredicate.cpp
src/hotspot/share/opto/loopTransform.cpp
src/hotspot/share/opto/loopnode.hpp
test/hotspot/jtreg/compiler/loopopts/IterationSplitPredicateInconsistency.java
test/hotspot/jtreg/compiler/loopopts/TestOverunrolling.java
--- a/src/hotspot/share/opto/loopPredicate.cpp	Thu Jun 14 16:56:58 2018 +0900
+++ b/src/hotspot/share/opto/loopPredicate.cpp	Thu Jun 14 11:22:04 2018 +0200
@@ -89,7 +89,7 @@
 //
 //
 // We will create a region to guard the uct call if there is no one there.
-// The true projection (if_cont) of the new_iff is returned.
+// The true projecttion (if_cont) of the new_iff is returned.
 // This code is also used to clone predicates to cloned loops.
 ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry,
                                                       Deoptimization::DeoptReason reason,
@@ -767,10 +767,10 @@
 }
 
 // After pre/main/post loops are created, we'll put a copy of some
-// range checks between the pre and main loop to validate the value
-// of the main loop induction variable. Make a copy of the predicates
-// here with an opaque node as a place holder for the value (will be
-// updated by PhaseIdealLoop::update_skeleton_predicate()).
+// range checks between the pre and main loop to validate the initial
+// value of the induction variable for the main loop. Make a copy of
+// the predicates here with an opaque node as a place holder for the
+// initial value.
 ProjNode* PhaseIdealLoop::insert_skeleton_predicate(IfNode* iff, IdealLoopTree *loop,
                                                     ProjNode* proj, ProjNode *predicate_proj,
                                                     ProjNode* upper_bound_proj,
--- a/src/hotspot/share/opto/loopTransform.cpp	Thu Jun 14 16:56:58 2018 +0900
+++ b/src/hotspot/share/opto/loopTransform.cpp	Thu Jun 14 11:22:04 2018 +0200
@@ -1014,143 +1014,125 @@
 // loop is never executed). When that happens, range check
 // CastII/ConvI2L nodes cause some data paths to die. For consistency,
 // the control paths must die too but the range checks were removed by
-// predication. The range checks that we add here guarantee that they do.
+// predication. The range checks that we add here guarantee that they
+// do.
 void PhaseIdealLoop::duplicate_predicates(CountedLoopNode* pre_head, Node* min_taken, Node* castii,
                                           IdealLoopTree* outer_loop, LoopNode* outer_main_head,
                                           uint dd_main_head) {
-  assert(UseLoopPredicate, "loop predicates must be enabled");
-  Node* entry = pre_head->in(LoopNode::EntryControl);
-  Node* predicate = NULL;
-  predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
-  if (predicate != NULL) {
-    entry = entry->in(0)->in(0);
-  }
-  predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
-  if (predicate != NULL) {
-    IfNode* iff = entry->in(0)->as_If();
-    ProjNode* uncommon_proj = iff->proj_out(1 - entry->as_Proj()->_con);
-    Node* rgn = uncommon_proj->unique_ctrl_out();
-    assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
-    assert(iff->in(1)->in(1)->Opcode() == Op_Opaque1, "unexpected predicate shape");
-    entry = entry->in(0)->in(0);
-    Node* prev_proj = min_taken;
-    while (entry != NULL && entry->is_Proj() && entry->in(0)->is_If()) {
-      iff = entry->in(0)->as_If();
-      uncommon_proj = iff->proj_out(1 - entry->as_Proj()->_con);
-      if (uncommon_proj->unique_ctrl_out() != rgn)
-        break;
-      if (iff->in(1)->Opcode() == Op_Opaque4) {
-        // Clone the predicate twice and initialize one with the initial
-        // value of the loop induction variable. Leave the other predicate
-        // to be initialized when increasing the stride during loop unrolling.
-        prev_proj = update_skeleton_predicate(iff, castii, entry, uncommon_proj, min_taken, outer_loop, prev_proj);
-        Node* value = new Opaque1Node(C, castii);
-        register_new_node(value, min_taken);
-        prev_proj = update_skeleton_predicate(iff, value, entry, uncommon_proj, min_taken, outer_loop, prev_proj);
-        // Remove the skeleton predicate from the pre-loop
-        _igvn.replace_input_of(iff, 1, _igvn.intcon(1));
-      }
+  if (UseLoopPredicate) {
+    Node* entry = pre_head->in(LoopNode::EntryControl);
+    Node* predicate = NULL;
+    predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
+    if (predicate != NULL) {
       entry = entry->in(0)->in(0);
     }
-    _igvn.replace_input_of(outer_main_head, LoopNode::EntryControl, prev_proj);
-    set_idom(outer_main_head, prev_proj, dd_main_head);
-  }
-}
-
-Node* PhaseIdealLoop::update_skeleton_predicate(Node* iff, Node* value, Node* entry, Node* uncommon_proj,
-                                                Node* min_taken, IdealLoopTree* outer_loop, Node* prev_proj) {
-  bool clone = (outer_loop != NULL); // Clone the predicate?
-  Node_Stack to_clone(2);
-  to_clone.push(iff->in(1), 1);
-  uint current = C->unique();
-  Node* result = NULL;
-  // Look for the opaque node to replace with the new value
-  // and clone everything in between. We keep the Opaque4 node
-  // so the duplicated predicates are eliminated once loop
-  // opts are over: they are here only to keep the IR graph
-  // consistent.
-  do {
-    Node* n = to_clone.node();
-    uint i = to_clone.index();
-    Node* m = n->in(i);
-    int op = m->Opcode();
-    if (m->is_Bool() ||
-        m->is_Cmp() ||
-        op == Op_AndL ||
-        op == Op_OrL ||
-        op == Op_RShiftL ||
-        op == Op_LShiftL ||
-        op == Op_AddL ||
-        op == Op_AddI ||
-        op == Op_MulL ||
-        op == Op_MulI ||
-        op == Op_SubL ||
-        op == Op_SubI ||
-        op == Op_ConvI2L) {
-      to_clone.push(m, 1);
-      continue;
-    }
-    if (op == Op_Opaque1) {
-      if (!clone) {
-        // Update the input of the Opaque1Node and exit
-        _igvn.replace_input_of(m, 1, value);
-        return prev_proj;
-      }
-      if (n->_idx < current) {
-        n = n->clone();
+    predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
+    if (predicate != NULL) {
+      IfNode* iff = entry->in(0)->as_If();
+      ProjNode* uncommon_proj = iff->proj_out(1 - entry->as_Proj()->_con);
+      Node* rgn = uncommon_proj->unique_ctrl_out();
+      assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
+      assert(iff->in(1)->in(1)->Opcode() == Op_Opaque1, "unexpected predicate shape");
+      entry = entry->in(0)->in(0);
+      Node* prev_proj = min_taken;
+      while (entry != NULL && entry->is_Proj() && entry->in(0)->is_If()) {
+        uncommon_proj = entry->in(0)->as_If()->proj_out(1 - entry->as_Proj()->_con);
+        if (uncommon_proj->unique_ctrl_out() != rgn)
+          break;
+        iff = entry->in(0)->as_If();
+        if (iff->in(1)->Opcode() == Op_Opaque4) {
+          Node_Stack to_clone(2);
+          to_clone.push(iff->in(1), 1);
+          uint current = C->unique();
+          Node* result = NULL;
+          // Look for the opaque node to replace with the init value
+          // and clone everything in between. We keep the Opaque4 node
+          // so the duplicated predicates are eliminated once loop
+          // opts are over: they are here only to keep the IR graph
+          // consistent.
+          do {
+            Node* n = to_clone.node();
+            uint i = to_clone.index();
+            Node* m = n->in(i);
+            int op = m->Opcode();
+            if (m->is_Bool() ||
+                m->is_Cmp() ||
+                op == Op_AndL ||
+                op == Op_OrL ||
+                op == Op_RShiftL ||
+                op == Op_LShiftL ||
+                op == Op_AddL ||
+                op == Op_AddI ||
+                op == Op_MulL ||
+                op == Op_MulI ||
+                op == Op_SubL ||
+                op == Op_SubI ||
+                op == Op_ConvI2L) {
+              to_clone.push(m, 1);
+              continue;
+            }
+            if (op == Op_Opaque1) {
+              if (n->_idx < current) {
+                n = n->clone();
+              }
+              n->set_req(i, castii);
+              register_new_node(n, min_taken);
+              to_clone.set_node(n);
+            }
+            for (;;) {
+              Node* cur = to_clone.node();
+              uint j = to_clone.index();
+              if (j+1 < cur->req()) {
+                to_clone.set_index(j+1);
+                break;
+              }
+              to_clone.pop();
+              if (to_clone.size() == 0) {
+                result = cur;
+                break;
+              }
+              Node* next = to_clone.node();
+              j = to_clone.index();
+              if (cur->_idx >= current) {
+                if (next->_idx < current) {
+                  next = next->clone();
+                  register_new_node(next, min_taken);
+                  to_clone.set_node(next);
+                }
+                assert(next->in(j) != cur, "input should have been cloned");
+                next->set_req(j, cur);
+              }
+            }
+          } while (result == NULL);
+          assert(result->_idx >= current, "new node expected");
+
+          Node* proj = entry->clone();
+          Node* other_proj = uncommon_proj->clone();
+          Node* new_iff = iff->clone();
+          new_iff->set_req(1, result);
+          proj->set_req(0, new_iff);
+          other_proj->set_req(0, new_iff);
+          Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
+          register_new_node(frame, C->start());
+          // It's impossible for the predicate to fail at runtime. Use
+          // an Halt node.
+          Node* halt = new HaltNode(other_proj, frame);
+          C->root()->add_req(halt);
+          new_iff->set_req(0, prev_proj);
+
+          register_control(new_iff, outer_loop->_parent, prev_proj);
+          register_control(proj, outer_loop->_parent, new_iff);
+          register_control(other_proj, _ltree_root, new_iff);
+          register_control(halt, _ltree_root, other_proj);
+
+          prev_proj = proj;
+        }
+        entry = entry->in(0)->in(0);
       }
-      n->set_req(i, value);
-      register_new_node(n, min_taken);
-      to_clone.set_node(n);
+      _igvn.replace_input_of(outer_main_head, LoopNode::EntryControl, prev_proj);
+      set_idom(outer_main_head, prev_proj, dd_main_head);
     }
-    for (;;) {
-      Node* cur = to_clone.node();
-      uint j = to_clone.index();
-      if (j+1 < cur->req()) {
-        to_clone.set_index(j+1);
-        break;
-      }
-      to_clone.pop();
-      if (to_clone.size() == 0) {
-        result = cur;
-        break;
-      }
-      Node* next = to_clone.node();
-      j = to_clone.index();
-      if (clone && cur->_idx >= current) {
-        if (next->_idx < current) {
-          next = next->clone();
-          register_new_node(next, min_taken);
-          to_clone.set_node(next);
-        }
-        assert(next->in(j) != cur, "input should have been cloned");
-        next->set_req(j, cur);
-      }
-    }
-  } while (result == NULL);
-  if (!clone) {
-    return NULL;
   }
-  assert(result->_idx >= current, "new node expected");
-
-  Node* proj = entry->clone();
-  Node* other_proj = uncommon_proj->clone();
-  Node* new_iff = iff->clone();
-  new_iff->set_req(1, result);
-  proj->set_req(0, new_iff);
-  other_proj->set_req(0, new_iff);
-  Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
-  register_new_node(frame, C->start());
-  // It's impossible for the predicate to fail at runtime. Use an Halt node.
-  Node* halt = new HaltNode(other_proj, frame);
-  C->root()->add_req(halt);
-  new_iff->set_req(0, prev_proj);
-
-  register_control(new_iff, outer_loop->_parent, prev_proj);
-  register_control(proj, outer_loop->_parent, new_iff);
-  register_control(other_proj, _ltree_root, new_iff);
-  register_control(halt, _ltree_root, other_proj);
-  return proj;
 }
 
 //------------------------------insert_pre_post_loops--------------------------
@@ -1296,9 +1278,7 @@
   // CastII for the main loop:
   Node* castii = cast_incr_before_loop( pre_incr, min_taken, main_head );
   assert(castii != NULL, "no castII inserted");
-  if (UseLoopPredicate) {
-    duplicate_predicates(pre_head, min_taken, castii, outer_loop, outer_main_head, dd_main_head);
-  }
+  duplicate_predicates(pre_head, min_taken, castii, outer_loop, outer_main_head, dd_main_head);
 
   // Step B4: Shorten the pre-loop to run only 1 iteration (for now).
   // RCE and alignment may change this later.
@@ -1642,26 +1622,6 @@
   assert(old_trip_count > 1 &&
       (!adjust_min_trip || stride_p <= (1<<3)*loop_head->unrolled_count()), "sanity");
 
-  if (UseLoopPredicate) {
-    // Search for skeleton predicates and update them according to the new stride
-    Node* entry = ctrl;
-    while (entry != NULL && entry->is_Proj() && entry->in(0)->is_If()) {
-      IfNode* iff = entry->in(0)->as_If();
-      ProjNode* proj = iff->proj_out(1 - entry->as_Proj()->_con);
-      if (proj->unique_ctrl_out()->Opcode() != Op_Halt) {
-        break;
-      }
-      if (iff->in(1)->Opcode() == Op_Opaque4) {
-        // Compute value of loop induction variable at the end of the first iteration
-        Node* max_value = _igvn.intcon(2 * stride_con - (stride_con > 0 ? 1 : -1));
-        max_value = new AddINode(init, max_value);
-        register_new_node(max_value, get_ctrl(iff->in(1)));
-        update_skeleton_predicate(iff, max_value);
-      }
-      entry = entry->in(0)->in(0);
-    }
-  }
-
   // Adjust loop limit to keep valid iterations number after unroll.
   // Use (limit - stride) instead of (((limit - init)/stride) & (-2))*stride
   // which may overflow.
--- a/src/hotspot/share/opto/loopnode.hpp	Thu Jun 14 16:56:58 2018 +0900
+++ b/src/hotspot/share/opto/loopnode.hpp	Thu Jun 14 11:22:04 2018 +0200
@@ -735,8 +735,6 @@
   void duplicate_predicates(CountedLoopNode* pre_head, Node *min_taken, Node* castii,
                             IdealLoopTree* outer_loop, LoopNode* outer_main_head,
                             uint dd_main_head);
-  Node* update_skeleton_predicate(Node* iff, Node* value, Node* entry = NULL, Node* uncommon_proj = NULL,
-                                  Node* min_taken = NULL, IdealLoopTree* outer_loop = NULL, Node* prev_proj = NULL);
 
 public:
 
--- a/test/hotspot/jtreg/compiler/loopopts/IterationSplitPredicateInconsistency.java	Thu Jun 14 16:56:58 2018 +0900
+++ b/test/hotspot/jtreg/compiler/loopopts/IterationSplitPredicateInconsistency.java	Thu Jun 14 11:22:04 2018 +0200
@@ -23,12 +23,11 @@
 
 /**
  * @test
- * @bug 8193130 8203915
+ * @bug 8193130
  * @summary Bad graph when unrolled loop bounds conflicts with range checks
  *
  * @run main/othervm IterationSplitPredicateInconsistency
  * @run main/othervm -XX:-UseLoopPredicate IterationSplitPredicateInconsistency
- * @run main/othervm -XX:LoopStripMiningIter=0 IterationSplitPredicateInconsistency
  *
  */
 
--- a/test/hotspot/jtreg/compiler/loopopts/TestOverunrolling.java	Thu Jun 14 16:56:58 2018 +0900
+++ b/test/hotspot/jtreg/compiler/loopopts/TestOverunrolling.java	Thu Jun 14 11:22:04 2018 +0200
@@ -23,12 +23,11 @@
 
 /*
  * @test
- * @bug 8159016 8202949 8203915
+ * @bug 8159016 8202949
  * @summary Tests correct dominator information after over-unrolling a loop.
  * @requires vm.gc == "Parallel" | vm.gc == "null"
  * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockExperimentalVMOptions
  *                   -Xcomp -XX:-TieredCompilation -XX:-UseSwitchProfiling
- *                   -XX:-UseCountedLoopSafepoints -XX:LoopUnrollLimit=250
  *                   -XX:-UseG1GC -XX:+UseParallelGC compiler.loopopts.TestOverunrolling
  */
 
@@ -82,74 +81,11 @@
         }
     }
 
-    // Similar to test2 but we cannot statically determine the upper bound of
-    // the inner for loop and can therefore not prevent over-unrolling.
-    public static void test3(int[] array) {
-        int[] iArr = new int[8];
-        for (int i = 0; i < array.length; i++) {
-            for (int j = 5; j < i; j++) {
-                int k = 1;
-                do {
-                    iArr[j] = 0;
-                    switch (k) {
-                    case 1:
-                        lFld = 0;
-                        break;
-                    case 10:
-                        dFld = 0;
-                        break;
-                    }
-                } while (++k < 1);
-            }
-        }
-    }
-
-    // Similar to test3 but with negative stride and constant outer loop limit
-    public static void test4(int[] array, boolean store) {
-        int[] iArr = new int[8];
-        for (int i = -8; i < 8; i++) {
-            for (int j = 5; j > i; j--) {
-                int k = 1;
-                do {
-                    if (store) {
-                        iArr[j] = 0;
-                    }
-                    switch (k) {
-                    case 1:
-                        lFld = 0;
-                        break;
-                    case 10:
-                        dFld = 0;
-                        break;
-                    }
-                } while (++k < 1);
-            }
-        }
-    }
-
-    // The inner for-loop is over-unrolled and vectorized resulting in
-    // a crash in the matcher because the memory input to a vector is top.
-    public static int test5(int[] array) {
-        int result = 0;
-        int[] iArr = new int[8];
-        for (int i = 0; i < array.length; i++) {
-            for (int j = 5; j < i; j++) {
-                iArr[j] += array[j];
-                result += array[j];
-            }
-        }
-        return result;
-    }
-
     public static void main(String args[]) {
         for (int i = 0; i < 42; ++i) {
             test1(i);
         }
         test2();
-        int[] array = new int[8];
-        test3(array);
-        test4(array, false);
-        test5(array);
     }
 }