src/hotspot/share/opto/loopTransform.cpp
branchdatagramsocketimpl-branch
changeset 58678 9cf78a70fa4f
parent 54705 fc7627bf4b01
child 58679 9c3209ff7550
--- a/src/hotspot/share/opto/loopTransform.cpp	Thu Oct 17 20:27:44 2019 +0100
+++ b/src/hotspot/share/opto/loopTransform.cpp	Thu Oct 17 20:53:35 2019 +0100
@@ -286,6 +286,9 @@
   Node* n2 = n1->in(3 - inv1_idx);
   int inv2_idx = is_invariant_addition(n2, phase);
   if (!inv2_idx) return NULL;
+
+  if (!phase->may_require_nodes(10, 10)) return NULL;
+
   Node* x    = n2->in(3 - inv2_idx);
   Node* inv2 = n2->in(inv2_idx);
 
@@ -337,61 +340,72 @@
       Node* nn = reassociate_add_sub(n, phase);
       if (nn == NULL) break;
       n = nn; // again
-    };
+    }
   }
 }
 
 //------------------------------policy_peeling---------------------------------
-// Return TRUE or FALSE if the loop should be peeled or not.  Peel if we can
-// make some loop-invariant test (usually a null-check) happen before the loop.
-bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) const {
-  IdealLoopTree *loop = (IdealLoopTree*)this;
+// Return TRUE if the loop should be peeled, otherwise return FALSE. Peeling
+// is applicable if we can make a loop-invariant test (usually a null-check)
+// execute before we enter the loop. When TRUE, the estimated node budget is
+// also requested.
+bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) {
+  uint estimate = estimate_peeling(phase);
+
+  return estimate == 0 ? false : phase->may_require_nodes(estimate);
+}
+
+// Perform actual policy and size estimate for the loop peeling transform, and
+// return the estimated loop size if peeling is applicable, otherwise return
+// zero. No node budget is allocated.
+uint IdealLoopTree::estimate_peeling(PhaseIdealLoop *phase) {
 
   // If nodes are depleted, some transform has miscalculated its needs.
   assert(!phase->exceeding_node_budget(), "sanity");
 
-  uint body_size = loop->_body.size();
-  // Peeling does loop cloning which can result in O(N^2) node construction
-  if (body_size > 255) {
-    return false;   // Prevent overflow for large body size
+  // Peeling does loop cloning which can result in O(N^2) node construction.
+  if (_body.size() > 255) {
+    return 0;   // Suppress too large body size.
   }
-  uint estimate = body_size * body_size;
+  // Optimistic estimate that approximates loop body complexity via data and
+  // control flow fan-out (instead of using the more pessimistic: BodySize^2).
+  uint estimate = est_loop_clone_sz(2);
+
   if (phase->exceeding_node_budget(estimate)) {
-    return false;   // Too large to safely clone
+    return 0;   // Too large to safely clone.
   }
 
-  // check for vectorized loops, any peeling done was already applied
+  // Check for vectorized loops, any peeling done was already applied.
   if (_head->is_CountedLoop()) {
     CountedLoopNode* cl = _head->as_CountedLoop();
     if (cl->is_unroll_only() || cl->trip_count() == 1) {
-      return false;
+      return 0;
     }
   }
 
-  Node* test = loop->tail();
-
-  while (test != _head) {       // Scan till run off top of loop
-    if (test->is_If()) {        // Test?
+  Node* test = tail();
+
+  while (test != _head) {   // Scan till run off top of loop
+    if (test->is_If()) {    // Test?
       Node *ctrl = phase->get_ctrl(test->in(1));
       if (ctrl->is_top()) {
-        return false;           // Found dead test on live IF?  No peeling!
+        return 0;           // Found dead test on live IF?  No peeling!
       }
-      // Standard IF only has one input value to check for loop invariance
+      // Standard IF only has one input value to check for loop invariance.
       assert(test->Opcode() == Op_If ||
              test->Opcode() == Op_CountedLoopEnd ||
              test->Opcode() == Op_RangeCheck,
              "Check this code when new subtype is added");
       // Condition is not a member of this loop?
       if (!is_member(phase->get_loop(ctrl)) && is_loop_exit(test)) {
-        // Found reason to peel!
-        return phase->may_require_nodes(estimate);
+        return estimate;    // Found reason to peel!
       }
     }
-    // Walk up dominators to loop _head looking for test which is
-    // executed on every path thru loop.
+    // Walk up dominators to loop _head looking for test which is executed on
+    // every path through the loop.
     test = phase->idom(test);
   }
-  return false;
+  return 0;
 }
 
 //------------------------------peeled_dom_test_elim---------------------------
@@ -638,8 +652,8 @@
     }
   }
 
-
   // Step 4: Correct dom-depth info.  Set to loop-head depth.
+
   int dd = dom_depth(head);
   set_idom(head, head->in(1), dd);
   for (uint j3 = 0; j3 < loop->_body.size(); j3++) {
@@ -657,57 +671,50 @@
   loop->record_for_igvn();
 }
 
-#define EMPTY_LOOP_SIZE 7 // number of nodes in an empty loop
-
 //------------------------------policy_maximally_unroll------------------------
-// Calculate exact loop trip count and return true if loop can be maximally
-// unrolled.
-bool IdealLoopTree::policy_maximally_unroll(PhaseIdealLoop *phase) const {
-  CountedLoopNode *cl = _head->as_CountedLoop();
+// Calculate the exact  loop trip-count and return TRUE if loop can be fully,
+// i.e. maximally, unrolled, otherwise return FALSE. When TRUE, the estimated
+// node budget is also requested.
+bool IdealLoopTree::policy_maximally_unroll(PhaseIdealLoop* phase) const {
+  CountedLoopNode* cl = _head->as_CountedLoop();
   assert(cl->is_normal_loop(), "");
   if (!cl->is_valid_counted_loop()) {
-    return false; // Malformed counted loop
+    return false;   // Malformed counted loop.
   }
   if (!cl->has_exact_trip_count()) {
-    // Trip count is not exact.
-    return false;
+    return false;   // Trip count is not exact.
   }
 
   uint trip_count = cl->trip_count();
   // Note, max_juint is used to indicate unknown trip count.
   assert(trip_count > 1, "one iteration loop should be optimized out already");
-  assert(trip_count < max_juint, "exact trip_count should be less than max_uint.");
+  assert(trip_count < max_juint, "exact trip_count should be less than max_juint.");
 
   // If nodes are depleted, some transform has miscalculated its needs.
   assert(!phase->exceeding_node_budget(), "sanity");
 
-  // Real policy: if we maximally unroll, does it get too big?
-  // Allow the unrolled mess to get larger than standard loop
-  // size.  After all, it will no longer be a loop.
-  uint body_size    = _body.size();
+  // Allow the unrolled body to get larger than the standard loop size limit.
   uint unroll_limit = (uint)LoopUnrollLimit * 4;
   assert((intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits");
-  if (trip_count > unroll_limit || body_size > unroll_limit) {
+  if (trip_count > unroll_limit || _body.size() > unroll_limit) {
     return false;
   }
 
-  // Take into account that after unroll conjoined heads and tails will fold,
-  // otherwise policy_unroll() may allow more unrolling than max unrolling.
-  uint new_body_size = est_loop_clone_sz(trip_count, body_size - EMPTY_LOOP_SIZE);
+  uint new_body_size = est_loop_unroll_sz(trip_count);
 
   if (new_body_size == UINT_MAX) { // Check for bad estimate (overflow).
     return false;
   }
 
-  // Fully unroll a loop with few iterations regardless next conditions since
-  // following loop optimizations will split such loop anyway (pre-main-post).
+  // Fully unroll a loop with few iterations, regardless of other conditions,
+  // since the following (general) loop optimizations will split such loop in
+  // any case (into pre-main-post).
   if (trip_count <= 3) {
     return phase->may_require_nodes(new_body_size);
   }
 
-  if (new_body_size > unroll_limit ||
-      // Unrolling can result in a large amount of node construction
-      phase->exceeding_node_budget(new_body_size)) {
+  // Reject if unrolling will result in too much node construction.
+  if (new_body_size > unroll_limit || phase->exceeding_node_budget(new_body_size)) {
     return false;
   }
 
@@ -742,8 +749,9 @@
 
 
 //------------------------------policy_unroll----------------------------------
-// Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if the
-// loop is a CountedLoop and the body is small enough.
+// Return TRUE or FALSE if the loop should be unrolled or not. Apply unroll if
+// the loop is  a counted loop and  the loop body is small  enough. When TRUE,
+// the estimated node budget is also requested.
 bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
 
   CountedLoopNode *cl = _head->as_CountedLoop();
@@ -887,7 +895,7 @@
     LoopMaxUnroll = slp_max_unroll_factor;
   }
 
-  uint estimate = est_loop_clone_sz(2, body_size);
+  uint estimate = est_loop_clone_sz(2);
 
   if (cl->has_passed_slp()) {
     if (slp_max_unroll_factor >= future_unroll_cnt) {
@@ -958,8 +966,10 @@
 }
 
 //------------------------------policy_range_check-----------------------------
-// Return TRUE or FALSE if the loop should be range-check-eliminated.
-// Actually we do iteration-splitting, a more powerful form of RCE.
+// Return TRUE or FALSE if the loop should be range-check-eliminated or not.
+// When TRUE, the estimated node budget is also requested.
+//
+// We will actually perform iteration-splitting, a more powerful form of RCE.
 bool IdealLoopTree::policy_range_check(PhaseIdealLoop *phase) const {
   if (!RangeCheckElimination) return false;
 
@@ -967,9 +977,9 @@
   assert(!phase->exceeding_node_budget(), "sanity");
 
   CountedLoopNode *cl = _head->as_CountedLoop();
-  // If we unrolled with no intention of doing RCE and we later
-  // changed our minds, we got no pre-loop.  Either we need to
-  // make a new pre-loop, or we gotta disallow RCE.
+  // If we unrolled  with no intention of doing RCE and we  later changed our
+  // minds, we got no pre-loop.  Either we need to make a new pre-loop, or we
+  // have to disallow RCE.
   if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
   Node *trip_counter = cl->phi();
 
@@ -1016,13 +1026,13 @@
       if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
         continue;
       }
-      // Found a test like 'trip+off vs  limit'.  Test is an IfNode, has two
-      // (2) projections.  If BOTH are in  the loop we need loop unswitching
-      // instead of iteration splitting.
+      // Found a test like 'trip+off vs limit'. Test is an IfNode, has two (2)
+      // projections. If BOTH are in the loop we need loop unswitching instead
+      // of iteration splitting.
       if (is_loop_exit(iff)) {
         // Found valid reason to split iterations (if there is room).
         // NOTE: Usually a gross overestimate.
-        return phase->may_require_nodes(est_loop_clone_sz(2, _body.size()));
+        return phase->may_require_nodes(est_loop_clone_sz(2));
       }
     } // End of is IF
   }
@@ -1257,7 +1267,7 @@
   Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
   register_new_node(frame, C->start());
   // It's impossible for the predicate to fail at runtime. Use an Halt node.
-  Node* halt = new HaltNode(other_proj, frame);
+  Node* halt = new HaltNode(other_proj, frame, "duplicated predicate failed which is impossible");
   C->root()->add_req(halt);
   new_iff->set_req(0, prev_proj);
 
@@ -1521,9 +1531,6 @@
   // only process vectorized main loops
   if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return;
 
-  if (!may_require_nodes(est_loop_clone_sz(2, loop->_body.size()))) {
-    return;
-  }
   int slp_max_unroll_factor = cl->slp_max_unroll();
   int cur_unroll = cl->unrolled_count();
 
@@ -1535,6 +1542,10 @@
   // we only ever process this one time
   if (cl->has_atomic_post_loop()) return;
 
+  if (!may_require_nodes(loop->est_loop_clone_sz(2))) {
+    return;
+  }
+
 #ifndef PRODUCT
   if (TraceLoopOpts) {
     tty->print("PostVector  ");
@@ -2418,7 +2429,7 @@
   register_control(iftrue, loop->_parent, new_iff);
   Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
   register_new_node(frame, C->start());
-  Node* halt = new HaltNode(iffalse, frame);
+  Node* halt = new HaltNode(iffalse, frame, "range check predicate failed which is impossible");
   register_control(halt, _ltree_root, iffalse);
   C->root()->add_req(halt);
   return iftrue;
@@ -3118,6 +3129,13 @@
     // We also need to replace the original limit to collapse loop exit.
     Node* cmp = cl->loopexit()->cmp_node();
     assert(cl->limit() == cmp->in(2), "sanity");
+    // Duplicate cmp node if it has other users
+    if (cmp->outcnt() > 1) {
+      cmp = cmp->clone();
+      cmp = phase->_igvn.register_new_node_with_optimizer(cmp);
+      BoolNode *bol = cl->loopexit()->in(CountedLoopEndNode::TestValue)->as_Bool();
+      phase->_igvn.replace_input_of(bol, 1, cmp); // put bol on worklist
+    }
     phase->_igvn._worklist.push(cmp->in(2)); // put limit on worklist
     phase->_igvn.replace_input_of(cmp, 2, exact_limit); // put cmp on worklist
   }
@@ -3178,9 +3196,6 @@
 
   AutoNodeBudget node_budget(phase);
 
-  bool should_peel     = policy_peeling(phase);
-  bool should_unswitch = policy_unswitching(phase);
-
   // Non-counted loops may be peeled; exactly 1 iteration is peeled.
   // This removes loop-invariant tests (usually null checks).
   if (!_head->is_CountedLoop()) { // Non-counted loop
@@ -3188,10 +3203,10 @@
       // Partial peel succeeded so terminate this round of loop opts
       return false;
     }
-    if (should_peel) {            // Should we peel?
+    if (policy_peeling(phase)) {    // Should we peel?
       if (PrintOpto) { tty->print_cr("should_peel"); }
-      phase->do_peeling(this,old_new);
-    } else if (should_unswitch) {
+      phase->do_peeling(this, old_new);
+    } else if (policy_unswitching(phase)) {
       phase->do_unswitching(this, old_new);
     }
     return true;
@@ -3209,12 +3224,11 @@
   // Before attempting fancy unrolling, RCE or alignment, see if we want
   // to completely unroll this loop or do loop unswitching.
   if (cl->is_normal_loop()) {
-    if (should_unswitch) {
+    if (policy_unswitching(phase)) {
       phase->do_unswitching(this, old_new);
       return true;
     }
-    bool should_maximally_unroll = policy_maximally_unroll(phase);
-    if (should_maximally_unroll) {
+    if (policy_maximally_unroll(phase)) {
       // Here we did some unrolling and peeling.  Eventually we will
       // completely unroll this loop and it will no longer be a loop.
       phase->do_maximally_unroll(this, old_new);
@@ -3222,6 +3236,9 @@
     }
   }
 
+  uint est_peeling = estimate_peeling(phase);
+  bool should_peel = 0 < est_peeling;
+
   // Counted loops may be peeled, may need some iterations run up
   // front for RCE, and may want to align loop refs to a cache
   // line.  Thus we clone a full loop up front whose trip count is
@@ -3252,14 +3269,15 @@
   // peeling.
   if (should_rce || should_align || should_unroll) {
     if (cl->is_normal_loop()) { // Convert to 'pre/main/post' loops
-      if (!phase->may_require_nodes(est_loop_clone_sz(3, _body.size()))) {
+      uint estimate = est_loop_clone_sz(3);
+      if (!phase->may_require_nodes(estimate)) {
         return false;
       }
-      phase->insert_pre_post_loops(this,old_new, !may_rce_align);
+      phase->insert_pre_post_loops(this, old_new, !may_rce_align);
     }
-    // Adjust the pre- and main-loop limits to let the pre and post loops run
-    // with full checks, but the main-loop with no checks.  Remove said
-    // checks from the main body.
+    // Adjust the pre- and main-loop limits to let the pre and  post loops run
+    // with full checks, but the main-loop with no checks.  Remove said checks
+    // from the main body.
     if (should_rce) {
       if (phase->do_range_check(this, old_new) != 0) {
         cl->mark_has_range_checks();
@@ -3293,7 +3311,9 @@
     }
   } else {                      // Else we have an unchanged counted loop
     if (should_peel) {          // Might want to peel but do nothing else
-      phase->do_peeling(this,old_new);
+      if (phase->may_require_nodes(est_peeling)) {
+        phase->do_peeling(this, old_new);
+      }
     }
   }
   return true;