src/hotspot/share/opto/loopnode.cpp
branchdatagramsocketimpl-branch
changeset 58678 9cf78a70fa4f
parent 54705 fc7627bf4b01
child 58679 9c3209ff7550
--- a/src/hotspot/share/opto/loopnode.cpp	Thu Oct 17 20:27:44 2019 +0100
+++ b/src/hotspot/share/opto/loopnode.cpp	Thu Oct 17 20:53:35 2019 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -978,7 +978,7 @@
         wq.push(u);
         bool found_sfpt = false;
         for (uint next = 0; next < wq.size() && !found_sfpt; next++) {
-          Node *n = wq.at(next);
+          Node* n = wq.at(next);
           for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax && !found_sfpt; i++) {
             Node* u = n->fast_out(i);
             if (u == sfpt) {
@@ -992,6 +992,7 @@
         assert(found_sfpt, "no node in loop that's not input to safepoint");
       }
     }
+
     CountedLoopEndNode* cle = inner_out->in(0)->as_CountedLoopEnd();
     assert(cle == inner->loopexit_or_null(), "mismatch");
     bool has_skeleton = outer_le->in(1)->bottom_type()->singleton() && outer_le->in(1)->bottom_type()->is_int()->get_con() == 0;
@@ -2439,13 +2440,96 @@
   if (loop->_next)  loop->_next ->counted_loop(phase);
 }
 
+
+// The Estimated Loop Clone Size:
+//   CloneFactor * (~112% * BodySize + BC) + CC + FanOutTerm,
+// where  BC and  CC are  totally ad-hoc/magic  "body" and "clone" constants,
+// respectively, used to ensure that the node usage estimates made are on the
+// safe side, for the most part. The FanOutTerm is an attempt to estimate the
+// possible additional/excessive nodes generated due to data and control flow
+// merging, for edges reaching outside the loop.
+uint IdealLoopTree::est_loop_clone_sz(uint factor) const {
+
+  precond(0 < factor && factor < 16);
+
+  uint const bc = 13;
+  uint const cc = 17;
+  uint const sz = _body.size() + (_body.size() + 7) / 8;
+  uint estimate = factor * (sz + bc) + cc;
+
+  assert((estimate - cc) / factor == sz + bc, "overflow");
+
+  return estimate + est_loop_flow_merge_sz();
+}
+
+// The Estimated Loop (full-) Unroll Size:
+//   UnrollFactor * (~106% * BodySize) + CC + FanOutTerm,
+// where CC is a (totally) ad-hoc/magic "clone" constant, used to ensure that
+// node usage estimates made are on the safe side, for the most part. This is
+// a "light" version of the loop clone size calculation (above), based on the
+// assumption that most of the loop-construct overhead will be unraveled when
+// (fully) unrolled. Defined for unroll factors larger or equal to one (>=1),
+// including an overflow check and returning UINT_MAX in case of an overflow.
+uint IdealLoopTree::est_loop_unroll_sz(uint factor) const {
+
+  precond(factor > 0);
+
+  // Take into account that after unroll conjoined heads and tails will fold.
+  uint const b0 = _body.size() - EMPTY_LOOP_SIZE;
+  uint const cc = 7;
+  uint const sz = b0 + (b0 + 15) / 16;
+  uint estimate = factor * sz + cc;
+
+  if ((estimate - cc) / factor != sz) {
+    return UINT_MAX;
+  }
+
+  return estimate + est_loop_flow_merge_sz();
+}
+
+// Estimate the growth effect (in nodes) of merging control and data flow when
+// cloning a loop body, based on the amount of  control and data flow reaching
+// outside of the (current) loop body.
+uint IdealLoopTree::est_loop_flow_merge_sz() const {
+
+  uint ctrl_edge_out_cnt = 0;
+  uint data_edge_out_cnt = 0;
+
+  for (uint i = 0; i < _body.size(); i++) {
+    Node* node = _body.at(i);
+    uint outcnt = node->outcnt();
+
+    for (uint k = 0; k < outcnt; k++) {
+      Node* out = node->raw_out(k);
+
+      if (out->is_CFG()) {
+        if (!is_member(_phase->get_loop(out))) {
+          ctrl_edge_out_cnt++;
+        }
+      } else {
+        Node* ctrl = _phase->get_ctrl(out);
+        assert(ctrl->is_CFG(), "must be");
+        if (!is_member(_phase->get_loop(ctrl))) {
+          data_edge_out_cnt++;
+        }
+      }
+    }
+  }
+  // Use data and control count (x2.0) in estimate iff both are > 0. This is
+  // a rather pessimistic estimate for the most part, in particular for some
+  // complex loops, but still not enough to capture all loops.
+  if (ctrl_edge_out_cnt > 0 && data_edge_out_cnt > 0) {
+    return 2 * (ctrl_edge_out_cnt + data_edge_out_cnt);
+  }
+  return 0;
+}
+
 #ifndef PRODUCT
 //------------------------------dump_head--------------------------------------
 // Dump 1 liner for loop header info
-void IdealLoopTree::dump_head( ) const {
-  for (uint i=0; i<_nest; i++)
-    tty->print("  ");
-  tty->print("Loop: N%d/N%d ",_head->_idx,_tail->_idx);
+void IdealLoopTree::dump_head() const {
+  tty->sp(2 * _nest);
+  tty->print("Loop: N%d/N%d ", _head->_idx, _tail->_idx);
   if (_irreducible) tty->print(" IRREDUCIBLE");
   Node* entry = _head->is_Loop() ? _head->as_Loop()->skip_strip_mined(-1)->in(LoopNode::EntryControl) : _head->in(LoopNode::EntryControl);
   Node* predicate = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
@@ -2513,7 +2597,7 @@
 
 //------------------------------dump-------------------------------------------
 // Dump loops by loop tree
-void IdealLoopTree::dump( ) const {
+void IdealLoopTree::dump() const {
   dump_head();
   if (_child) _child->dump();
   if (_next)  _next ->dump();
@@ -2710,7 +2794,7 @@
 // Create a PhaseLoop.  Build the ideal Loop tree.  Map each Ideal Node to
 // its corresponding LoopNode.  If 'optimize' is true, do some loop cleanups.
 void PhaseIdealLoop::build_and_optimize(LoopOptsMode mode) {
-  bool do_split_ifs = (mode == LoopOptsDefault || mode == LoopOptsLastRound);
+  bool do_split_ifs = (mode == LoopOptsDefault);
   bool skip_loop_opts = (mode == LoopOptsNone);
 
   int old_progress = C->major_progress();
@@ -2870,9 +2954,7 @@
   build_loop_late( visited, worklist, nstack );
 
   if (_verify_only) {
-    // restore major progress flag
-    for (int i = 0; i < old_progress; i++)
-      C->set_major_progress();
+    C->restore_major_progress(old_progress);
     assert(C->unique() == unique, "verification mode made Nodes? ? ?");
     assert(_igvn._worklist.size() == orig_worklist_size, "shouldn't push anything");
     return;
@@ -2908,17 +2990,15 @@
     assert(C->unique() == unique, "non-optimize mode made Nodes? ? ?");
     return;
   }
-  if(VerifyLoopOptimizations) verify();
-  if(TraceLoopOpts && C->has_loops()) {
+  if (VerifyLoopOptimizations) verify();
+  if (TraceLoopOpts && C->has_loops()) {
     _ltree_root->dump();
   }
 #endif
 
   if (skip_loop_opts) {
     // restore major progress flag
-    for (int i = 0; i < old_progress; i++) {
-      C->set_major_progress();
-    }
+    C->restore_major_progress(old_progress);
 
     // Cleanup any modified bits
     _igvn.optimize();
@@ -2938,7 +3018,6 @@
   }
 
   if (ReassociateInvariants) {
-    AutoNodeBudget node_budget(this, AutoNodeBudget::NO_BUDGET_CHECK);
     // Reassociate invariants and prep for split_thru_phi
     for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
       IdealLoopTree* lpt = iter.current();
@@ -2946,14 +3025,17 @@
       if (!is_counted || !lpt->is_innermost()) continue;
 
       // check for vectorized loops, any reassociation of invariants was already done
-      if (is_counted && lpt->_head->as_CountedLoop()->is_unroll_only()) continue;
-
-      lpt->reassociate_invariants(this);
-
+      if (is_counted && lpt->_head->as_CountedLoop()->is_unroll_only()) {
+        continue;
+      } else {
+        AutoNodeBudget node_budget(this);
+        lpt->reassociate_invariants(this);
+      }
       // Because RCE opportunities can be masked by split_thru_phi,
       // look for RCE candidates and inhibit split_thru_phi
       // on just their loop-phi's for this pass of loop opts
       if (SplitIfBlocks && do_split_ifs) {
+        AutoNodeBudget node_budget(this, AutoNodeBudget::NO_BUDGET_CHECK);
         if (lpt->policy_range_check(this)) {
           lpt->_rce_candidate = 1; // = true
         }
@@ -2965,11 +3047,8 @@
   // that require basic-block info (like cloning through Phi's)
   if( SplitIfBlocks && do_split_ifs ) {
     visited.Clear();
-    split_if_with_blocks( visited, nstack, mode == LoopOptsLastRound );
+    split_if_with_blocks( visited, nstack);
     NOT_PRODUCT( if( VerifyLoopOptimizations ) verify(); );
-    if (mode == LoopOptsLastRound) {
-      C->set_major_progress();
-    }
   }
 
   if (!C->major_progress() && do_expensive_nodes && process_expensive_nodes()) {
@@ -3104,8 +3183,7 @@
   _ltree_root->verify_tree(loop_verify._ltree_root, NULL);
   // Reset major-progress.  It was cleared by creating a verify version of
   // PhaseIdealLoop.
-  for( int i=0; i<old_progress; i++ )
-    C->set_major_progress();
+  C->restore_major_progress(old_progress);
 }
 
 //------------------------------verify_compare---------------------------------
@@ -3591,7 +3669,7 @@
           Node *frame = new ParmNode( C->start(), TypeFunc::FramePtr );
           _igvn.register_new_node_with_optimizer(frame);
           // Halt & Catch Fire
-          Node *halt = new HaltNode( if_f, frame );
+          Node* halt = new HaltNode(if_f, frame, "never-taken loop exit reached");
           _igvn.register_new_node_with_optimizer(halt);
           set_loop(halt, l);
           C->root()->add_req(halt);
@@ -3959,28 +4037,32 @@
   // dominated by early is considered a potentially interfering store.
   // This can produce false positives.
   if (n->is_Load() && LCA != early) {
-    Node_List worklist;
-
-    Node *mem = n->in(MemNode::Memory);
-    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
-      Node* s = mem->fast_out(i);
-      worklist.push(s);
-    }
-    while(worklist.size() != 0 && LCA != early) {
-      Node* s = worklist.pop();
-      if (s->is_Load() || s->Opcode() == Op_SafePoint ||
-          (s->is_CallStaticJava() && s->as_CallStaticJava()->uncommon_trap_request() != 0)) {
-        continue;
-      } else if (s->is_MergeMem()) {
-        for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
-          Node* s1 = s->fast_out(i);
-          worklist.push(s1);
-        }
-      } else {
-        Node *sctrl = has_ctrl(s) ? get_ctrl(s) : s->in(0);
-        assert(sctrl != NULL || s->outcnt() == 0, "must have control");
-        if (sctrl != NULL && !sctrl->is_top() && is_dominator(early, sctrl)) {
-          LCA = dom_lca_for_get_late_ctrl(LCA, sctrl, n);
+    int load_alias_idx = C->get_alias_index(n->adr_type());
+    if (C->alias_type(load_alias_idx)->is_rewritable()) {
+
+      Node_List worklist;
+
+      Node *mem = n->in(MemNode::Memory);
+      for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
+        Node* s = mem->fast_out(i);
+        worklist.push(s);
+      }
+      while(worklist.size() != 0 && LCA != early) {
+        Node* s = worklist.pop();
+        if (s->is_Load() || s->Opcode() == Op_SafePoint ||
+            (s->is_CallStaticJava() && s->as_CallStaticJava()->uncommon_trap_request() != 0)) {
+          continue;
+        } else if (s->is_MergeMem()) {
+          for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
+            Node* s1 = s->fast_out(i);
+            worklist.push(s1);
+          }
+        } else {
+          Node *sctrl = has_ctrl(s) ? get_ctrl(s) : s->in(0);
+          assert(sctrl != NULL || s->outcnt() == 0, "must have control");
+          if (sctrl != NULL && !sctrl->is_top() && C->can_alias(s->adr_type(), load_alias_idx) && is_dominator(early, sctrl)) {
+            LCA = dom_lca_for_get_late_ctrl(LCA, sctrl, n);
+          }
         }
       }
     }
@@ -4234,8 +4316,6 @@
     case Op_LoadL:
     case Op_LoadS:
     case Op_LoadP:
-    case Op_LoadBarrierSlowReg:
-    case Op_LoadBarrierWeakSlowReg:
     case Op_LoadN:
     case Op_LoadRange:
     case Op_LoadD_unaligned:
@@ -4443,69 +4523,67 @@
 
 #ifndef PRODUCT
 //------------------------------dump-------------------------------------------
-void PhaseIdealLoop::dump( ) const {
+void PhaseIdealLoop::dump() const {
   ResourceMark rm;
   Arena* arena = Thread::current()->resource_area();
   Node_Stack stack(arena, C->live_nodes() >> 2);
   Node_List rpo_list;
   VectorSet visited(arena);
   visited.set(C->top()->_idx);
-  rpo( C->root(), stack, visited, rpo_list );
+  rpo(C->root(), stack, visited, rpo_list);
   // Dump root loop indexed by last element in PO order
-  dump( _ltree_root, rpo_list.size(), rpo_list );
+  dump(_ltree_root, rpo_list.size(), rpo_list);
 }
 
-void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const {
+void PhaseIdealLoop::dump(IdealLoopTree* loop, uint idx, Node_List &rpo_list) const {
   loop->dump_head();
 
   // Now scan for CFG nodes in the same loop
-  for( uint j=idx; j > 0;  j-- ) {
-    Node *n = rpo_list[j-1];
-    if( !_nodes[n->_idx] )      // Skip dead nodes
+  for (uint j = idx; j > 0; j--) {
+    Node* n = rpo_list[j-1];
+    if (!_nodes[n->_idx])      // Skip dead nodes
       continue;
-    if( get_loop(n) != loop ) { // Wrong loop nest
-      if( get_loop(n)->_head == n &&    // Found nested loop?
-          get_loop(n)->_parent == loop )
-        dump(get_loop(n),rpo_list.size(),rpo_list);     // Print it nested-ly
+
+    if (get_loop(n) != loop) { // Wrong loop nest
+      if (get_loop(n)->_head == n &&    // Found nested loop?
+          get_loop(n)->_parent == loop)
+        dump(get_loop(n), rpo_list.size(), rpo_list);     // Print it nested-ly
       continue;
     }
 
     // Dump controlling node
-    for( uint x = 0; x < loop->_nest; x++ )
-      tty->print("  ");
+    tty->sp(2 * loop->_nest);
     tty->print("C");
-    if( n == C->root() ) {
+    if (n == C->root()) {
       n->dump();
     } else {
       Node* cached_idom   = idom_no_update(n);
-      Node *computed_idom = n->in(0);
-      if( n->is_Region() ) {
+      Node* computed_idom = n->in(0);
+      if (n->is_Region()) {
         computed_idom = compute_idom(n);
         // computed_idom() will return n->in(0) when idom(n) is an IfNode (or
         // any MultiBranch ctrl node), so apply a similar transform to
         // the cached idom returned from idom_no_update.
         cached_idom = find_non_split_ctrl(cached_idom);
       }
-      tty->print(" ID:%d",computed_idom->_idx);
+      tty->print(" ID:%d", computed_idom->_idx);
       n->dump();
-      if( cached_idom != computed_idom ) {
+      if (cached_idom != computed_idom) {
         tty->print_cr("*** BROKEN IDOM!  Computed as: %d, cached as: %d",
                       computed_idom->_idx, cached_idom->_idx);
       }
     }
     // Dump nodes it controls
-    for( uint k = 0; k < _nodes.Size(); k++ ) {
+    for (uint k = 0; k < _nodes.Size(); k++) {
       // (k < C->unique() && get_ctrl(find(k)) == n)
       if (k < C->unique() && _nodes[k] == (Node*)((intptr_t)n + 1)) {
-        Node *m = C->root()->find(k);
-        if( m && m->outcnt() > 0 ) {
+        Node* m = C->root()->find(k);
+        if (m && m->outcnt() > 0) {
           if (!(has_ctrl(m) && get_ctrl_no_update(m) == n)) {
             tty->print_cr("*** BROKEN CTRL ACCESSOR!  _nodes[k] is %p, ctrl is %p",
                           _nodes[k], has_ctrl(m) ? get_ctrl_no_update(m) : NULL);
           }
-          for( uint j = 0; j < loop->_nest; j++ )
-            tty->print("  ");
-          tty->print(" ");
+          tty->sp(2 * loop->_nest + 1);
           m->dump();
         }
       }
@@ -4516,7 +4594,7 @@
 
 // Collect a R-P-O for the whole CFG.
 // Result list is in post-order (scan backwards for RPO)
-void PhaseIdealLoop::rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const {
+void PhaseIdealLoop::rpo(Node* start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list) const {
   stk.push(start, 0);
   visited.set(start->_idx);
 
@@ -4538,7 +4616,7 @@
 
 
 //=============================================================================
-//------------------------------LoopTreeIterator-----------------------------------
+//------------------------------LoopTreeIterator-------------------------------
 
 // Advance to next loop tree using a preorder, left-to-right traversal.
 void LoopTreeIterator::next() {