jdk-sandbox: comparison src/hotspot/share/opto/loopnode.cpp

equal deleted inserted replaced

-:13588c901957
+:9cf78a70fa4f
 /*
-* Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved.
+* Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 }
 wq.clear();
 wq.push(u);
 bool found_sfpt = false;
 for (uint next = 0; next < wq.size() && !found_sfpt; next++) {
-Node *n = wq.at(next);
+Node* n = wq.at(next);
 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax && !found_sfpt; i++) {
 Node* u = n->fast_out(i);
 if (u == sfpt) {
 found_sfpt = true;
 }
 }
 }
 assert(found_sfpt, "no node in loop that's not input to safepoint");
 }
 }
 CountedLoopEndNode* cle = inner_out->in(0)->as_CountedLoopEnd();
 assert(cle == inner->loopexit_or_null(), "mismatch");
 bool has_skeleton = outer_le->in(1)->bottom_type()->singleton() && outer_le->in(1)->bottom_type()->is_int()->get_con() == 0;
 if (has_skeleton) {
 assert(expect_skeleton == 1 || expect_skeleton == -1, "unexpected skeleton node");
 assert(loop->_child != this || (loop->_child->_child == NULL && loop->_child->_next == NULL), "would miss some loops");
 if (loop->_child && loop->_child != this) loop->_child->counted_loop(phase);
 if (loop->_next)  loop->_next ->counted_loop(phase);
 }
+// The Estimated Loop Clone Size:
+//   CloneFactor * (~112% * BodySize + BC) + CC + FanOutTerm,
+// where BC and CC are totally ad-hoc/magic "body" and "clone" constants,
+// respectively, used to ensure that the node usage estimates made are on the
+// safe side, for the most part. The FanOutTerm is an attempt to estimate the
+// possible additional/excessive nodes generated due to data and control flow
+// merging, for edges reaching outside the loop (see est_loop_flow_merge_sz()).
+uint IdealLoopTree::est_loop_clone_sz(uint factor) const {
+precond(0 < factor && factor < 16); // the clone factor must lie in [1, 15]
+uint const bc = 13; // BC: ad-hoc "body" constant, added once per cloned copy
+uint const cc = 17; // CC: ad-hoc "clone" constant, added once per estimate
+uint const sz = _body.size() + (_body.size() + 7) / 8; // body size plus one eighth of it, rounded up (~112%)
+uint estimate = factor * (sz + bc) + cc;
+assert((estimate - cc) / factor == sz + bc, "overflow"); // dividing back must reproduce the per-copy size, else the product wrapped
+return estimate + est_loop_flow_merge_sz(); // add the FanOutTerm; this final addition is not overflow-checked here
+}
+// The Estimated Loop (full-) Unroll Size:
+//   UnrollFactor * (~106% * BodySize) + CC + FanOutTerm,
+// where CC is a (totally) ad-hoc/magic "clone" constant, used to ensure that
+// node usage estimates made are on the safe side, for the most part. This is
+// a "light" version of the loop clone size calculation (above), based on the
+// assumption that most of the loop-construct overhead will be unraveled when
+// (fully) unrolled. Defined for unroll factors greater than or equal to one,
+// including an overflow check that returns UINT_MAX when the product overflows.
+uint IdealLoopTree::est_loop_unroll_sz(uint factor) const {
+precond(factor > 0);
+// Take into account that after unroll conjoined heads and tails will fold.
+uint const b0 = _body.size() - EMPTY_LOOP_SIZE; // NOTE(review): unsigned subtraction; presumably _body.size() >= EMPTY_LOOP_SIZE for every caller - confirm
+uint const cc = 7; // CC: ad-hoc "clone" constant, added once per estimate
+uint const sz = b0 + (b0 + 15) / 16; // b0 plus one sixteenth of it, rounded up (~106%)
+uint estimate = factor * sz + cc;
+if ((estimate - cc) / factor != sz) { // dividing back does not reproduce sz: the product wrapped around
+return UINT_MAX;
+}
+return estimate + est_loop_flow_merge_sz(); // add the FanOutTerm; this final addition is not overflow-checked here
+}
+// Estimate the growth effect (in nodes) of merging control and data flow when
+// cloning a loop body, based on the amount of control and data flow reaching
+// outside of the (current) loop body. Returns 0 unless both kinds are present.
+uint IdealLoopTree::est_loop_flow_merge_sz() const {
+uint ctrl_edge_out_cnt = 0; // out-edges whose user is a CFG node rejected by is_member()
+uint data_edge_out_cnt = 0; // out-edges whose user is a non-CFG node with a control rejected by is_member()
+for (uint i = 0; i < _body.size(); i++) { // visit every node recorded in the loop body
+Node* node = _body.at(i);
+uint outcnt = node->outcnt();
+for (uint k = 0; k < outcnt; k++) { // visit every out-edge of that node
+Node* out = node->raw_out(k);
+if (out->is_CFG()) { // control user: classify by the loop of the user itself
+if (!is_member(_phase->get_loop(out))) {
+ctrl_edge_out_cnt++;
+}
+} else { // non-CFG user: classify by the loop of its control node
+Node* ctrl = _phase->get_ctrl(out);
+assert(ctrl->is_CFG(), "must be");
+if (!is_member(_phase->get_loop(ctrl))) {
+data_edge_out_cnt++;
+}
+}
+}
+}
+// Use data and control count (x2.0) in estimate iff both are > 0. This is
+// a rather pessimistic estimate for the most part, in particular for some
+// complex loops, but still not enough to capture all loops.
+if (ctrl_edge_out_cnt > 0 && data_edge_out_cnt > 0) {
+return 2 * (ctrl_edge_out_cnt + data_edge_out_cnt);
+}
+return 0; // at least one of the two counts is zero: no flow-merge term
+}
 #ifndef PRODUCT
 //------------------------------dump_head--------------------------------------
 // Dump 1 liner for loop header info
-void IdealLoopTree::dump_head( ) const {
+void IdealLoopTree::dump_head() const {
-for (uint i=0; i<_nest; i++)
+tty->sp(2 * _nest);
-tty->print("  ");
+tty->print("Loop: N%d/N%d ", _head->_idx, _tail->_idx);
-tty->print("Loop: N%d/N%d ",_head->_idx,_tail->_idx);
 if (_irreducible) tty->print(" IRREDUCIBLE");
 Node* entry = _head->is_Loop() ? _head->as_Loop()->skip_strip_mined(-1)->in(LoopNode::EntryControl) : _head->in(LoopNode::EntryControl);
 Node* predicate = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
 if (predicate != NULL ) {
 tty->print(" limit_check");
 tty->cr();
 }
 //------------------------------dump-------------------------------------------
 // Dump loops by loop tree: this loop's header line, then _child, then _next.
-void IdealLoopTree::dump( ) const {
+void IdealLoopTree::dump() const {
 dump_head(); // one-line summary of this loop
 if (_child) _child->dump(); // recurse into the child, if any
 if (_next)  _next ->dump(); // then continue along the _next link, if any
 }
 //=============================================================================
 //----------------------------build_and_optimize-------------------------------
 // Create a PhaseLoop.  Build the ideal Loop tree.  Map each Ideal Node to
 // its corresponding LoopNode.  If 'optimize' is true, do some loop cleanups.
 void PhaseIdealLoop::build_and_optimize(LoopOptsMode mode) {
-bool do_split_ifs = (mode == LoopOptsDefault || mode == LoopOptsLastRound);
+bool do_split_ifs = (mode == LoopOptsDefault);
 bool skip_loop_opts = (mode == LoopOptsNone);
 int old_progress = C->major_progress();
 uint orig_worklist_size = _igvn._worklist.size();
 NOT_PRODUCT( C->verify_graph_edges(); )
 worklist.push( C->top() );
 build_loop_late( visited, worklist, nstack );
 if (_verify_only) {
-// restore major progress flag
+C->restore_major_progress(old_progress);
-for (int i = 0; i < old_progress; i++)
-C->set_major_progress();
 assert(C->unique() == unique, "verification mode made Nodes? ? ?");
 assert(_igvn._worklist.size() == orig_worklist_size, "shouldn't push anything");
 return;
 }
 if (_verify_me) {             // Nested verify pass?
 // Check to see if the verify mode is broken
 assert(C->unique() == unique, "non-optimize mode made Nodes? ? ?");
 return;
 }
-if(VerifyLoopOptimizations) verify();
+if (VerifyLoopOptimizations) verify();
-if(TraceLoopOpts && C->has_loops()) {
+if (TraceLoopOpts && C->has_loops()) {
 _ltree_root->dump();
 }
 #endif
 if (skip_loop_opts) {
 // restore major progress flag
-for (int i = 0; i < old_progress; i++) {
+C->restore_major_progress(old_progress);
-C->set_major_progress();
-}
 // Cleanup any modified bits
 _igvn.optimize();
 if (C->log() != NULL) {
 }
 return;
 }
 if (ReassociateInvariants) {
-AutoNodeBudget node_budget(this, AutoNodeBudget::NO_BUDGET_CHECK);
 // Reassociate invariants and prep for split_thru_phi
 for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
 IdealLoopTree* lpt = iter.current();
 bool is_counted = lpt->is_counted();
 if (!is_counted || !lpt->is_innermost()) continue;
 // check for vectorized loops, any reassociation of invariants was already done
-if (is_counted && lpt->_head->as_CountedLoop()->is_unroll_only()) continue;
+if (is_counted && lpt->_head->as_CountedLoop()->is_unroll_only()) {
+continue;
-lpt->reassociate_invariants(this);
+} else {
+AutoNodeBudget node_budget(this);
+lpt->reassociate_invariants(this);
+}
 // Because RCE opportunities can be masked by split_thru_phi,
 // look for RCE candidates and inhibit split_thru_phi
 // on just their loop-phi's for this pass of loop opts
 if (SplitIfBlocks && do_split_ifs) {
+AutoNodeBudget node_budget(this, AutoNodeBudget::NO_BUDGET_CHECK);
 if (lpt->policy_range_check(this)) {
 lpt->_rce_candidate = 1; // = true
 }
 }
 }
 // Check for aggressive application of split-if and other transforms
 // that require basic-block info (like cloning through Phi's)
 if( SplitIfBlocks && do_split_ifs ) {
 visited.Clear();
-split_if_with_blocks( visited, nstack, mode == LoopOptsLastRound );
+split_if_with_blocks( visited, nstack);
 NOT_PRODUCT( if( VerifyLoopOptimizations ) verify(); );
-if (mode == LoopOptsLastRound) {
-C->set_major_progress();
-}
 }
 if (!C->major_progress() && do_expensive_nodes && process_expensive_nodes()) {
 C->set_major_progress();
 }
 assert( fail == 0, "verify loops failed" );
 // Verify loop structure is the same
 _ltree_root->verify_tree(loop_verify._ltree_root, NULL);
 // Reset major-progress.  It was cleared by creating a verify version of
 // PhaseIdealLoop.
-for( int i=0; i<old_progress; i++ )
+C->restore_major_progress(old_progress);
-C->set_major_progress();
 }
 //------------------------------verify_compare---------------------------------
 // Make sure me and the given PhaseIdealLoop agree on key data structures
 void PhaseIdealLoop::verify_compare( Node *n, const PhaseIdealLoop *loop_verify, VectorSet &visited ) const {
 // V-N'ing.  Easier and quicker than searching through
 // the program structure.
 Node *frame = new ParmNode( C->start(), TypeFunc::FramePtr );
 _igvn.register_new_node_with_optimizer(frame);
 // Halt & Catch Fire
-Node *halt = new HaltNode( if_f, frame );
+Node* halt = new HaltNode(if_f, frame, "never-taken loop exit reached");
 _igvn.register_new_node_with_optimizer(halt);
 set_loop(halt, l);
 C->root()->add_req(halt);
 }
 set_loop(C->root(), _ltree_root);
 // instructions and for rescheduling the load.  The users of the memory
 // input of this load are examined.  Any use which is not a load and is
 // dominated by early is considered a potentially interfering store.
 // This can produce false positives.
 if (n->is_Load() && LCA != early) {
-Node_List worklist;
+int load_alias_idx = C->get_alias_index(n->adr_type());
+if (C->alias_type(load_alias_idx)->is_rewritable()) {
-Node *mem = n->in(MemNode::Memory);
-for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
+Node_List worklist;
-Node* s = mem->fast_out(i);
-worklist.push(s);
+Node *mem = n->in(MemNode::Memory);
-}
+for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
-while(worklist.size() != 0 && LCA != early) {
+Node* s = mem->fast_out(i);
-Node* s = worklist.pop();
+worklist.push(s);
-if (s->is_Load() || s->Opcode() == Op_SafePoint ||
+}
-(s->is_CallStaticJava() && s->as_CallStaticJava()->uncommon_trap_request() != 0)) {
+while(worklist.size() != 0 && LCA != early) {
-continue;
+Node* s = worklist.pop();
-} else if (s->is_MergeMem()) {
+if (s->is_Load() || s->Opcode() == Op_SafePoint ||
-for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
+(s->is_CallStaticJava() && s->as_CallStaticJava()->uncommon_trap_request() != 0)) {
-Node* s1 = s->fast_out(i);
+continue;
-worklist.push(s1);
+} else if (s->is_MergeMem()) {
-}
+for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
-} else {
+Node* s1 = s->fast_out(i);
-Node *sctrl = has_ctrl(s) ? get_ctrl(s) : s->in(0);
+worklist.push(s1);
-assert(sctrl != NULL || s->outcnt() == 0, "must have control");
+}
-if (sctrl != NULL && !sctrl->is_top() && is_dominator(early, sctrl)) {
+} else {
-LCA = dom_lca_for_get_late_ctrl(LCA, sctrl, n);
+Node *sctrl = has_ctrl(s) ? get_ctrl(s) : s->in(0);
+assert(sctrl != NULL || s->outcnt() == 0, "must have control");
+if (sctrl != NULL && !sctrl->is_top() && C->can_alias(s->adr_type(), load_alias_idx) && is_dominator(early, sctrl)) {
+LCA = dom_lca_for_get_late_ctrl(LCA, sctrl, n);
+}
 }
 }
 }
 }
 case Op_LoadKlass:
 case Op_LoadNKlass:
 case Op_LoadL:
 case Op_LoadS:
 case Op_LoadP:
-case Op_LoadBarrierSlowReg:
-case Op_LoadBarrierWeakSlowReg:
 case Op_LoadN:
 case Op_LoadRange:
 case Op_LoadD_unaligned:
 case Op_LoadL_unaligned:
 case Op_StrComp:            // Does a bunch of load-like effects
 }
 #endif
 #ifndef PRODUCT
 //------------------------------dump-------------------------------------------
-void PhaseIdealLoop::dump( ) const {
+void PhaseIdealLoop::dump() const { // collect a post-order list of the CFG via rpo(), then dump starting at _ltree_root
 ResourceMark rm;
 Arena* arena = Thread::current()->resource_area();
 Node_Stack stack(arena, C->live_nodes() >> 2); // second argument is a quarter of the live node count
 Node_List rpo_list;
 VectorSet visited(arena);
 visited.set(C->top()->_idx); // mark top as visited before the walk starts
-rpo( C->root(), stack, visited, rpo_list );
+rpo(C->root(), stack, visited, rpo_list); // fills rpo_list, starting the walk at the root node
 // Dump root loop indexed by last element in PO order
-dump( _ltree_root, rpo_list.size(), rpo_list );
+dump(_ltree_root, rpo_list.size(), rpo_list);
 }
-void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const {
+void PhaseIdealLoop::dump(IdealLoopTree* loop, uint idx, Node_List &rpo_list) const { // dump 'loop': header line, then its CFG nodes and the nodes they control
 loop->dump_head();
 // Now scan for CFG nodes in the same loop
-for( uint j=idx; j > 0;  j-- ) {
+for (uint j = idx; j > 0; j--) { // walk rpo_list backwards, from entry idx-1 down to entry 0
-Node *n = rpo_list[j-1];
+Node* n = rpo_list[j-1];
-if( !_nodes[n->_idx] )      // Skip dead nodes
+if (!_nodes[n->_idx])      // Skip dead nodes
 continue;
-if( get_loop(n) != loop ) { // Wrong loop nest
-if( get_loop(n)->_head == n &&    // Found nested loop?
+if (get_loop(n) != loop) { // Wrong loop nest
-get_loop(n)->_parent == loop )
+if (get_loop(n)->_head == n &&    // Found nested loop?
-dump(get_loop(n),rpo_list.size(),rpo_list);     // Print it nested-ly
+get_loop(n)->_parent == loop)
+dump(get_loop(n), rpo_list.size(), rpo_list);     // Print it nested-ly
 continue;
 }
 // Dump controlling node
-for( uint x = 0; x < loop->_nest; x++ )
+tty->sp(2 * loop->_nest); // replaces the removed loop that printed two spaces per nesting level
-tty->print("  ");
 tty->print("C");
-if( n == C->root() ) {
+if (n == C->root()) { // the root node is dumped without an " ID:" prefix
 n->dump();
 } else {
 Node* cached_idom   = idom_no_update(n);
-Node *computed_idom = n->in(0);
+Node* computed_idom = n->in(0);
-if( n->is_Region() ) {
+if (n->is_Region()) {
 computed_idom = compute_idom(n);
 // computed_idom() will return n->in(0) when idom(n) is an IfNode (or
 // any MultiBranch ctrl node), so apply a similar transform to
 // the cached idom returned from idom_no_update.
 cached_idom = find_non_split_ctrl(cached_idom);
 }
-tty->print(" ID:%d",computed_idom->_idx);
+tty->print(" ID:%d", computed_idom->_idx);
 n->dump();
-if( cached_idom != computed_idom ) {
+if (cached_idom != computed_idom) { // report a mismatch between the cached and the recomputed idom
 tty->print_cr("*** BROKEN IDOM!  Computed as: %d, cached as: %d",
 computed_idom->_idx, cached_idom->_idx);
 }
 }
 // Dump nodes it controls
-for( uint k = 0; k < _nodes.Size(); k++ ) {
+for (uint k = 0; k < _nodes.Size(); k++) {
 // (k < C->unique() && get_ctrl(find(k)) == n)
 if (k < C->unique() && _nodes[k] == (Node*)((intptr_t)n + 1)) { // NOTE(review): looks like a tagged-pointer form of the commented-out test above - confirm
-Node *m = C->root()->find(k);
+Node* m = C->root()->find(k);
-if( m && m->outcnt() > 0 ) {
+if (m && m->outcnt() > 0) { // only nodes that were found and still have uses are printed
 if (!(has_ctrl(m) && get_ctrl_no_update(m) == n)) {
 tty->print_cr("*** BROKEN CTRL ACCESSOR!  _nodes[k] is %p, ctrl is %p",
 _nodes[k], has_ctrl(m) ? get_ctrl_no_update(m) : NULL);
 }
-for( uint j = 0; j < loop->_nest; j++ )
+tty->sp(2 * loop->_nest + 1); // replaces the removed loop plus the extra single space
-tty->print("  ");
-tty->print(" ");
 m->dump();
 }
 }
 }
 }
 }
 #endif
 // Collect a R-P-O for the whole CFG.
 // Result list is in post-order (scan backwards for RPO)
-void PhaseIdealLoop::rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const {
+void PhaseIdealLoop::rpo(Node* start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list) const {
 stk.push(start, 0);
 visited.set(start->_idx);
 while (stk.is_nonempty()) {
 Node* m   = stk.node();
 }
 }
 //=============================================================================
-//------------------------------LoopTreeIterator-----------------------------------
+//------------------------------LoopTreeIterator-------------------------------
 // Advance to next loop tree using a preorder, left-to-right traversal.
 void LoopTreeIterator::next() {
 assert(!done(), "must not be done.");
 if (_curnt->_child != NULL) {

branch	datagramsocketimpl-branch
changeset 58678	9cf78a70fa4f
parent 54705	fc7627bf4b01
child 58679	9c3209ff7550