src/hotspot/share/opto/loopnode.cpp
changeset 58354 e6b5ec45ab9e
parent 58061 fafba5cf3546
child 58471 bada0782842a
equal deleted inserted replaced
58353:146bb7afdcf4 58354:e6b5ec45ab9e
  2469   uint const sz = _body.size() + (_body.size() + 7) / 8;
  2469   uint const sz = _body.size() + (_body.size() + 7) / 8;
  2470   uint estimate = factor * (sz + bc) + cc;
  2470   uint estimate = factor * (sz + bc) + cc;
  2471 
  2471 
  2472   assert((estimate - cc) / factor == sz + bc, "overflow");
  2472   assert((estimate - cc) / factor == sz + bc, "overflow");
  2473 
  2473 
       
  2474   return estimate + est_loop_flow_merge_sz();
       
  2475 }
       
  2476 
       
  2477 // The Estimated Loop (full-) Unroll Size:
       
  2478 //   UnrollFactor * (~106% * BodySize) + CC + FanOutTerm,
       
  2479 // where CC is a (totally) ad-hoc/magic "clone" constant, used to ensure that
       
  2480 // node usage estimates made are on the safe side, for the most part. This is
       
  2481 // a "light" version of the loop clone size calculation (above), based on the
       
  2482 // assumption that most of the loop-construct overhead will be unraveled when
       
  2483 // (fully) unrolled. Defined for unroll factors larger or equal to one (>=1),
       
  2484 // including an overflow check and returning UINT_MAX in case of an overflow.
       
  2485 uint IdealLoopTree::est_loop_unroll_sz(uint factor) const {
       
  2486 
       
  2487   precond(factor > 0);
       
  2488 
       
  2489   // Take into account that after unroll conjoined heads and tails will fold.
       
  2490   uint const b0 = _body.size() - EMPTY_LOOP_SIZE;
       
  2491   uint const cc = 7;
       
  2492   uint const sz = b0 + (b0 + 15) / 16;
       
  2493   uint estimate = factor * sz + cc;
       
  2494 
       
  2495   if ((estimate - cc) / factor != sz) {
       
  2496     return UINT_MAX;
       
  2497   }
       
  2498 
       
  2499   return estimate + est_loop_flow_merge_sz();
       
  2500 }
       
  2501 
       
  2502 // Estimate the growth effect (in nodes) of merging control and data flow when
       
  2503 // cloning a loop body, based on the amount of  control and data flow reaching
       
  2504 // outside of the (current) loop body.
       
  2505 uint IdealLoopTree::est_loop_flow_merge_sz() const {
       
  2506 
  2474   uint ctrl_edge_out_cnt = 0;
  2507   uint ctrl_edge_out_cnt = 0;
  2475   uint data_edge_out_cnt = 0;
  2508   uint data_edge_out_cnt = 0;
  2476 
  2509 
  2477   for (uint i = 0; i < _body.size(); i++) {
  2510   for (uint i = 0; i < _body.size(); i++) {
  2478     Node* node = _body.at(i);
  2511     Node* node = _body.at(i);
  2492           data_edge_out_cnt++;
  2525           data_edge_out_cnt++;
  2493         }
  2526         }
  2494       }
  2527       }
  2495     }
  2528     }
  2496   }
  2529   }
  2497   // Add data and control count (x2.0) to estimate iff both are > 0. This is
  2530   // Use data and control count (x2.0) in estimate iff both are > 0. This is
  2498   // a rather pessimistic estimate for the most part, in particular for some
  2531   // a rather pessimistic estimate for the most part, in particular for some
  2499   // complex loops, but still not enough to capture all loops.
  2532   // complex loops, but still not enough to capture all loops.
  2500   if (ctrl_edge_out_cnt > 0 && data_edge_out_cnt > 0) {
  2533   if (ctrl_edge_out_cnt > 0 && data_edge_out_cnt > 0) {
  2501     estimate += 2 * (ctrl_edge_out_cnt + data_edge_out_cnt);
  2534     return 2 * (ctrl_edge_out_cnt + data_edge_out_cnt);
  2502   }
  2535   }
  2503 
  2536   return 0;
  2504   return estimate;
       
  2505 }
  2537 }
  2506 
  2538 
  2507 #ifndef PRODUCT
  2539 #ifndef PRODUCT
  2508 //------------------------------dump_head--------------------------------------
  2540 //------------------------------dump_head--------------------------------------
  2509 // Dump 1 liner for loop header info
  2541 // Dump 1 liner for loop header info
  2510 void IdealLoopTree::dump_head() const {
  2542 void IdealLoopTree::dump_head() const {
  2511   for (uint i = 0; i < _nest; i++) {
  2543   tty->sp(2 * _nest);
  2512     tty->print("  ");
  2544   tty->print("Loop: N%d/N%d ", _head->_idx, _tail->_idx);
  2513   }
       
  2514   tty->print("Loop: N%d/N%d ",_head->_idx,_tail->_idx);
       
  2515   if (_irreducible) tty->print(" IRREDUCIBLE");
  2545   if (_irreducible) tty->print(" IRREDUCIBLE");
  2516   Node* entry = _head->is_Loop() ? _head->as_Loop()->skip_strip_mined(-1)->in(LoopNode::EntryControl) : _head->in(LoopNode::EntryControl);
  2546   Node* entry = _head->is_Loop() ? _head->as_Loop()->skip_strip_mined(-1)->in(LoopNode::EntryControl) : _head->in(LoopNode::EntryControl);
  2517   Node* predicate = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
  2547   Node* predicate = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
  2518   if (predicate != NULL ) {
  2548   if (predicate != NULL ) {
  2519     tty->print(" limit_check");
  2549     tty->print(" limit_check");
  4499 }
  4529 }
  4500 #endif
  4530 #endif
  4501 
  4531 
  4502 #ifndef PRODUCT
  4532 #ifndef PRODUCT
  4503 //------------------------------dump-------------------------------------------
  4533 //------------------------------dump-------------------------------------------
  4504 void PhaseIdealLoop::dump( ) const {
  4534 void PhaseIdealLoop::dump() const {
  4505   ResourceMark rm;
  4535   ResourceMark rm;
  4506   Arena* arena = Thread::current()->resource_area();
  4536   Arena* arena = Thread::current()->resource_area();
  4507   Node_Stack stack(arena, C->live_nodes() >> 2);
  4537   Node_Stack stack(arena, C->live_nodes() >> 2);
  4508   Node_List rpo_list;
  4538   Node_List rpo_list;
  4509   VectorSet visited(arena);
  4539   VectorSet visited(arena);
  4510   visited.set(C->top()->_idx);
  4540   visited.set(C->top()->_idx);
  4511   rpo( C->root(), stack, visited, rpo_list );
  4541   rpo(C->root(), stack, visited, rpo_list);
  4512   // Dump root loop indexed by last element in PO order
  4542   // Dump root loop indexed by last element in PO order
  4513   dump( _ltree_root, rpo_list.size(), rpo_list );
  4543   dump(_ltree_root, rpo_list.size(), rpo_list);
  4514 }
  4544 }
  4515 
  4545 
  4516 void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const {
  4546 void PhaseIdealLoop::dump(IdealLoopTree* loop, uint idx, Node_List &rpo_list) const {
  4517   loop->dump_head();
  4547   loop->dump_head();
  4518 
  4548 
  4519   // Now scan for CFG nodes in the same loop
  4549   // Now scan for CFG nodes in the same loop
  4520   for( uint j=idx; j > 0;  j-- ) {
  4550   for (uint j = idx; j > 0; j--) {
  4521     Node *n = rpo_list[j-1];
  4551     Node* n = rpo_list[j-1];
  4522     if( !_nodes[n->_idx] )      // Skip dead nodes
  4552     if (!_nodes[n->_idx])      // Skip dead nodes
  4523       continue;
  4553       continue;
  4524     if( get_loop(n) != loop ) { // Wrong loop nest
  4554 
  4525       if( get_loop(n)->_head == n &&    // Found nested loop?
  4555     if (get_loop(n) != loop) { // Wrong loop nest
  4526           get_loop(n)->_parent == loop )
  4556       if (get_loop(n)->_head == n &&    // Found nested loop?
  4527         dump(get_loop(n),rpo_list.size(),rpo_list);     // Print it nested-ly
  4557           get_loop(n)->_parent == loop)
       
  4558         dump(get_loop(n), rpo_list.size(), rpo_list);     // Print it nested-ly
  4528       continue;
  4559       continue;
  4529     }
  4560     }
  4530 
  4561 
  4531     // Dump controlling node
  4562     // Dump controlling node
  4532     for( uint x = 0; x < loop->_nest; x++ )
  4563     tty->sp(2 * loop->_nest);
  4533       tty->print("  ");
       
  4534     tty->print("C");
  4564     tty->print("C");
  4535     if( n == C->root() ) {
  4565     if (n == C->root()) {
  4536       n->dump();
  4566       n->dump();
  4537     } else {
  4567     } else {
  4538       Node* cached_idom   = idom_no_update(n);
  4568       Node* cached_idom   = idom_no_update(n);
  4539       Node *computed_idom = n->in(0);
  4569       Node* computed_idom = n->in(0);
  4540       if( n->is_Region() ) {
  4570       if (n->is_Region()) {
  4541         computed_idom = compute_idom(n);
  4571         computed_idom = compute_idom(n);
  4542         // computed_idom() will return n->in(0) when idom(n) is an IfNode (or
  4572         // computed_idom() will return n->in(0) when idom(n) is an IfNode (or
  4543         // any MultiBranch ctrl node), so apply a similar transform to
  4573         // any MultiBranch ctrl node), so apply a similar transform to
  4544         // the cached idom returned from idom_no_update.
  4574         // the cached idom returned from idom_no_update.
  4545         cached_idom = find_non_split_ctrl(cached_idom);
  4575         cached_idom = find_non_split_ctrl(cached_idom);
  4546       }
  4576       }
  4547       tty->print(" ID:%d",computed_idom->_idx);
  4577       tty->print(" ID:%d", computed_idom->_idx);
  4548       n->dump();
  4578       n->dump();
  4549       if( cached_idom != computed_idom ) {
  4579       if (cached_idom != computed_idom) {
  4550         tty->print_cr("*** BROKEN IDOM!  Computed as: %d, cached as: %d",
  4580         tty->print_cr("*** BROKEN IDOM!  Computed as: %d, cached as: %d",
  4551                       computed_idom->_idx, cached_idom->_idx);
  4581                       computed_idom->_idx, cached_idom->_idx);
  4552       }
  4582       }
  4553     }
  4583     }
  4554     // Dump nodes it controls
  4584     // Dump nodes it controls
  4555     for( uint k = 0; k < _nodes.Size(); k++ ) {
  4585     for (uint k = 0; k < _nodes.Size(); k++) {
  4556       // (k < C->unique() && get_ctrl(find(k)) == n)
  4586       // (k < C->unique() && get_ctrl(find(k)) == n)
  4557       if (k < C->unique() && _nodes[k] == (Node*)((intptr_t)n + 1)) {
  4587       if (k < C->unique() && _nodes[k] == (Node*)((intptr_t)n + 1)) {
  4558         Node *m = C->root()->find(k);
  4588         Node* m = C->root()->find(k);
  4559         if( m && m->outcnt() > 0 ) {
  4589         if (m && m->outcnt() > 0) {
  4560           if (!(has_ctrl(m) && get_ctrl_no_update(m) == n)) {
  4590           if (!(has_ctrl(m) && get_ctrl_no_update(m) == n)) {
  4561             tty->print_cr("*** BROKEN CTRL ACCESSOR!  _nodes[k] is %p, ctrl is %p",
  4591             tty->print_cr("*** BROKEN CTRL ACCESSOR!  _nodes[k] is %p, ctrl is %p",
  4562                           _nodes[k], has_ctrl(m) ? get_ctrl_no_update(m) : NULL);
  4592                           _nodes[k], has_ctrl(m) ? get_ctrl_no_update(m) : NULL);
  4563           }
  4593           }
  4564           for( uint j = 0; j < loop->_nest; j++ )
  4594           tty->sp(2 * loop->_nest + 1);
  4565             tty->print("  ");
       
  4566           tty->print(" ");
       
  4567           m->dump();
  4595           m->dump();
  4568         }
  4596         }
  4569       }
  4597       }
  4570     }
  4598     }
  4571   }
  4599   }
  4572 }
  4600 }
  4573 #endif
  4601 #endif
  4574 
  4602 
  4575 // Collect a R-P-O for the whole CFG.
  4603 // Collect a R-P-O for the whole CFG.
  4576 // Result list is in post-order (scan backwards for RPO)
  4604 // Result list is in post-order (scan backwards for RPO)
  4577 void PhaseIdealLoop::rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const {
  4605 void PhaseIdealLoop::rpo(Node* start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list) const {
  4578   stk.push(start, 0);
  4606   stk.push(start, 0);
  4579   visited.set(start->_idx);
  4607   visited.set(start->_idx);
  4580 
  4608 
  4581   while (stk.is_nonempty()) {
  4609   while (stk.is_nonempty()) {
  4582     Node* m   = stk.node();
  4610     Node* m   = stk.node();
  4594   }
  4622   }
  4595 }
  4623 }
  4596 
  4624 
  4597 
  4625 
  4598 //=============================================================================
  4626 //=============================================================================
  4599 //------------------------------LoopTreeIterator-----------------------------------
  4627 //------------------------------LoopTreeIterator-------------------------------
  4600 
  4628 
  4601 // Advance to next loop tree using a preorder, left-to-right traversal.
  4629 // Advance to next loop tree using a preorder, left-to-right traversal.
  4602 void LoopTreeIterator::next() {
  4630 void LoopTreeIterator::next() {
  4603   assert(!done(), "must not be done.");
  4631   assert(!done(), "must not be done.");
  4604   if (_curnt->_child != NULL) {
  4632   if (_curnt->_child != NULL) {