2469 uint const sz = _body.size() + (_body.size() + 7) / 8; |
2469 uint const sz = _body.size() + (_body.size() + 7) / 8; |
2470 uint estimate = factor * (sz + bc) + cc; |
2470 uint estimate = factor * (sz + bc) + cc; |
2471 |
2471 |
2472 assert((estimate - cc) / factor == sz + bc, "overflow"); |
2472 assert((estimate - cc) / factor == sz + bc, "overflow"); |
2473 |
2473 |
|
2474 return estimate + est_loop_flow_merge_sz(); |
|
2475 } |
|
2476 |
|
2477 // The Estimated Loop (full-) Unroll Size: |
|
2478 // UnrollFactor * (~106% * BodySize) + CC + FanOutTerm, |
|
2479 // where CC is a (totally) ad-hoc/magic "clone" constant, used to ensure that |
|
2480 // node usage estimates made are on the safe side, for the most part. This is |
|
2481 // a "light" version of the loop clone size calculation (above), based on the |
|
2482 // assumption that most of the loop-construct overhead will be unraveled when |
|
2483 // (fully) unrolled. Defined for unroll factors larger or equal to one (>=1), |
|
2484 // including an overflow check and returning UINT_MAX in case of an overflow. |
|
2485 uint IdealLoopTree::est_loop_unroll_sz(uint factor) const { |
|
2486 |
|
2487 precond(factor > 0); |
|
2488 |
|
2489 // Take into account that after unroll conjoined heads and tails will fold. |
|
2490 uint const b0 = _body.size() - EMPTY_LOOP_SIZE; |
|
2491 uint const cc = 7; |
|
2492 uint const sz = b0 + (b0 + 15) / 16; |
|
2493 uint estimate = factor * sz + cc; |
|
2494 |
|
2495 if ((estimate - cc) / factor != sz) { |
|
2496 return UINT_MAX; |
|
2497 } |
|
2498 |
|
2499 return estimate + est_loop_flow_merge_sz(); |
|
2500 } |
|
2501 |
|
2502 // Estimate the growth effect (in nodes) of merging control and data flow when |
|
2503 // cloning a loop body, based on the amount of control and data flow reaching |
|
2504 // outside of the (current) loop body. |
|
2505 uint IdealLoopTree::est_loop_flow_merge_sz() const { |
|
2506 |
2474 uint ctrl_edge_out_cnt = 0; |
2507 uint ctrl_edge_out_cnt = 0; |
2475 uint data_edge_out_cnt = 0; |
2508 uint data_edge_out_cnt = 0; |
2476 |
2509 |
2477 for (uint i = 0; i < _body.size(); i++) { |
2510 for (uint i = 0; i < _body.size(); i++) { |
2478 Node* node = _body.at(i); |
2511 Node* node = _body.at(i); |
2492 data_edge_out_cnt++; |
2525 data_edge_out_cnt++; |
2493 } |
2526 } |
2494 } |
2527 } |
2495 } |
2528 } |
2496 } |
2529 } |
2497 // Add data and control count (x2.0) to estimate iff both are > 0. This is |
2530 // Use data and control count (x2.0) in estimate iff both are > 0. This is |
2498 // a rather pessimistic estimate for the most part, in particular for some |
2531 // a rather pessimistic estimate for the most part, in particular for some |
2499 // complex loops, but still not enough to capture all loops. |
2532 // complex loops, but still not enough to capture all loops. |
2500 if (ctrl_edge_out_cnt > 0 && data_edge_out_cnt > 0) { |
2533 if (ctrl_edge_out_cnt > 0 && data_edge_out_cnt > 0) { |
2501 estimate += 2 * (ctrl_edge_out_cnt + data_edge_out_cnt); |
2534 return 2 * (ctrl_edge_out_cnt + data_edge_out_cnt); |
2502 } |
2535 } |
2503 |
2536 return 0; |
2504 return estimate; |
|
2505 } |
2537 } |
2506 |
2538 |
2507 #ifndef PRODUCT |
2539 #ifndef PRODUCT |
2508 //------------------------------dump_head-------------------------------------- |
2540 //------------------------------dump_head-------------------------------------- |
2509 // Dump 1 liner for loop header info |
2541 // Dump 1 liner for loop header info |
2510 void IdealLoopTree::dump_head() const { |
2542 void IdealLoopTree::dump_head() const { |
2511 for (uint i = 0; i < _nest; i++) { |
2543 tty->sp(2 * _nest); |
2512 tty->print(" "); |
2544 tty->print("Loop: N%d/N%d ", _head->_idx, _tail->_idx); |
2513 } |
|
2514 tty->print("Loop: N%d/N%d ",_head->_idx,_tail->_idx); |
|
2515 if (_irreducible) tty->print(" IRREDUCIBLE"); |
2545 if (_irreducible) tty->print(" IRREDUCIBLE"); |
2516 Node* entry = _head->is_Loop() ? _head->as_Loop()->skip_strip_mined(-1)->in(LoopNode::EntryControl) : _head->in(LoopNode::EntryControl); |
2546 Node* entry = _head->is_Loop() ? _head->as_Loop()->skip_strip_mined(-1)->in(LoopNode::EntryControl) : _head->in(LoopNode::EntryControl); |
2517 Node* predicate = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check); |
2547 Node* predicate = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check); |
2518 if (predicate != NULL ) { |
2548 if (predicate != NULL ) { |
2519 tty->print(" limit_check"); |
2549 tty->print(" limit_check"); |
4499 } |
4529 } |
4500 #endif |
4530 #endif |
4501 |
4531 |
4502 #ifndef PRODUCT |
4532 #ifndef PRODUCT |
4503 //------------------------------dump------------------------------------------- |
4533 //------------------------------dump------------------------------------------- |
4504 void PhaseIdealLoop::dump( ) const { |
4534 void PhaseIdealLoop::dump() const { |
4505 ResourceMark rm; |
4535 ResourceMark rm; |
4506 Arena* arena = Thread::current()->resource_area(); |
4536 Arena* arena = Thread::current()->resource_area(); |
4507 Node_Stack stack(arena, C->live_nodes() >> 2); |
4537 Node_Stack stack(arena, C->live_nodes() >> 2); |
4508 Node_List rpo_list; |
4538 Node_List rpo_list; |
4509 VectorSet visited(arena); |
4539 VectorSet visited(arena); |
4510 visited.set(C->top()->_idx); |
4540 visited.set(C->top()->_idx); |
4511 rpo( C->root(), stack, visited, rpo_list ); |
4541 rpo(C->root(), stack, visited, rpo_list); |
4512 // Dump root loop indexed by last element in PO order |
4542 // Dump root loop indexed by last element in PO order |
4513 dump( _ltree_root, rpo_list.size(), rpo_list ); |
4543 dump(_ltree_root, rpo_list.size(), rpo_list); |
4514 } |
4544 } |
4515 |
4545 |
4516 void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const { |
4546 void PhaseIdealLoop::dump(IdealLoopTree* loop, uint idx, Node_List &rpo_list) const { |
4517 loop->dump_head(); |
4547 loop->dump_head(); |
4518 |
4548 |
4519 // Now scan for CFG nodes in the same loop |
4549 // Now scan for CFG nodes in the same loop |
4520 for( uint j=idx; j > 0; j-- ) { |
4550 for (uint j = idx; j > 0; j--) { |
4521 Node *n = rpo_list[j-1]; |
4551 Node* n = rpo_list[j-1]; |
4522 if( !_nodes[n->_idx] ) // Skip dead nodes |
4552 if (!_nodes[n->_idx]) // Skip dead nodes |
4523 continue; |
4553 continue; |
4524 if( get_loop(n) != loop ) { // Wrong loop nest |
4554 |
4525 if( get_loop(n)->_head == n && // Found nested loop? |
4555 if (get_loop(n) != loop) { // Wrong loop nest |
4526 get_loop(n)->_parent == loop ) |
4556 if (get_loop(n)->_head == n && // Found nested loop? |
4527 dump(get_loop(n),rpo_list.size(),rpo_list); // Print it nested-ly |
4557 get_loop(n)->_parent == loop) |
|
4558 dump(get_loop(n), rpo_list.size(), rpo_list); // Print it nested-ly |
4528 continue; |
4559 continue; |
4529 } |
4560 } |
4530 |
4561 |
4531 // Dump controlling node |
4562 // Dump controlling node |
4532 for( uint x = 0; x < loop->_nest; x++ ) |
4563 tty->sp(2 * loop->_nest); |
4533 tty->print(" "); |
|
4534 tty->print("C"); |
4564 tty->print("C"); |
4535 if( n == C->root() ) { |
4565 if (n == C->root()) { |
4536 n->dump(); |
4566 n->dump(); |
4537 } else { |
4567 } else { |
4538 Node* cached_idom = idom_no_update(n); |
4568 Node* cached_idom = idom_no_update(n); |
4539 Node *computed_idom = n->in(0); |
4569 Node* computed_idom = n->in(0); |
4540 if( n->is_Region() ) { |
4570 if (n->is_Region()) { |
4541 computed_idom = compute_idom(n); |
4571 computed_idom = compute_idom(n); |
4542 // computed_idom() will return n->in(0) when idom(n) is an IfNode (or |
4572 // computed_idom() will return n->in(0) when idom(n) is an IfNode (or |
4543 // any MultiBranch ctrl node), so apply a similar transform to |
4573 // any MultiBranch ctrl node), so apply a similar transform to |
4544 // the cached idom returned from idom_no_update. |
4574 // the cached idom returned from idom_no_update. |
4545 cached_idom = find_non_split_ctrl(cached_idom); |
4575 cached_idom = find_non_split_ctrl(cached_idom); |
4546 } |
4576 } |
4547 tty->print(" ID:%d",computed_idom->_idx); |
4577 tty->print(" ID:%d", computed_idom->_idx); |
4548 n->dump(); |
4578 n->dump(); |
4549 if( cached_idom != computed_idom ) { |
4579 if (cached_idom != computed_idom) { |
4550 tty->print_cr("*** BROKEN IDOM! Computed as: %d, cached as: %d", |
4580 tty->print_cr("*** BROKEN IDOM! Computed as: %d, cached as: %d", |
4551 computed_idom->_idx, cached_idom->_idx); |
4581 computed_idom->_idx, cached_idom->_idx); |
4552 } |
4582 } |
4553 } |
4583 } |
4554 // Dump nodes it controls |
4584 // Dump nodes it controls |
4555 for( uint k = 0; k < _nodes.Size(); k++ ) { |
4585 for (uint k = 0; k < _nodes.Size(); k++) { |
4556 // (k < C->unique() && get_ctrl(find(k)) == n) |
4586 // (k < C->unique() && get_ctrl(find(k)) == n) |
4557 if (k < C->unique() && _nodes[k] == (Node*)((intptr_t)n + 1)) { |
4587 if (k < C->unique() && _nodes[k] == (Node*)((intptr_t)n + 1)) { |
4558 Node *m = C->root()->find(k); |
4588 Node* m = C->root()->find(k); |
4559 if( m && m->outcnt() > 0 ) { |
4589 if (m && m->outcnt() > 0) { |
4560 if (!(has_ctrl(m) && get_ctrl_no_update(m) == n)) { |
4590 if (!(has_ctrl(m) && get_ctrl_no_update(m) == n)) { |
4561 tty->print_cr("*** BROKEN CTRL ACCESSOR! _nodes[k] is %p, ctrl is %p", |
4591 tty->print_cr("*** BROKEN CTRL ACCESSOR! _nodes[k] is %p, ctrl is %p", |
4562 _nodes[k], has_ctrl(m) ? get_ctrl_no_update(m) : NULL); |
4592 _nodes[k], has_ctrl(m) ? get_ctrl_no_update(m) : NULL); |
4563 } |
4593 } |
4564 for( uint j = 0; j < loop->_nest; j++ ) |
4594 tty->sp(2 * loop->_nest + 1); |
4565 tty->print(" "); |
|
4566 tty->print(" "); |
|
4567 m->dump(); |
4595 m->dump(); |
4568 } |
4596 } |
4569 } |
4597 } |
4570 } |
4598 } |
4571 } |
4599 } |
4572 } |
4600 } |
4573 #endif |
4601 #endif |
4574 |
4602 |
4575 // Collect a R-P-O for the whole CFG. |
4603 // Collect a R-P-O for the whole CFG. |
4576 // Result list is in post-order (scan backwards for RPO) |
4604 // Result list is in post-order (scan backwards for RPO) |
4577 void PhaseIdealLoop::rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const { |
4605 void PhaseIdealLoop::rpo(Node* start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list) const { |
4578 stk.push(start, 0); |
4606 stk.push(start, 0); |
4579 visited.set(start->_idx); |
4607 visited.set(start->_idx); |
4580 |
4608 |
4581 while (stk.is_nonempty()) { |
4609 while (stk.is_nonempty()) { |
4582 Node* m = stk.node(); |
4610 Node* m = stk.node(); |