2437 assert(loop->_child != this || (loop->_child->_child == NULL && loop->_child->_next == NULL), "would miss some loops"); |
2438 assert(loop->_child != this || (loop->_child->_child == NULL && loop->_child->_next == NULL), "would miss some loops"); |
2438 if (loop->_child && loop->_child != this) loop->_child->counted_loop(phase); |
2439 if (loop->_child && loop->_child != this) loop->_child->counted_loop(phase); |
2439 if (loop->_next) loop->_next ->counted_loop(phase); |
2440 if (loop->_next) loop->_next ->counted_loop(phase); |
2440 } |
2441 } |
2441 |
2442 |
|
2443 |
|
2444 // The Estimated Loop Clone Size: |
|
2445 // CloneFactor * (~112% * BodySize + BC) + CC + FanOutTerm, |
|
2446 // where BC and CC are totally ad-hoc/magic "body" and "clone" constants, |
|
2447 // respectively, used to ensure that the node usage estimates made are on the |
|
2448 // safe side, for the most part. The FanOutTerm is an attempt to estimate the |
|
2449 // possible additional/excessive nodes generated due to data and control flow |
|
2450 // merging, for edges reaching outside the loop. |
|
2451 uint IdealLoopTree::est_loop_clone_sz(uint factor) const { |
|
2452 |
|
2453 precond(0 < factor && factor < 16); |
|
2454 |
|
2455 uint const bc = 13; |
|
2456 uint const cc = 17; |
|
2457 uint const sz = _body.size() + (_body.size() + 7) / 8; |
|
2458 uint estimate = factor * (sz + bc) + cc; |
|
2459 |
|
2460 assert((estimate - cc) / factor == sz + bc, "overflow"); |
|
2461 |
|
2462 return estimate + est_loop_flow_merge_sz(); |
|
2463 } |
|
2464 |
|
2465 // The Estimated Loop (full-) Unroll Size: |
|
2466 // UnrollFactor * (~106% * BodySize) + CC + FanOutTerm, |
|
2467 // where CC is a (totally) ad-hoc/magic "clone" constant, used to ensure that |
|
2468 // node usage estimates made are on the safe side, for the most part. This is |
|
2469 // a "light" version of the loop clone size calculation (above), based on the |
|
2470 // assumption that most of the loop-construct overhead will be unraveled when |
|
2471 // (fully) unrolled. Defined for unroll factors larger or equal to one (>=1), |
|
2472 // including an overflow check and returning UINT_MAX in case of an overflow. |
|
2473 uint IdealLoopTree::est_loop_unroll_sz(uint factor) const { |
|
2474 |
|
2475 precond(factor > 0); |
|
2476 |
|
2477 // Take into account that after unroll conjoined heads and tails will fold. |
|
2478 uint const b0 = _body.size() - EMPTY_LOOP_SIZE; |
|
2479 uint const cc = 7; |
|
2480 uint const sz = b0 + (b0 + 15) / 16; |
|
2481 uint estimate = factor * sz + cc; |
|
2482 |
|
2483 if ((estimate - cc) / factor != sz) { |
|
2484 return UINT_MAX; |
|
2485 } |
|
2486 |
|
2487 return estimate + est_loop_flow_merge_sz(); |
|
2488 } |
|
2489 |
|
2490 // Estimate the growth effect (in nodes) of merging control and data flow when |
|
2491 // cloning a loop body, based on the amount of control and data flow reaching |
|
2492 // outside of the (current) loop body. |
|
2493 uint IdealLoopTree::est_loop_flow_merge_sz() const { |
|
2494 |
|
2495 uint ctrl_edge_out_cnt = 0; |
|
2496 uint data_edge_out_cnt = 0; |
|
2497 |
|
2498 for (uint i = 0; i < _body.size(); i++) { |
|
2499 Node* node = _body.at(i); |
|
2500 uint outcnt = node->outcnt(); |
|
2501 |
|
2502 for (uint k = 0; k < outcnt; k++) { |
|
2503 Node* out = node->raw_out(k); |
|
2504 |
|
2505 if (out->is_CFG()) { |
|
2506 if (!is_member(_phase->get_loop(out))) { |
|
2507 ctrl_edge_out_cnt++; |
|
2508 } |
|
2509 } else { |
|
2510 Node* ctrl = _phase->get_ctrl(out); |
|
2511 assert(ctrl->is_CFG(), "must be"); |
|
2512 if (!is_member(_phase->get_loop(ctrl))) { |
|
2513 data_edge_out_cnt++; |
|
2514 } |
|
2515 } |
|
2516 } |
|
2517 } |
|
2518 // Use data and control count (x2.0) in estimate iff both are > 0. This is |
|
2519 // a rather pessimistic estimate for the most part, in particular for some |
|
2520 // complex loops, but still not enough to capture all loops. |
|
2521 if (ctrl_edge_out_cnt > 0 && data_edge_out_cnt > 0) { |
|
2522 return 2 * (ctrl_edge_out_cnt + data_edge_out_cnt); |
|
2523 } |
|
2524 return 0; |
|
2525 } |
|
2526 |
2442 #ifndef PRODUCT |
2527 #ifndef PRODUCT |
2443 //------------------------------dump_head-------------------------------------- |
2528 //------------------------------dump_head-------------------------------------- |
2444 // Dump 1 liner for loop header info |
2529 // Dump 1 liner for loop header info |
2445 void IdealLoopTree::dump_head( ) const { |
2530 void IdealLoopTree::dump_head() const { |
2446 for (uint i=0; i<_nest; i++) |
2531 tty->sp(2 * _nest); |
2447 tty->print(" "); |
2532 tty->print("Loop: N%d/N%d ", _head->_idx, _tail->_idx); |
2448 tty->print("Loop: N%d/N%d ",_head->_idx,_tail->_idx); |
|
2449 if (_irreducible) tty->print(" IRREDUCIBLE"); |
2533 if (_irreducible) tty->print(" IRREDUCIBLE"); |
2450 Node* entry = _head->is_Loop() ? _head->as_Loop()->skip_strip_mined(-1)->in(LoopNode::EntryControl) : _head->in(LoopNode::EntryControl); |
2534 Node* entry = _head->is_Loop() ? _head->as_Loop()->skip_strip_mined(-1)->in(LoopNode::EntryControl) : _head->in(LoopNode::EntryControl); |
2451 Node* predicate = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check); |
2535 Node* predicate = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check); |
2452 if (predicate != NULL ) { |
2536 if (predicate != NULL ) { |
2453 tty->print(" limit_check"); |
2537 tty->print(" limit_check"); |
2936 } |
3016 } |
2937 return; |
3017 return; |
2938 } |
3018 } |
2939 |
3019 |
2940 if (ReassociateInvariants) { |
3020 if (ReassociateInvariants) { |
2941 AutoNodeBudget node_budget(this, AutoNodeBudget::NO_BUDGET_CHECK); |
|
2942 // Reassociate invariants and prep for split_thru_phi |
3021 // Reassociate invariants and prep for split_thru_phi |
2943 for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) { |
3022 for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) { |
2944 IdealLoopTree* lpt = iter.current(); |
3023 IdealLoopTree* lpt = iter.current(); |
2945 bool is_counted = lpt->is_counted(); |
3024 bool is_counted = lpt->is_counted(); |
2946 if (!is_counted || !lpt->is_innermost()) continue; |
3025 if (!is_counted || !lpt->is_innermost()) continue; |
2947 |
3026 |
2948 // check for vectorized loops, any reassociation of invariants was already done |
3027 // check for vectorized loops, any reassociation of invariants was already done |
2949 if (is_counted && lpt->_head->as_CountedLoop()->is_unroll_only()) continue; |
3028 if (is_counted && lpt->_head->as_CountedLoop()->is_unroll_only()) { |
2950 |
3029 continue; |
2951 lpt->reassociate_invariants(this); |
3030 } else { |
2952 |
3031 AutoNodeBudget node_budget(this); |
|
3032 lpt->reassociate_invariants(this); |
|
3033 } |
2953 // Because RCE opportunities can be masked by split_thru_phi, |
3034 // Because RCE opportunities can be masked by split_thru_phi, |
2954 // look for RCE candidates and inhibit split_thru_phi |
3035 // look for RCE candidates and inhibit split_thru_phi |
2955 // on just their loop-phi's for this pass of loop opts |
3036 // on just their loop-phi's for this pass of loop opts |
2956 if (SplitIfBlocks && do_split_ifs) { |
3037 if (SplitIfBlocks && do_split_ifs) { |
|
3038 AutoNodeBudget node_budget(this, AutoNodeBudget::NO_BUDGET_CHECK); |
2957 if (lpt->policy_range_check(this)) { |
3039 if (lpt->policy_range_check(this)) { |
2958 lpt->_rce_candidate = 1; // = true |
3040 lpt->_rce_candidate = 1; // = true |
2959 } |
3041 } |
2960 } |
3042 } |
2961 } |
3043 } |
3957 // instructions and for rescheduling the load. The users of the memory |
4035 // instructions and for rescheduling the load. The users of the memory |
3958 // input of this load are examined. Any use which is not a load and is |
4036 // input of this load are examined. Any use which is not a load and is |
3959 // dominated by early is considered a potentially interfering store. |
4037 // dominated by early is considered a potentially interfering store. |
3960 // This can produce false positives. |
4038 // This can produce false positives. |
3961 if (n->is_Load() && LCA != early) { |
4039 if (n->is_Load() && LCA != early) { |
3962 Node_List worklist; |
4040 int load_alias_idx = C->get_alias_index(n->adr_type()); |
3963 |
4041 if (C->alias_type(load_alias_idx)->is_rewritable()) { |
3964 Node *mem = n->in(MemNode::Memory); |
4042 |
3965 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { |
4043 Node_List worklist; |
3966 Node* s = mem->fast_out(i); |
4044 |
3967 worklist.push(s); |
4045 Node *mem = n->in(MemNode::Memory); |
3968 } |
4046 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { |
3969 while(worklist.size() != 0 && LCA != early) { |
4047 Node* s = mem->fast_out(i); |
3970 Node* s = worklist.pop(); |
4048 worklist.push(s); |
3971 if (s->is_Load() || s->Opcode() == Op_SafePoint || |
4049 } |
3972 (s->is_CallStaticJava() && s->as_CallStaticJava()->uncommon_trap_request() != 0)) { |
4050 while(worklist.size() != 0 && LCA != early) { |
3973 continue; |
4051 Node* s = worklist.pop(); |
3974 } else if (s->is_MergeMem()) { |
4052 if (s->is_Load() || s->Opcode() == Op_SafePoint || |
3975 for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) { |
4053 (s->is_CallStaticJava() && s->as_CallStaticJava()->uncommon_trap_request() != 0)) { |
3976 Node* s1 = s->fast_out(i); |
4054 continue; |
3977 worklist.push(s1); |
4055 } else if (s->is_MergeMem()) { |
3978 } |
4056 for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) { |
3979 } else { |
4057 Node* s1 = s->fast_out(i); |
3980 Node *sctrl = has_ctrl(s) ? get_ctrl(s) : s->in(0); |
4058 worklist.push(s1); |
3981 assert(sctrl != NULL || s->outcnt() == 0, "must have control"); |
4059 } |
3982 if (sctrl != NULL && !sctrl->is_top() && is_dominator(early, sctrl)) { |
4060 } else { |
3983 LCA = dom_lca_for_get_late_ctrl(LCA, sctrl, n); |
4061 Node *sctrl = has_ctrl(s) ? get_ctrl(s) : s->in(0); |
|
4062 assert(sctrl != NULL || s->outcnt() == 0, "must have control"); |
|
4063 if (sctrl != NULL && !sctrl->is_top() && C->can_alias(s->adr_type(), load_alias_idx) && is_dominator(early, sctrl)) { |
|
4064 LCA = dom_lca_for_get_late_ctrl(LCA, sctrl, n); |
|
4065 } |
3984 } |
4066 } |
3985 } |
4067 } |
3986 } |
4068 } |
3987 } |
4069 } |
3988 |
4070 |
4441 } |
4521 } |
4442 #endif |
4522 #endif |
4443 |
4523 |
4444 #ifndef PRODUCT |
4524 #ifndef PRODUCT |
4445 //------------------------------dump------------------------------------------- |
4525 //------------------------------dump------------------------------------------- |
4446 void PhaseIdealLoop::dump( ) const { |
4526 void PhaseIdealLoop::dump() const { |
4447 ResourceMark rm; |
4527 ResourceMark rm; |
4448 Arena* arena = Thread::current()->resource_area(); |
4528 Arena* arena = Thread::current()->resource_area(); |
4449 Node_Stack stack(arena, C->live_nodes() >> 2); |
4529 Node_Stack stack(arena, C->live_nodes() >> 2); |
4450 Node_List rpo_list; |
4530 Node_List rpo_list; |
4451 VectorSet visited(arena); |
4531 VectorSet visited(arena); |
4452 visited.set(C->top()->_idx); |
4532 visited.set(C->top()->_idx); |
4453 rpo( C->root(), stack, visited, rpo_list ); |
4533 rpo(C->root(), stack, visited, rpo_list); |
4454 // Dump root loop indexed by last element in PO order |
4534 // Dump root loop indexed by last element in PO order |
4455 dump( _ltree_root, rpo_list.size(), rpo_list ); |
4535 dump(_ltree_root, rpo_list.size(), rpo_list); |
4456 } |
4536 } |
4457 |
4537 |
4458 void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const { |
4538 void PhaseIdealLoop::dump(IdealLoopTree* loop, uint idx, Node_List &rpo_list) const { |
4459 loop->dump_head(); |
4539 loop->dump_head(); |
4460 |
4540 |
4461 // Now scan for CFG nodes in the same loop |
4541 // Now scan for CFG nodes in the same loop |
4462 for( uint j=idx; j > 0; j-- ) { |
4542 for (uint j = idx; j > 0; j--) { |
4463 Node *n = rpo_list[j-1]; |
4543 Node* n = rpo_list[j-1]; |
4464 if( !_nodes[n->_idx] ) // Skip dead nodes |
4544 if (!_nodes[n->_idx]) // Skip dead nodes |
4465 continue; |
4545 continue; |
4466 if( get_loop(n) != loop ) { // Wrong loop nest |
4546 |
4467 if( get_loop(n)->_head == n && // Found nested loop? |
4547 if (get_loop(n) != loop) { // Wrong loop nest |
4468 get_loop(n)->_parent == loop ) |
4548 if (get_loop(n)->_head == n && // Found nested loop? |
4469 dump(get_loop(n),rpo_list.size(),rpo_list); // Print it nested-ly |
4549 get_loop(n)->_parent == loop) |
|
4550 dump(get_loop(n), rpo_list.size(), rpo_list); // Print it nested-ly |
4470 continue; |
4551 continue; |
4471 } |
4552 } |
4472 |
4553 |
4473 // Dump controlling node |
4554 // Dump controlling node |
4474 for( uint x = 0; x < loop->_nest; x++ ) |
4555 tty->sp(2 * loop->_nest); |
4475 tty->print(" "); |
|
4476 tty->print("C"); |
4556 tty->print("C"); |
4477 if( n == C->root() ) { |
4557 if (n == C->root()) { |
4478 n->dump(); |
4558 n->dump(); |
4479 } else { |
4559 } else { |
4480 Node* cached_idom = idom_no_update(n); |
4560 Node* cached_idom = idom_no_update(n); |
4481 Node *computed_idom = n->in(0); |
4561 Node* computed_idom = n->in(0); |
4482 if( n->is_Region() ) { |
4562 if (n->is_Region()) { |
4483 computed_idom = compute_idom(n); |
4563 computed_idom = compute_idom(n); |
4484 // computed_idom() will return n->in(0) when idom(n) is an IfNode (or |
4564 // computed_idom() will return n->in(0) when idom(n) is an IfNode (or |
4485 // any MultiBranch ctrl node), so apply a similar transform to |
4565 // any MultiBranch ctrl node), so apply a similar transform to |
4486 // the cached idom returned from idom_no_update. |
4566 // the cached idom returned from idom_no_update. |
4487 cached_idom = find_non_split_ctrl(cached_idom); |
4567 cached_idom = find_non_split_ctrl(cached_idom); |
4488 } |
4568 } |
4489 tty->print(" ID:%d",computed_idom->_idx); |
4569 tty->print(" ID:%d", computed_idom->_idx); |
4490 n->dump(); |
4570 n->dump(); |
4491 if( cached_idom != computed_idom ) { |
4571 if (cached_idom != computed_idom) { |
4492 tty->print_cr("*** BROKEN IDOM! Computed as: %d, cached as: %d", |
4572 tty->print_cr("*** BROKEN IDOM! Computed as: %d, cached as: %d", |
4493 computed_idom->_idx, cached_idom->_idx); |
4573 computed_idom->_idx, cached_idom->_idx); |
4494 } |
4574 } |
4495 } |
4575 } |
4496 // Dump nodes it controls |
4576 // Dump nodes it controls |
4497 for( uint k = 0; k < _nodes.Size(); k++ ) { |
4577 for (uint k = 0; k < _nodes.Size(); k++) { |
4498 // (k < C->unique() && get_ctrl(find(k)) == n) |
4578 // (k < C->unique() && get_ctrl(find(k)) == n) |
4499 if (k < C->unique() && _nodes[k] == (Node*)((intptr_t)n + 1)) { |
4579 if (k < C->unique() && _nodes[k] == (Node*)((intptr_t)n + 1)) { |
4500 Node *m = C->root()->find(k); |
4580 Node* m = C->root()->find(k); |
4501 if( m && m->outcnt() > 0 ) { |
4581 if (m && m->outcnt() > 0) { |
4502 if (!(has_ctrl(m) && get_ctrl_no_update(m) == n)) { |
4582 if (!(has_ctrl(m) && get_ctrl_no_update(m) == n)) { |
4503 tty->print_cr("*** BROKEN CTRL ACCESSOR! _nodes[k] is %p, ctrl is %p", |
4583 tty->print_cr("*** BROKEN CTRL ACCESSOR! _nodes[k] is %p, ctrl is %p", |
4504 _nodes[k], has_ctrl(m) ? get_ctrl_no_update(m) : NULL); |
4584 _nodes[k], has_ctrl(m) ? get_ctrl_no_update(m) : NULL); |
4505 } |
4585 } |
4506 for( uint j = 0; j < loop->_nest; j++ ) |
4586 tty->sp(2 * loop->_nest + 1); |
4507 tty->print(" "); |
|
4508 tty->print(" "); |
|
4509 m->dump(); |
4587 m->dump(); |
4510 } |
4588 } |
4511 } |
4589 } |
4512 } |
4590 } |
4513 } |
4591 } |
4514 } |
4592 } |
4515 #endif |
4593 #endif |
4516 |
4594 |
4517 // Collect a R-P-O for the whole CFG. |
4595 // Collect a R-P-O for the whole CFG. |
4518 // Result list is in post-order (scan backwards for RPO) |
4596 // Result list is in post-order (scan backwards for RPO) |
4519 void PhaseIdealLoop::rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const { |
4597 void PhaseIdealLoop::rpo(Node* start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list) const { |
4520 stk.push(start, 0); |
4598 stk.push(start, 0); |
4521 visited.set(start->_idx); |
4599 visited.set(start->_idx); |
4522 |
4600 |
4523 while (stk.is_nonempty()) { |
4601 while (stk.is_nonempty()) { |
4524 Node* m = stk.node(); |
4602 Node* m = stk.node(); |