3157 VectorSet not_peel(area); |
3157 VectorSet not_peel(area); |
3158 Node_List peel_list(area); |
3158 Node_List peel_list(area); |
3159 Node_List worklist(area); |
3159 Node_List worklist(area); |
3160 Node_List sink_list(area); |
3160 Node_List sink_list(area); |
3161 |
3161 |
3162 if (!may_require_nodes(loop->est_loop_clone_sz(2))) { |
3162 uint estimate = loop->est_loop_clone_sz(1); |
|
3163 if (exceeding_node_budget(estimate)) { |
3163 return false; |
3164 return false; |
3164 } |
3165 } |
3165 |
3166 |
3166 // Set of cfg nodes to peel are those that are executable from |
3167 // Set of cfg nodes to peel are those that are executable from |
3167 // the head through last_peel. |
3168 // the head through last_peel. |
3182 } |
3183 } |
3183 } |
3184 } |
3184 |
3185 |
3185 // Set of non-cfg nodes to peel are those that are control |
3186 // Set of non-cfg nodes to peel are those that are control |
3186 // dependent on the cfg nodes. |
3187 // dependent on the cfg nodes. |
3187 uint i; |
3188 for (uint i = 0; i < loop->_body.size(); i++) { |
3188 for(i = 0; i < loop->_body.size(); i++ ) { |
|
3189 Node *n = loop->_body.at(i); |
3189 Node *n = loop->_body.at(i); |
3190 Node *n_c = has_ctrl(n) ? get_ctrl(n) : n; |
3190 Node *n_c = has_ctrl(n) ? get_ctrl(n) : n; |
3191 if (peel.test(n_c->_idx)) { |
3191 if (peel.test(n_c->_idx)) { |
3192 peel.set(n->_idx); |
3192 peel.set(n->_idx); |
3193 } else { |
3193 } else { |
3198 // Step 2: move operations from the peeled section down into the |
3198 // Step 2: move operations from the peeled section down into the |
3199 // not-peeled section |
3199 // not-peeled section |
3200 |
3200 |
3201 // Get a post order schedule of nodes in the peel region |
3201 // Get a post order schedule of nodes in the peel region |
3202 // Result in right-most operand. |
3202 // Result in right-most operand. |
3203 scheduled_nodelist(loop, peel, peel_list ); |
3203 scheduled_nodelist(loop, peel, peel_list); |
3204 |
3204 |
3205 assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition"); |
3205 assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition"); |
3206 |
3206 |
3207 // For future check for too many new phis |
3207 // For future check for too many new phis |
3208 uint old_phi_cnt = 0; |
3208 uint old_phi_cnt = 0; |
3218 #endif |
3218 #endif |
3219 |
3219 |
3220 // Evacuate nodes in peel region into the not_peeled region if possible |
3220 // Evacuate nodes in peel region into the not_peeled region if possible |
3221 uint new_phi_cnt = 0; |
3221 uint new_phi_cnt = 0; |
3222 uint cloned_for_outside_use = 0; |
3222 uint cloned_for_outside_use = 0; |
3223 for (i = 0; i < peel_list.size();) { |
3223 for (uint i = 0; i < peel_list.size();) { |
3224 Node* n = peel_list.at(i); |
3224 Node* n = peel_list.at(i); |
3225 #ifndef PRODUCT |
3225 #ifndef PRODUCT |
3226 if (TracePartialPeeling) n->dump(); |
3226 if (TracePartialPeeling) n->dump(); |
3227 #endif |
3227 #endif |
3228 bool incr = true; |
3228 bool incr = true; |
3229 if ( !n->is_CFG() ) { |
3229 if (!n->is_CFG()) { |
3230 |
3230 if (has_use_in_set(n, not_peel)) { |
3231 if ( has_use_in_set(n, not_peel) ) { |
|
3232 |
|
3233 // If not used internal to the peeled region, |
3231 // If not used internal to the peeled region, |
3234 // move "n" from peeled to not_peeled region. |
3232 // move "n" from peeled to not_peeled region. |
3235 |
3233 if (!has_use_internal_to_set(n, peel, loop)) { |
3236 if ( !has_use_internal_to_set(n, peel, loop) ) { |
|
3237 |
|
3238 // if not pinned and not a load (which may be anti-dependent on a store) |
3234 // if not pinned and not a load (which may be anti-dependent on a store) |
3239 // and not a CMove (Matcher expects only bool->cmove). |
3235 // and not a CMove (Matcher expects only bool->cmove). |
3240 if (n->in(0) == NULL && !n->is_Load() && !n->is_CMove()) { |
3236 if (n->in(0) == NULL && !n->is_Load() && !n->is_CMove()) { |
3241 cloned_for_outside_use += clone_for_use_outside_loop( loop, n, worklist ); |
3237 cloned_for_outside_use += clone_for_use_outside_loop(loop, n, worklist); |
3242 sink_list.push(n); |
3238 sink_list.push(n); |
3243 peel >>= n->_idx; // delete n from peel set. |
3239 peel >>= n->_idx; // delete n from peel set. |
3244 not_peel <<= n->_idx; // add n to not_peel set. |
3240 not_peel <<= n->_idx; // add n to not_peel set. |
3245 peel_list.remove(i); |
3241 peel_list.remove(i); |
3246 incr = false; |
3242 incr = false; |
3252 #endif |
3248 #endif |
3253 } |
3249 } |
3254 } else { |
3250 } else { |
3255 // Otherwise check for special def-use cases that span |
3251 // Otherwise check for special def-use cases that span |
3256 // the peel/not_peel boundary such as bool->if |
3252 // the peel/not_peel boundary such as bool->if |
3257 clone_for_special_use_inside_loop( loop, n, not_peel, sink_list, worklist ); |
3253 clone_for_special_use_inside_loop(loop, n, not_peel, sink_list, worklist); |
3258 new_phi_cnt++; |
3254 new_phi_cnt++; |
3259 } |
3255 } |
3260 } |
3256 } |
3261 } |
3257 } |
3262 if (incr) i++; |
3258 if (incr) i++; |
3263 } |
3259 } |
3264 |
3260 |
3265 if (new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta) { |
3261 estimate += cloned_for_outside_use + new_phi_cnt; |
|
3262 bool exceed_node_budget = !may_require_nodes(estimate); |
|
3263 bool exceed_phi_limit = new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta; |
|
3264 |
|
3265 if (exceed_node_budget || exceed_phi_limit) { |
3266 #ifndef PRODUCT |
3266 #ifndef PRODUCT |
3267 if (TracePartialPeeling) { |
3267 if (TracePartialPeeling) { |
3268 tty->print_cr("\nToo many new phis: %d old %d new cmpi: %c", |
3268 tty->print_cr("\nToo many new phis: %d old %d new cmpi: %c", |
3269 new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F'); |
3269 new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F'); |
3270 } |
3270 } |
3308 |
3308 |
3309 clone_loop(loop, old_new, dd, IgnoreStripMined); |
3309 clone_loop(loop, old_new, dd, IgnoreStripMined); |
3310 |
3310 |
3311 const uint clone_exit_idx = 1; |
3311 const uint clone_exit_idx = 1; |
3312 const uint orig_exit_idx = 2; |
3312 const uint orig_exit_idx = 2; |
3313 assert(is_valid_clone_loop_form( loop, peel_list, orig_exit_idx, clone_exit_idx ), "bad clone loop"); |
3313 assert(is_valid_clone_loop_form(loop, peel_list, orig_exit_idx, clone_exit_idx), "bad clone loop"); |
3314 |
3314 |
3315 Node* head_clone = old_new[head->_idx]; |
3315 Node* head_clone = old_new[head->_idx]; |
3316 LoopNode* new_head_clone = old_new[new_head->_idx]->as_Loop(); |
3316 LoopNode* new_head_clone = old_new[new_head->_idx]->as_Loop(); |
3317 Node* orig_tail_clone = head_clone->in(2); |
3317 Node* orig_tail_clone = head_clone->in(2); |
3318 |
3318 |
3319 // Add phi if "def" node is in peel set and "use" is not |
3319 // Add phi if "def" node is in peel set and "use" is not |
3320 |
3320 |
3321 for(i = 0; i < peel_list.size(); i++ ) { |
3321 for (uint i = 0; i < peel_list.size(); i++) { |
3322 Node *def = peel_list.at(i); |
3322 Node *def = peel_list.at(i); |
3323 if (!def->is_CFG()) { |
3323 if (!def->is_CFG()) { |
3324 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { |
3324 for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { |
3325 Node *use = def->fast_out(j); |
3325 Node *use = def->fast_out(j); |
3326 if (has_node(use) && use->in(0) != C->top() && |
3326 if (has_node(use) && use->in(0) != C->top() && |
3372 // cloned-peeled <---+ |
3372 // cloned-peeled <---+ |
3373 // new_head_clone: | <--+ |
3373 // new_head_clone: | <--+ |
3374 // cloned-not_peeled in(0) in(0) |
3374 // cloned-not_peeled in(0) in(0) |
3375 // orig-peeled |
3375 // orig-peeled |
3376 |
3376 |
3377 for(i = 0; i < loop->_body.size(); i++ ) { |
3377 for (uint i = 0; i < loop->_body.size(); i++) { |
3378 Node *n = loop->_body.at(i); |
3378 Node *n = loop->_body.at(i); |
3379 if (!n->is_CFG() && n->in(0) != NULL && |
3379 if (!n->is_CFG() && n->in(0) != NULL && |
3380 not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) { |
3380 not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) { |
3381 Node* n_clone = old_new[n->_idx]; |
3381 Node* n_clone = old_new[n->_idx]; |
3382 _igvn.replace_input_of(n_clone, 0, new_head_clone); |
3382 _igvn.replace_input_of(n_clone, 0, new_head_clone); |