src/hotspot/share/opto/loopTransform.cpp
changeset 50623 5209d8a6303e
parent 50561 5756e8eecb17
child 50632 fd430e352427
equal deleted inserted replaced
50622:21b96ce2ed10 50623:5209d8a6303e
   135 }
   135 }
   136 
   136 
   137 //------------------------------compute_profile_trip_cnt----------------------------
   137 //------------------------------compute_profile_trip_cnt----------------------------
   138 // Compute loop trip count from profile data as
   138 // Compute loop trip count from profile data as
   139 //    (backedge_count + loop_exit_count) / loop_exit_count
   139 //    (backedge_count + loop_exit_count) / loop_exit_count
   140 void IdealLoopTree::compute_profile_trip_cnt( PhaseIdealLoop *phase ) {
   140 
   141   if (!_head->is_CountedLoop()) {
   141 float IdealLoopTree::compute_profile_trip_cnt_helper(Node* n) {
       
   142   if (n->is_If()) {
       
   143     IfNode *iff = n->as_If();
       
   144     if (iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN) {
       
   145       Node *exit = is_loop_exit(iff);
       
   146       if (exit) {
       
   147         float exit_prob = iff->_prob;
       
   148         if (exit->Opcode() == Op_IfFalse) exit_prob = 1.0 - exit_prob;
       
   149         if (exit_prob > PROB_MIN) {
       
   150           float exit_cnt = iff->_fcnt * exit_prob;
       
   151           return exit_cnt;
       
   152         }
       
   153       }
       
   154     }
       
   155   }
       
   156   if (n->is_Jump()) {
       
   157     JumpNode *jmp = n->as_Jump();
       
   158     if (jmp->_fcnt != COUNT_UNKNOWN) {
       
   159       float* probs = jmp->_probs;
       
   160       float exit_prob = 0;
       
   161       PhaseIdealLoop *phase = _phase;
       
   162       for (DUIterator_Fast imax, i = jmp->fast_outs(imax); i < imax; i++) {
       
   163         JumpProjNode* u = jmp->fast_out(i)->as_JumpProj();
       
   164         if (!is_member(_phase->get_loop(u))) {
       
   165           exit_prob += probs[u->_con];
       
   166         }
       
   167       }
       
   168       return exit_prob * jmp->_fcnt;
       
   169     }
       
   170   }
       
   171   return 0;
       
   172 }
       
   173 
       
   174 void IdealLoopTree::compute_profile_trip_cnt(PhaseIdealLoop *phase) {
       
   175   if (!_head->is_Loop()) {
   142     return;
   176     return;
   143   }
   177   }
   144   CountedLoopNode* head = _head->as_CountedLoop();
   178   LoopNode* head = _head->as_Loop();
   145   if (head->profile_trip_cnt() != COUNT_UNKNOWN) {
   179   if (head->profile_trip_cnt() != COUNT_UNKNOWN) {
   146     return; // Already computed
   180     return; // Already computed
   147   }
   181   }
   148   float trip_cnt = (float)max_jint; // default is big
   182   float trip_cnt = (float)max_jint; // default is big
   149 
   183 
   151   while (back != head) {
   185   while (back != head) {
   152     if ((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) &&
   186     if ((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) &&
   153         back->in(0) &&
   187         back->in(0) &&
   154         back->in(0)->is_If() &&
   188         back->in(0)->is_If() &&
   155         back->in(0)->as_If()->_fcnt != COUNT_UNKNOWN &&
   189         back->in(0)->as_If()->_fcnt != COUNT_UNKNOWN &&
   156         back->in(0)->as_If()->_prob != PROB_UNKNOWN) {
   190         back->in(0)->as_If()->_prob != PROB_UNKNOWN &&
       
   191         (back->Opcode() == Op_IfTrue ? 1-back->in(0)->as_If()->_prob : back->in(0)->as_If()->_prob) > PROB_MIN) {
   157       break;
   192       break;
   158     }
   193     }
   159     back = phase->idom(back);
   194     back = phase->idom(back);
   160   }
   195   }
   161   if (back != head) {
   196   if (back != head) {
   162     assert((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) &&
   197     assert((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) &&
   163            back->in(0), "if-projection exists");
   198            back->in(0), "if-projection exists");
   164     IfNode* back_if = back->in(0)->as_If();
   199     IfNode* back_if = back->in(0)->as_If();
   165     float loop_back_cnt = back_if->_fcnt * back_if->_prob;
   200     float loop_back_cnt = back_if->_fcnt * (back->Opcode() == Op_IfTrue ? back_if->_prob : (1 - back_if->_prob));
   166 
   201 
   167     // Now compute a loop exit count
   202     // Now compute a loop exit count
   168     float loop_exit_cnt = 0.0f;
   203     float loop_exit_cnt = 0.0f;
   169     for( uint i = 0; i < _body.size(); i++ ) {
   204     if (_child == NULL) {
   170       Node *n = _body[i];
   205       for( uint i = 0; i < _body.size(); i++ ) {
   171       if( n->is_If() ) {
   206         Node *n = _body[i];
   172         IfNode *iff = n->as_If();
   207         loop_exit_cnt += compute_profile_trip_cnt_helper(n);
   173         if( iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN ) {
   208       }
   174           Node *exit = is_loop_exit(iff);
   209     } else {
   175           if( exit ) {
   210       ResourceMark rm;
   176             float exit_prob = iff->_prob;
   211       Unique_Node_List wq;
   177             if (exit->Opcode() == Op_IfFalse) exit_prob = 1.0 - exit_prob;
   212       wq.push(back);
   178             if (exit_prob > PROB_MIN) {
   213       for (uint i = 0; i < wq.size(); i++) {
   179               float exit_cnt = iff->_fcnt * exit_prob;
   214         Node *n = wq.at(i);
   180               loop_exit_cnt += exit_cnt;
   215         assert(n->is_CFG(), "only control nodes");
       
   216         if (n != head) {
       
   217           if (n->is_Region()) {
       
   218             for (uint j = 1; j < n->req(); j++) {
       
   219               wq.push(n->in(j));
   181             }
   220             }
       
   221           } else {
       
   222             loop_exit_cnt += compute_profile_trip_cnt_helper(n);
       
   223             wq.push(n->in(0));
   182           }
   224           }
   183         }
   225         }
   184       }
   226       }
       
   227 
   185     }
   228     }
   186     if (loop_exit_cnt > 0.0f) {
   229     if (loop_exit_cnt > 0.0f) {
   187       trip_cnt = (loop_back_cnt + loop_exit_cnt) / loop_exit_cnt;
   230       trip_cnt = (loop_back_cnt + loop_exit_cnt) / loop_exit_cnt;
   188     } else {
   231     } else {
   189       // No exit count so use
   232       // No exit count so use
   190       trip_cnt = loop_back_cnt;
   233       trip_cnt = loop_back_cnt;
   191     }
   234     }
       
   235   } else {
       
   236     head->mark_profile_trip_failed();
   192   }
   237   }
   193 #ifndef PRODUCT
   238 #ifndef PRODUCT
   194   if (TraceProfileTripCount) {
   239   if (TraceProfileTripCount) {
   195     tty->print_cr("compute_profile_trip_cnt  lp: %d cnt: %f\n", head->_idx, trip_cnt);
   240     tty->print_cr("compute_profile_trip_cnt  lp: %d cnt: %f\n", head->_idx, trip_cnt);
   196   }
   241   }
  1014 // loop is never executed). When that happens, range check
  1059 // loop is never executed). When that happens, range check
  1015 // CastII/ConvI2L nodes cause some data paths to die. For consistency,
  1060 // CastII/ConvI2L nodes cause some data paths to die. For consistency,
  1016 // the control paths must die too but the range checks were removed by
  1061 // the control paths must die too but the range checks were removed by
  1017 // predication. The range checks that we add here guarantee that they
  1062 // predication. The range checks that we add here guarantee that they
  1018 // do.
  1063 // do.
  1019 void PhaseIdealLoop::duplicate_predicates(CountedLoopNode* pre_head, Node* min_taken, Node* castii,
  1064 void PhaseIdealLoop::duplicate_predicates_helper(Node* predicate, Node* castii, IdealLoopTree* outer_loop,
  1020                                           IdealLoopTree* outer_loop, LoopNode* outer_main_head,
  1065                                                  LoopNode* outer_main_head, uint dd_main_head) {
  1021                                           uint dd_main_head) {
  1066   if (predicate != NULL) {
       
  1067     IfNode* iff = predicate->in(0)->as_If();
       
  1068     ProjNode* uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con);
       
  1069     Node* rgn = uncommon_proj->unique_ctrl_out();
       
  1070     assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
       
  1071     assert(iff->in(1)->in(1)->Opcode() == Op_Opaque1, "unexpected predicate shape");
       
  1072     predicate = predicate->in(0)->in(0);
       
  1073     Node* current_proj = outer_main_head->in(LoopNode::EntryControl);
       
  1074     Node* prev_proj = current_proj;
       
  1075     while (predicate != NULL && predicate->is_Proj() && predicate->in(0)->is_If()) {
       
  1076       uncommon_proj = predicate->in(0)->as_If()->proj_out(1 - predicate->as_Proj()->_con);
       
  1077       if (uncommon_proj->unique_ctrl_out() != rgn)
       
  1078         break;
       
  1079       iff = predicate->in(0)->as_If();
       
  1080       if (iff->in(1)->Opcode() == Op_Opaque4) {
       
  1081         Node_Stack to_clone(2);
       
  1082         to_clone.push(iff->in(1), 1);
       
  1083         uint current = C->unique();
       
  1084         Node* result = NULL;
       
  1085         // Look for the opaque node to replace with the init value
       
  1086         // and clone everything in between. We keep the Opaque4 node
       
  1087         // so the duplicated predicates are eliminated once loop
       
  1088         // opts are over: they are here only to keep the IR graph
       
  1089         // consistent.
       
  1090         do {
       
  1091           Node* n = to_clone.node();
       
  1092           uint i = to_clone.index();
       
  1093           Node* m = n->in(i);
       
  1094           int op = m->Opcode();
       
  1095           if (m->is_Bool() ||
       
  1096               m->is_Cmp() ||
       
  1097               op == Op_AndL ||
       
  1098               op == Op_OrL ||
       
  1099               op == Op_RShiftL ||
       
  1100               op == Op_LShiftL ||
       
  1101               op == Op_AddL ||
       
  1102               op == Op_AddI ||
       
  1103               op == Op_MulL ||
       
  1104               op == Op_MulI ||
       
  1105               op == Op_SubL ||
       
  1106               op == Op_SubI ||
       
  1107               op == Op_ConvI2L) {
       
  1108             to_clone.push(m, 1);
       
  1109             continue;
       
  1110           }
       
  1111           if (op == Op_Opaque1) {
       
  1112             if (n->_idx < current) {
       
  1113               n = n->clone();
       
  1114             }
       
  1115             n->set_req(i, castii);
       
  1116             register_new_node(n, current_proj);
       
  1117             to_clone.set_node(n);
       
  1118           }
       
  1119           for (;;) {
       
  1120             Node* cur = to_clone.node();
       
  1121             uint j = to_clone.index();
       
  1122             if (j+1 < cur->req()) {
       
  1123               to_clone.set_index(j+1);
       
  1124               break;
       
  1125             }
       
  1126             to_clone.pop();
       
  1127             if (to_clone.size() == 0) {
       
  1128               result = cur;
       
  1129               break;
       
  1130             }
       
  1131             Node* next = to_clone.node();
       
  1132             j = to_clone.index();
       
  1133             if (cur->_idx >= current) {
       
  1134               if (next->_idx < current) {
       
  1135                 next = next->clone();
       
  1136                 register_new_node(next, current_proj);
       
  1137                 to_clone.set_node(next);
       
  1138               }
       
  1139               assert(next->in(j) != cur, "input should have been cloned");
       
  1140               next->set_req(j, cur);
       
  1141             }
       
  1142           }
       
  1143         } while (result == NULL);
       
  1144         assert(result->_idx >= current, "new node expected");
       
  1145 
       
  1146         Node* proj = predicate->clone();
       
  1147         Node* other_proj = uncommon_proj->clone();
       
  1148         Node* new_iff = iff->clone();
       
  1149         new_iff->set_req(1, result);
       
  1150         proj->set_req(0, new_iff);
       
  1151         other_proj->set_req(0, new_iff);
       
  1152         Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
       
  1153         register_new_node(frame, C->start());
       
  1154         // It's impossible for the predicate to fail at runtime. Use
       
  1155         // an Halt node.
       
  1156         Node* halt = new HaltNode(other_proj, frame);
       
  1157         C->root()->add_req(halt);
       
  1158         new_iff->set_req(0, prev_proj);
       
  1159 
       
  1160         register_control(new_iff, outer_loop->_parent, prev_proj);
       
  1161         register_control(proj, outer_loop->_parent, new_iff);
       
  1162         register_control(other_proj, _ltree_root, new_iff);
       
  1163         register_control(halt, _ltree_root, other_proj);
       
  1164 
       
  1165         prev_proj = proj;
       
  1166       }
       
  1167       predicate = predicate->in(0)->in(0);
       
  1168     }
       
  1169     if (prev_proj != current_proj) {
       
  1170       _igvn.replace_input_of(outer_main_head, LoopNode::EntryControl, prev_proj);
       
  1171       set_idom(outer_main_head, prev_proj, dd_main_head);
       
  1172     }
       
  1173   }
       
  1174 }
       
  1175 
       
  1176 void PhaseIdealLoop::duplicate_predicates(CountedLoopNode* pre_head, Node* castii, IdealLoopTree* outer_loop,
       
  1177                                           LoopNode* outer_main_head, uint dd_main_head) {
  1022   if (UseLoopPredicate) {
  1178   if (UseLoopPredicate) {
  1023     Node* entry = pre_head->in(LoopNode::EntryControl);
  1179     Node* entry = pre_head->in(LoopNode::EntryControl);
  1024     Node* predicate = NULL;
  1180     Node* predicate = NULL;
  1025     predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
  1181     predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
  1026     if (predicate != NULL) {
  1182     if (predicate != NULL) {
  1027       entry = entry->in(0)->in(0);
  1183       entry = entry->in(0)->in(0);
  1028     }
  1184     }
       
  1185     Node* profile_predicate = NULL;
       
  1186     if (UseProfiledLoopPredicate) {
       
  1187       profile_predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
       
  1188       if (profile_predicate != NULL) {
       
  1189         entry = skip_loop_predicates(entry);
       
  1190       }
       
  1191     }
  1029     predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
  1192     predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
  1030     if (predicate != NULL) {
  1193     duplicate_predicates_helper(predicate, castii, outer_loop, outer_main_head, dd_main_head);
  1031       IfNode* iff = entry->in(0)->as_If();
  1194     duplicate_predicates_helper(profile_predicate, castii, outer_loop, outer_main_head, dd_main_head);
  1032       ProjNode* uncommon_proj = iff->proj_out(1 - entry->as_Proj()->_con);
       
  1033       Node* rgn = uncommon_proj->unique_ctrl_out();
       
  1034       assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct");
       
  1035       assert(iff->in(1)->in(1)->Opcode() == Op_Opaque1, "unexpected predicate shape");
       
  1036       entry = entry->in(0)->in(0);
       
  1037       Node* prev_proj = min_taken;
       
  1038       while (entry != NULL && entry->is_Proj() && entry->in(0)->is_If()) {
       
  1039         uncommon_proj = entry->in(0)->as_If()->proj_out(1 - entry->as_Proj()->_con);
       
  1040         if (uncommon_proj->unique_ctrl_out() != rgn)
       
  1041           break;
       
  1042         iff = entry->in(0)->as_If();
       
  1043         if (iff->in(1)->Opcode() == Op_Opaque4) {
       
  1044           Node_Stack to_clone(2);
       
  1045           to_clone.push(iff->in(1), 1);
       
  1046           uint current = C->unique();
       
  1047           Node* result = NULL;
       
  1048           // Look for the opaque node to replace with the init value
       
  1049           // and clone everything in between. We keep the Opaque4 node
       
  1050           // so the duplicated predicates are eliminated once loop
       
  1051           // opts are over: they are here only to keep the IR graph
       
  1052           // consistent.
       
  1053           do {
       
  1054             Node* n = to_clone.node();
       
  1055             uint i = to_clone.index();
       
  1056             Node* m = n->in(i);
       
  1057             int op = m->Opcode();
       
  1058             if (m->is_Bool() ||
       
  1059                 m->is_Cmp() ||
       
  1060                 op == Op_AndL ||
       
  1061                 op == Op_OrL ||
       
  1062                 op == Op_RShiftL ||
       
  1063                 op == Op_LShiftL ||
       
  1064                 op == Op_AddL ||
       
  1065                 op == Op_AddI ||
       
  1066                 op == Op_MulL ||
       
  1067                 op == Op_MulI ||
       
  1068                 op == Op_SubL ||
       
  1069                 op == Op_SubI ||
       
  1070                 op == Op_ConvI2L) {
       
  1071               to_clone.push(m, 1);
       
  1072               continue;
       
  1073             }
       
  1074             if (op == Op_Opaque1) {
       
  1075               if (n->_idx < current) {
       
  1076                 n = n->clone();
       
  1077               }
       
  1078               n->set_req(i, castii);
       
  1079               register_new_node(n, min_taken);
       
  1080               to_clone.set_node(n);
       
  1081             }
       
  1082             for (;;) {
       
  1083               Node* cur = to_clone.node();
       
  1084               uint j = to_clone.index();
       
  1085               if (j+1 < cur->req()) {
       
  1086                 to_clone.set_index(j+1);
       
  1087                 break;
       
  1088               }
       
  1089               to_clone.pop();
       
  1090               if (to_clone.size() == 0) {
       
  1091                 result = cur;
       
  1092                 break;
       
  1093               }
       
  1094               Node* next = to_clone.node();
       
  1095               j = to_clone.index();
       
  1096               if (cur->_idx >= current) {
       
  1097                 if (next->_idx < current) {
       
  1098                   next = next->clone();
       
  1099                   register_new_node(next, min_taken);
       
  1100                   to_clone.set_node(next);
       
  1101                 }
       
  1102                 assert(next->in(j) != cur, "input should have been cloned");
       
  1103                 next->set_req(j, cur);
       
  1104               }
       
  1105             }
       
  1106           } while (result == NULL);
       
  1107           assert(result->_idx >= current, "new node expected");
       
  1108 
       
  1109           Node* proj = entry->clone();
       
  1110           Node* other_proj = uncommon_proj->clone();
       
  1111           Node* new_iff = iff->clone();
       
  1112           new_iff->set_req(1, result);
       
  1113           proj->set_req(0, new_iff);
       
  1114           other_proj->set_req(0, new_iff);
       
  1115           Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
       
  1116           register_new_node(frame, C->start());
       
  1117           // It's impossible for the predicate to fail at runtime. Use
       
  1118           // an Halt node.
       
  1119           Node* halt = new HaltNode(other_proj, frame);
       
  1120           C->root()->add_req(halt);
       
  1121           new_iff->set_req(0, prev_proj);
       
  1122 
       
  1123           register_control(new_iff, outer_loop->_parent, prev_proj);
       
  1124           register_control(proj, outer_loop->_parent, new_iff);
       
  1125           register_control(other_proj, _ltree_root, new_iff);
       
  1126           register_control(halt, _ltree_root, other_proj);
       
  1127 
       
  1128           prev_proj = proj;
       
  1129         }
       
  1130         entry = entry->in(0)->in(0);
       
  1131       }
       
  1132       _igvn.replace_input_of(outer_main_head, LoopNode::EntryControl, prev_proj);
       
  1133       set_idom(outer_main_head, prev_proj, dd_main_head);
       
  1134     }
       
  1135   }
  1195   }
  1136 }
  1196 }
  1137 
  1197 
  1138 //------------------------------insert_pre_post_loops--------------------------
  1198 //------------------------------insert_pre_post_loops--------------------------
  1139 // Insert pre and post loops.  If peel_only is set, the pre-loop can not have
  1199 // Insert pre and post loops.  If peel_only is set, the pre-loop can not have
  1276   // dependencies.
  1336   // dependencies.
  1277 
  1337 
  1278   // CastII for the main loop:
  1338   // CastII for the main loop:
  1279   Node* castii = cast_incr_before_loop( pre_incr, min_taken, main_head );
  1339   Node* castii = cast_incr_before_loop( pre_incr, min_taken, main_head );
  1280   assert(castii != NULL, "no castII inserted");
  1340   assert(castii != NULL, "no castII inserted");
  1281   duplicate_predicates(pre_head, min_taken, castii, outer_loop, outer_main_head, dd_main_head);
  1341   duplicate_predicates(pre_head, castii, outer_loop, outer_main_head, dd_main_head);
  1282 
  1342 
  1283   // Step B4: Shorten the pre-loop to run only 1 iteration (for now).
  1343   // Step B4: Shorten the pre-loop to run only 1 iteration (for now).
  1284   // RCE and alignment may change this later.
  1344   // RCE and alignment may change this later.
  1285   Node *cmp_end = pre_end->cmp_node();
  1345   Node *cmp_end = pre_end->cmp_node();
  1286   assert( cmp_end->in(2) == limit, "" );
  1346   assert( cmp_end->in(2) == limit, "" );
  2813       needs_guard = (init_t->_lo <= limit_t->_hi);
  2873       needs_guard = (init_t->_lo <= limit_t->_hi);
  2814     }
  2874     }
  2815   }
  2875   }
  2816   if (needs_guard) {
  2876   if (needs_guard) {
  2817     // Check for an obvious zero trip guard.
  2877     // Check for an obvious zero trip guard.
  2818     Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->skip_predicates());
  2878     Node* inctrl = PhaseIdealLoop::skip_all_loop_predicates(cl->skip_predicates());
  2819     if (inctrl->Opcode() == Op_IfTrue || inctrl->Opcode() == Op_IfFalse) {
  2879     if (inctrl->Opcode() == Op_IfTrue || inctrl->Opcode() == Op_IfFalse) {
  2820       bool maybe_swapped = (inctrl->Opcode() == Op_IfFalse);
  2880       bool maybe_swapped = (inctrl->Opcode() == Op_IfFalse);
  2821       // The test should look like just the backedge of a CountedLoop
  2881       // The test should look like just the backedge of a CountedLoop
  2822       Node* iff = inctrl->in(0);
  2882       Node* iff = inctrl->in(0);
  2823       if (iff->is_If()) {
  2883       if (iff->is_If()) {