src/hotspot/share/opto/loopTransform.cpp
changeset 54705 fc7627bf4b01
parent 54703 e09f3ad6147d
child 55150 ba171f871932
child 58678 9cf78a70fa4f
equal deleted inserted replaced
54704:3a79044dd980 54705:fc7627bf4b01
   344 //------------------------------policy_peeling---------------------------------
   344 //------------------------------policy_peeling---------------------------------
   345 // Return TRUE or FALSE if the loop should be peeled or not.  Peel if we can
   345 // Return TRUE or FALSE if the loop should be peeled or not.  Peel if we can
   346 // make some loop-invariant test (usually a null-check) happen before the loop.
   346 // make some loop-invariant test (usually a null-check) happen before the loop.
   347 bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) const {
   347 bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) const {
   348   IdealLoopTree *loop = (IdealLoopTree*)this;
   348   IdealLoopTree *loop = (IdealLoopTree*)this;
   349   Node *test = loop->tail();
   349 
   350   int body_size = loop->_body.size();
   350   // If nodes are depleted, some transform has miscalculated its needs.
       
   351   assert(!phase->exceeding_node_budget(), "sanity");
       
   352 
       
   353   uint body_size = loop->_body.size();
   351   // Peeling does loop cloning which can result in O(N^2) node construction
   354   // Peeling does loop cloning which can result in O(N^2) node construction
   352   if (body_size > 255 /* Prevent overflow for large body_size */
   355   if (body_size > 255) {
   353       || (body_size * body_size + phase->C->live_nodes()) > phase->C->max_node_limit()) {
   356     return false;   // Prevent overflow for large body size
   354     return false;           // too large to safely clone
   357   }
       
   358   uint estimate = body_size * body_size;
       
   359   if (phase->exceeding_node_budget(estimate)) {
       
   360     return false;   // Too large to safely clone
   355   }
   361   }
   356 
   362 
   357   // check for vectorized loops, any peeling done was already applied
   363   // check for vectorized loops, any peeling done was already applied
   358   if (_head->is_CountedLoop()) {
   364   if (_head->is_CountedLoop()) {
   359     CountedLoopNode* cl = _head->as_CountedLoop();
   365     CountedLoopNode* cl = _head->as_CountedLoop();
   360     if (cl->is_unroll_only() || cl->trip_count() == 1) {
   366     if (cl->is_unroll_only() || cl->trip_count() == 1) {
   361       return false;
   367       return false;
   362     }
   368     }
   363   }
   369   }
       
   370 
       
   371   Node* test = loop->tail();
   364 
   372 
   365   while (test != _head) {       // Scan till run off top of loop
   373   while (test != _head) {       // Scan till run off top of loop
   366     if (test->is_If()) {        // Test?
   374     if (test->is_If()) {        // Test?
   367       Node *ctrl = phase->get_ctrl(test->in(1));
   375       Node *ctrl = phase->get_ctrl(test->in(1));
   368       if (ctrl->is_top()) {
   376       if (ctrl->is_top()) {
   373              test->Opcode() == Op_CountedLoopEnd ||
   381              test->Opcode() == Op_CountedLoopEnd ||
   374              test->Opcode() == Op_RangeCheck,
   382              test->Opcode() == Op_RangeCheck,
   375              "Check this code when new subtype is added");
   383              "Check this code when new subtype is added");
   376       // Condition is not a member of this loop?
   384       // Condition is not a member of this loop?
   377       if (!is_member(phase->get_loop(ctrl)) && is_loop_exit(test)) {
   385       if (!is_member(phase->get_loop(ctrl)) && is_loop_exit(test)) {
   378         return true;            // Found reason to peel!
   386         // Found reason to peel!
       
   387         return phase->may_require_nodes(estimate);
   379       }
   388       }
   380     }
   389     }
   381     // Walk up dominators to loop _head looking for test which is
   390     // Walk up dominators to loop _head looking for test which is
   382     // executed on every path thru loop.
   391     // executed on every path thru loop.
   383     test = phase->idom(test);
   392     test = phase->idom(test);
   667   uint trip_count = cl->trip_count();
   676   uint trip_count = cl->trip_count();
   668   // Note, max_juint is used to indicate unknown trip count.
   677   // Note, max_juint is used to indicate unknown trip count.
   669   assert(trip_count > 1, "one iteration loop should be optimized out already");
   678   assert(trip_count > 1, "one iteration loop should be optimized out already");
   670   assert(trip_count < max_juint, "exact trip_count should be less than max_uint.");
   679   assert(trip_count < max_juint, "exact trip_count should be less than max_uint.");
   671 
   680 
       
   681   // If nodes are depleted, some transform has miscalculated its needs.
       
   682   assert(!phase->exceeding_node_budget(), "sanity");
       
   683 
   672   // Real policy: if we maximally unroll, does it get too big?
   684   // Real policy: if we maximally unroll, does it get too big?
   673   // Allow the unrolled mess to get larger than standard loop
   685   // Allow the unrolled mess to get larger than standard loop
   674   // size.  After all, it will no longer be a loop.
   686   // size.  After all, it will no longer be a loop.
   675   uint body_size    = _body.size();
   687   uint body_size    = _body.size();
   676   uint unroll_limit = (uint)LoopUnrollLimit * 4;
   688   uint unroll_limit = (uint)LoopUnrollLimit * 4;
   677   assert((intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits");
   689   assert((intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits");
   678   if (trip_count > unroll_limit || body_size > unroll_limit) {
   690   if (trip_count > unroll_limit || body_size > unroll_limit) {
   679     return false;
   691     return false;
   680   }
   692   }
   681 
   693 
   682   // Fully unroll a loop with few iterations regardless next
       
   683   // conditions since following loop optimizations will split
       
   684   // such loop anyway (pre-main-post).
       
   685   if (trip_count <= 3)
       
   686     return true;
       
   687 
       
   688   // Take into account that after unroll conjoined heads and tails will fold,
   694   // Take into account that after unroll conjoined heads and tails will fold,
   689   // otherwise policy_unroll() may allow more unrolling than max unrolling.
   695   // otherwise policy_unroll() may allow more unrolling than max unrolling.
   690   uint new_body_size = EMPTY_LOOP_SIZE + (body_size - EMPTY_LOOP_SIZE) * trip_count;
   696   uint new_body_size = est_loop_clone_sz(trip_count, body_size - EMPTY_LOOP_SIZE);
   691   uint tst_body_size = (new_body_size - EMPTY_LOOP_SIZE) / trip_count + EMPTY_LOOP_SIZE;
   697 
   692   if (body_size != tst_body_size) // Check for int overflow
   698   if (new_body_size == UINT_MAX) { // Check for bad estimate (overflow).
   693     return false;
   699     return false;
       
   700   }
       
   701 
       
   702   // Fully unroll a loop with few iterations regardless next conditions since
       
   703   // following loop optimizations will split such loop anyway (pre-main-post).
       
   704   if (trip_count <= 3) {
       
   705     return phase->may_require_nodes(new_body_size);
       
   706   }
       
   707 
   694   if (new_body_size > unroll_limit ||
   708   if (new_body_size > unroll_limit ||
   695       // Unrolling can result in a large amount of node construction
   709       // Unrolling can result in a large amount of node construction
   696       new_body_size >= phase->C->max_node_limit() - phase->C->live_nodes()) {
   710       phase->exceeding_node_budget(new_body_size)) {
   697     return false;
   711     return false;
   698   }
   712   }
   699 
   713 
   700   // Do not unroll a loop with String intrinsics code.
   714   // Do not unroll a loop with String intrinsics code.
   701   // String intrinsics are large and have loops.
   715   // String intrinsics are large and have loops.
   721       }
   735       }
   722 #endif
   736 #endif
   723     } // switch
   737     } // switch
   724   }
   738   }
   725 
   739 
   726   return true; // Do maximally unroll
   740   return phase->may_require_nodes(new_body_size);
   727 }
   741 }
   728 
   742 
   729 
   743 
   730 //------------------------------policy_unroll----------------------------------
   744 //------------------------------policy_unroll----------------------------------
   731 // Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if
   745 // Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if the
   732 // the loop is a CountedLoop and the body is small enough.
   746 // loop is a CountedLoop and the body is small enough.
   733 bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
   747 bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
   734 
   748 
   735   CountedLoopNode *cl = _head->as_CountedLoop();
   749   CountedLoopNode *cl = _head->as_CountedLoop();
   736   assert(cl->is_normal_loop() || cl->is_main_loop(), "");
   750   assert(cl->is_normal_loop() || cl->is_main_loop(), "");
   737 
   751 
   738   if (!cl->is_valid_counted_loop())
   752   if (!cl->is_valid_counted_loop()) {
   739     return false; // Malformed counted loop
   753     return false; // Malformed counted loop
       
   754   }
       
   755 
       
   756   // If nodes are depleted, some transform has miscalculated its needs.
       
   757   assert(!phase->exceeding_node_budget(), "sanity");
   740 
   758 
   741   // Protect against over-unrolling.
   759   // Protect against over-unrolling.
   742   // After split at least one iteration will be executed in pre-loop.
   760   // After split at least one iteration will be executed in pre-loop.
   743   if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;
   761   if (cl->trip_count() <= (cl->is_normal_loop() ? 2u : 1u)) {
   744 
   762     return false;
   745   _local_loop_unroll_limit = LoopUnrollLimit;
   763   }
       
   764   _local_loop_unroll_limit  = LoopUnrollLimit;
   746   _local_loop_unroll_factor = 4;
   765   _local_loop_unroll_factor = 4;
   747   int future_unroll_cnt = cl->unrolled_count() * 2;
   766   int future_unroll_cnt = cl->unrolled_count() * 2;
   748   if (!cl->is_vectorized_loop()) {
   767   if (!cl->is_vectorized_loop()) {
   749     if (future_unroll_cnt > LoopMaxUnroll) return false;
   768     if (future_unroll_cnt > LoopMaxUnroll) return false;
   750   } else {
   769   } else {
   865 
   884 
   866   int slp_max_unroll_factor = cl->slp_max_unroll();
   885   int slp_max_unroll_factor = cl->slp_max_unroll();
   867   if ((LoopMaxUnroll < slp_max_unroll_factor) && FLAG_IS_DEFAULT(LoopMaxUnroll) && UseSubwordForMaxVector) {
   886   if ((LoopMaxUnroll < slp_max_unroll_factor) && FLAG_IS_DEFAULT(LoopMaxUnroll) && UseSubwordForMaxVector) {
   868     LoopMaxUnroll = slp_max_unroll_factor;
   887     LoopMaxUnroll = slp_max_unroll_factor;
   869   }
   888   }
       
   889 
       
   890   uint estimate = est_loop_clone_sz(2, body_size);
       
   891 
   870   if (cl->has_passed_slp()) {
   892   if (cl->has_passed_slp()) {
   871     if (slp_max_unroll_factor >= future_unroll_cnt) return true;
   893     if (slp_max_unroll_factor >= future_unroll_cnt) {
   872     // Normal case: loop too big
   894       return phase->may_require_nodes(estimate);
   873     return false;
   895     }
       
   896     return false; // Loop too big.
   874   }
   897   }
   875 
   898 
   876   // Check for being too big
   899   // Check for being too big
   877   if (body_size > (uint)_local_loop_unroll_limit) {
   900   if (body_size > (uint)_local_loop_unroll_limit) {
   878     if ((cl->is_subword_loop() || xors_in_loop >= 4) && body_size < (uint)LoopUnrollLimit * 4) {
   901     if ((cl->is_subword_loop() || xors_in_loop >= 4) && body_size < 4u * LoopUnrollLimit) {
   879       return true;
   902       return phase->may_require_nodes(estimate);
   880     }
   903     }
   881     // Normal case: loop too big
   904     return false; // Loop too big.
   882     return false;
       
   883   }
   905   }
   884 
   906 
   885   if (cl->is_unroll_only()) {
   907   if (cl->is_unroll_only()) {
   886     if (TraceSuperWordLoopUnrollAnalysis) {
   908     if (TraceSuperWordLoopUnrollAnalysis) {
   887       tty->print_cr("policy_unroll passed vector loop(vlen=%d,factor = %d)\n", slp_max_unroll_factor, future_unroll_cnt);
   909       tty->print_cr("policy_unroll passed vector loop(vlen=%d, factor=%d)\n",
       
   910                     slp_max_unroll_factor, future_unroll_cnt);
   888     }
   911     }
   889   }
   912   }
   890 
   913 
   891   // Unroll once!  (Each trip will soon do double iterations)
   914   // Unroll once!  (Each trip will soon do double iterations)
   892   return true;
   915   return phase->may_require_nodes(estimate);
   893 }
   916 }
   894 
   917 
   895 void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_cnt) {
   918 void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_cnt) {
       
   919 
       
   920   // If nodes are depleted, some transform has miscalculated its needs.
       
   921   assert(!phase->exceeding_node_budget(), "sanity");
       
   922 
   896   // Enable this functionality target by target as needed
   923   // Enable this functionality target by target as needed
   897   if (SuperWordLoopUnrollAnalysis) {
   924   if (SuperWordLoopUnrollAnalysis) {
   898     if (!cl->was_slp_analyzed()) {
   925     if (!cl->was_slp_analyzed()) {
   899       SuperWord sw(phase);
   926       SuperWord sw(phase);
   900       sw.transform_loop(this, false);
   927       sw.transform_loop(this, false);
   934 // Return TRUE or FALSE if the loop should be range-check-eliminated.
   961 // Return TRUE or FALSE if the loop should be range-check-eliminated.
   935 // Actually we do iteration-splitting, a more powerful form of RCE.
   962 // Actually we do iteration-splitting, a more powerful form of RCE.
   936 bool IdealLoopTree::policy_range_check(PhaseIdealLoop *phase) const {
   963 bool IdealLoopTree::policy_range_check(PhaseIdealLoop *phase) const {
   937   if (!RangeCheckElimination) return false;
   964   if (!RangeCheckElimination) return false;
   938 
   965 
       
   966   // If nodes are depleted, some transform has miscalculated its needs.
       
   967   assert(!phase->exceeding_node_budget(), "sanity");
       
   968 
   939   CountedLoopNode *cl = _head->as_CountedLoop();
   969   CountedLoopNode *cl = _head->as_CountedLoop();
   940   // If we unrolled with no intention of doing RCE and we later
   970   // If we unrolled with no intention of doing RCE and we later
   941   // changed our minds, we got no pre-loop.  Either we need to
   971   // changed our minds, we got no pre-loop.  Either we need to
   942   // make a new pre-loop, or we gotta disallow RCE.
   972   // make a new pre-loop, or we gotta disallow RCE.
   943   if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
   973   if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
   984       }
  1014       }
   985 
  1015 
   986       if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
  1016       if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
   987         continue;
  1017         continue;
   988       }
  1018       }
   989       // Yeah!  Found a test like 'trip+off vs limit'
  1019       // Found a test like 'trip+off vs  limit'.  Test is an IfNode, has two
   990       // Test is an IfNode, has 2 projections.  If BOTH are in the loop
  1020       // (2) projections.  If BOTH are in  the loop we need loop unswitching
   991       // we need loop unswitching instead of iteration splitting.
  1021       // instead of iteration splitting.
   992       if (is_loop_exit(iff)) {
  1022       if (is_loop_exit(iff)) {
   993         return true;            // Found reason to split iterations
  1023         // Found valid reason to split iterations (if there is room).
       
  1024         // NOTE: Usually a gross overestimate.
       
  1025         return phase->may_require_nodes(est_loop_clone_sz(2, _body.size()));
   994       }
  1026       }
   995     } // End of is IF
  1027     } // End of is IF
   996   }
  1028   }
   997 
  1029 
   998   return false;
  1030   return false;
  1000 
  1032 
  1001 //------------------------------policy_peel_only-------------------------------
  1033 //------------------------------policy_peel_only-------------------------------
  1002 // Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned.  Useful
  1034 // Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned.  Useful
  1003 // for unrolling loops with NO array accesses.
  1035 // for unrolling loops with NO array accesses.
  1004 bool IdealLoopTree::policy_peel_only(PhaseIdealLoop *phase) const {
  1036 bool IdealLoopTree::policy_peel_only(PhaseIdealLoop *phase) const {
       
  1037 
       
  1038   // If nodes are depleted, some transform has miscalculated its needs.
       
  1039   assert(!phase->exceeding_node_budget(), "sanity");
       
  1040 
  1005   // check for vectorized loops, any peeling done was already applied
  1041   // check for vectorized loops, any peeling done was already applied
  1006   if (_head->is_CountedLoop() && _head->as_CountedLoop()->is_unroll_only()) {
  1042   if (_head->is_CountedLoop() && _head->as_CountedLoop()->is_unroll_only()) {
  1007     return false;
  1043     return false;
  1008   }
  1044   }
  1009 
  1045 
  1483   CountedLoopNode *cl = loop->_head->as_CountedLoop();
  1519   CountedLoopNode *cl = loop->_head->as_CountedLoop();
  1484 
  1520 
  1485   // only process vectorized main loops
  1521   // only process vectorized main loops
  1486   if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return;
  1522   if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return;
  1487 
  1523 
       
  1524   if (!may_require_nodes(est_loop_clone_sz(2, loop->_body.size()))) {
       
  1525     return;
       
  1526   }
  1488   int slp_max_unroll_factor = cl->slp_max_unroll();
  1527   int slp_max_unroll_factor = cl->slp_max_unroll();
  1489   int cur_unroll = cl->unrolled_count();
  1528   int cur_unroll = cl->unrolled_count();
  1490 
  1529 
  1491   if (slp_max_unroll_factor == 0) return;
  1530   if (slp_max_unroll_factor == 0) return;
  1492 
  1531 
  1827     assert(opaq != NULL && opaq->in(1) == limit, "sanity");
  1866     assert(opaq != NULL && opaq->in(1) == limit, "sanity");
  1828 
  1867 
  1829     // Verify that policy_unroll result is still valid.
  1868     // Verify that policy_unroll result is still valid.
  1830     const TypeInt* limit_type = _igvn.type(limit)->is_int();
  1869     const TypeInt* limit_type = _igvn.type(limit)->is_int();
  1831     assert(stride_con > 0 && ((limit_type->_hi - stride_con) < limit_type->_hi) ||
  1870     assert(stride_con > 0 && ((limit_type->_hi - stride_con) < limit_type->_hi) ||
  1832         stride_con < 0 && ((limit_type->_lo - stride_con) > limit_type->_lo), "sanity");
  1871            stride_con < 0 && ((limit_type->_lo - stride_con) > limit_type->_lo),
       
  1872            "sanity");
  1833 
  1873 
  1834     if (limit->is_Con()) {
  1874     if (limit->is_Con()) {
  1835       // The check in policy_unroll and the assert above guarantee
  1875       // The check in policy_unroll and the assert above guarantee
  1836       // no underflow if limit is constant.
  1876       // no underflow if limit is constant.
  1837       new_limit = _igvn.intcon(limit->get_int() - stride_con);
  1877       new_limit = _igvn.intcon(limit->get_int() - stride_con);
  1896         register_new_node(adj_bool, ctrl);
  1936         register_new_node(adj_bool, ctrl);
  1897         new_limit = new CMoveINode(adj_bool, adj_limit, adj_max, TypeInt::INT);
  1937         new_limit = new CMoveINode(adj_bool, adj_limit, adj_max, TypeInt::INT);
  1898       }
  1938       }
  1899       register_new_node(new_limit, ctrl);
  1939       register_new_node(new_limit, ctrl);
  1900     }
  1940     }
       
  1941 
  1901     assert(new_limit != NULL, "");
  1942     assert(new_limit != NULL, "");
  1902     // Replace in loop test.
  1943     // Replace in loop test.
  1903     assert(loop_end->in(1)->in(1) == cmp, "sanity");
  1944     assert(loop_end->in(1)->in(1) == cmp, "sanity");
  1904     if (cmp->outcnt() == 1 && loop_end->in(1)->outcnt() == 1) {
  1945     if (cmp->outcnt() == 1 && loop_end->in(1)->outcnt() == 1) {
  1905       // Don't need to create new test since only one user.
  1946       // Don't need to create new test since only one user.
  2007       }
  2048       }
  2008       tty->print_cr(" ");
  2049       tty->print_cr(" ");
  2009     }
  2050     }
  2010   }
  2051   }
  2011 #endif
  2052 #endif
  2012 
       
  2013 }
  2053 }
  2014 
  2054 
  2015 //------------------------------do_maximally_unroll----------------------------
  2055 //------------------------------do_maximally_unroll----------------------------
  2016 
  2056 
  2017 void PhaseIdealLoop::do_maximally_unroll(IdealLoopTree *loop, Node_List &old_new) {
  2057 void PhaseIdealLoop::do_maximally_unroll(IdealLoopTree *loop, Node_List &old_new) {
  3133   }
  3173   }
  3134   // Check and remove empty loops (spam micro-benchmarks)
  3174   // Check and remove empty loops (spam micro-benchmarks)
  3135   if (do_remove_empty_loop(phase)) {
  3175   if (do_remove_empty_loop(phase)) {
  3136     return true;  // Here we removed an empty loop
  3176     return true;  // Here we removed an empty loop
  3137   }
  3177   }
  3138   bool should_peel = policy_peeling(phase); // Should we peel?
  3178 
  3139 
  3179   AutoNodeBudget node_budget(phase);
       
  3180 
       
  3181   bool should_peel     = policy_peeling(phase);
  3140   bool should_unswitch = policy_unswitching(phase);
  3182   bool should_unswitch = policy_unswitching(phase);
  3141 
  3183 
  3142   // Non-counted loops may be peeled; exactly 1 iteration is peeled.
  3184   // Non-counted loops may be peeled; exactly 1 iteration is peeled.
  3143   // This removes loop-invariant tests (usually null checks).
  3185   // This removes loop-invariant tests (usually null checks).
  3144   if (!_head->is_CountedLoop()) { // Non-counted loop
  3186   if (!_head->is_CountedLoop()) { // Non-counted loop
  3169   if (cl->is_normal_loop()) {
  3211   if (cl->is_normal_loop()) {
  3170     if (should_unswitch) {
  3212     if (should_unswitch) {
  3171       phase->do_unswitching(this, old_new);
  3213       phase->do_unswitching(this, old_new);
  3172       return true;
  3214       return true;
  3173     }
  3215     }
  3174     bool should_maximally_unroll =  policy_maximally_unroll(phase);
  3216     bool should_maximally_unroll = policy_maximally_unroll(phase);
  3175     if (should_maximally_unroll) {
  3217     if (should_maximally_unroll) {
  3176       // Here we did some unrolling and peeling.  Eventually we will
  3218       // Here we did some unrolling and peeling.  Eventually we will
  3177       // completely unroll this loop and it will no longer be a loop.
  3219       // completely unroll this loop and it will no longer be a loop.
  3178       phase->do_maximally_unroll(this,old_new);
  3220       phase->do_maximally_unroll(this, old_new);
  3179       return true;
  3221       return true;
  3180     }
  3222     }
  3181   }
       
  3182 
       
  3183   // Skip next optimizations if running low on nodes. Note that
       
  3184   // policy_unswitching and policy_maximally_unroll have this check.
       
  3185   int nodes_left = phase->C->max_node_limit() - phase->C->live_nodes();
       
  3186   if ((int)(2 * _body.size()) > nodes_left) {
       
  3187     return true;
       
  3188   }
  3223   }
  3189 
  3224 
  3190   // Counted loops may be peeled, may need some iterations run up
  3225   // Counted loops may be peeled, may need some iterations run up
  3191   // front for RCE, and may want to align loop refs to a cache
  3226   // front for RCE, and may want to align loop refs to a cache
  3192   // line.  Thus we clone a full loop up front whose trip count is
  3227   // line.  Thus we clone a full loop up front whose trip count is
  3198 
  3233 
  3199   // A post-loop will finish any odd iterations (leftover after
  3234   // A post-loop will finish any odd iterations (leftover after
  3200   // unrolling), plus any needed for RCE purposes.
  3235   // unrolling), plus any needed for RCE purposes.
  3201 
  3236 
  3202   bool should_unroll = policy_unroll(phase);
  3237   bool should_unroll = policy_unroll(phase);
  3203 
  3238   bool should_rce    = policy_range_check(phase);
  3204   bool should_rce = policy_range_check(phase);
  3239   // TODO: Remove align -- not used.
  3205 
  3240   bool should_align  = policy_align(phase);
  3206   bool should_align = policy_align(phase);
  3241 
  3207 
  3242   // If not RCE'ing  (iteration splitting) or Aligning, then we  do not need a
  3208   // If not RCE'ing (iteration splitting) or Aligning, then we do not
  3243   // pre-loop.  We may still need to peel an initial iteration but we will not
  3209   // need a pre-loop.  We may still need to peel an initial iteration but
  3244   // be needing an unknown number of pre-iterations.
  3210   // we will not be needing an unknown number of pre-iterations.
       
  3211   //
  3245   //
  3212   // Basically, if may_rce_align reports FALSE first time through,
  3246   // Basically, if may_rce_align reports FALSE first time through, we will not
  3213   // we will not be able to later do RCE or Aligning on this loop.
  3247   // be able to later do RCE or Aligning on this loop.
  3214   bool may_rce_align = !policy_peel_only(phase) || should_rce || should_align;
  3248   bool may_rce_align = !policy_peel_only(phase) || should_rce || should_align;
  3215 
  3249 
  3216   // If we have any of these conditions (RCE, alignment, unrolling) met, then
  3250   // If we have any of these conditions (RCE, alignment, unrolling) met, then
  3217   // we switch to the pre-/main-/post-loop model.  This model also covers
  3251   // we switch to the pre-/main-/post-loop model.  This model also covers
  3218   // peeling.
  3252   // peeling.
  3219   if (should_rce || should_align || should_unroll) {
  3253   if (should_rce || should_align || should_unroll) {
  3220     if (cl->is_normal_loop())  // Convert to 'pre/main/post' loops
  3254     if (cl->is_normal_loop()) { // Convert to 'pre/main/post' loops
       
  3255       if (!phase->may_require_nodes(est_loop_clone_sz(3, _body.size()))) {
       
  3256         return false;
       
  3257       }
  3221       phase->insert_pre_post_loops(this,old_new, !may_rce_align);
  3258       phase->insert_pre_post_loops(this,old_new, !may_rce_align);
  3222 
  3259     }
  3223     // Adjust the pre- and main-loop limits to let the pre and post loops run
  3260     // Adjust the pre- and main-loop limits to let the pre and post loops run
  3224     // with full checks, but the main-loop with no checks.  Remove said
  3261     // with full checks, but the main-loop with no checks.  Remove said
  3225     // checks from the main body.
  3262     // checks from the main body.
  3226     if (should_rce) {
  3263     if (should_rce) {
  3227       if (phase->do_range_check(this, old_new) != 0) {
  3264       if (phase->do_range_check(this, old_new) != 0) {
  3284   if (_allow_optimizations && is_innermost()) {
  3321   if (_allow_optimizations && is_innermost()) {
  3285     if (!_has_call) {
  3322     if (!_has_call) {
  3286       if (!iteration_split_impl(phase, old_new)) {
  3323       if (!iteration_split_impl(phase, old_new)) {
  3287         return false;
  3324         return false;
  3288       }
  3325       }
  3289     } else if (policy_unswitching(phase)) {
  3326     } else {
  3290       phase->do_unswitching(this, old_new);
  3327       AutoNodeBudget node_budget(phase);
       
  3328       if (policy_unswitching(phase)) {
       
  3329         phase->do_unswitching(this, old_new);
       
  3330       }
  3291     }
  3331     }
  3292   }
  3332   }
  3293 
  3333 
  3294   // Minor offset re-organization to remove loop-fallout uses of
  3334   // Minor offset re-organization to remove loop-fallout uses of
  3295   // trip counter when there was no major reshaping.
  3335   // trip counter when there was no major reshaping.