344 //------------------------------policy_peeling--------------------------------- |
344 //------------------------------policy_peeling--------------------------------- |
345 // Return TRUE if the loop should be peeled, FALSE otherwise. Peel if we can |
345 // Return TRUE if the loop should be peeled, FALSE otherwise. Peel if we can |
346 // make some loop-invariant test (usually a null-check) happen before the loop. |
346 // make some loop-invariant test (usually a null-check) happen before the loop. |
347 bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) const { |
347 bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) const { |
348 IdealLoopTree *loop = (IdealLoopTree*)this; |
348 IdealLoopTree *loop = (IdealLoopTree*)this; |
349 Node *test = loop->tail(); |
349 |
350 int body_size = loop->_body.size(); |
350 // If nodes are depleted, some transform has miscalculated its needs. |
|
351 assert(!phase->exceeding_node_budget(), "sanity"); |
|
352 |
|
353 uint body_size = loop->_body.size(); |
351 // Peeling does loop cloning which can result in O(N^2) node construction |
354 // Peeling does loop cloning which can result in O(N^2) node construction |
352 if (body_size > 255 /* Prevent overflow for large body_size */ |
355 if (body_size > 255) { |
353 || (body_size * body_size + phase->C->live_nodes()) > phase->C->max_node_limit()) { |
356 return false; // Prevent overflow for large body size |
354 return false; // too large to safely clone |
357 } |
|
358 uint estimate = body_size * body_size; |
|
359 if (phase->exceeding_node_budget(estimate)) { |
|
360 return false; // Too large to safely clone |
355 } |
361 } |
356 |
362 |
357 // check for vectorized loops, any peeling done was already applied |
363 // check for vectorized loops, any peeling done was already applied |
358 if (_head->is_CountedLoop()) { |
364 if (_head->is_CountedLoop()) { |
359 CountedLoopNode* cl = _head->as_CountedLoop(); |
365 CountedLoopNode* cl = _head->as_CountedLoop(); |
360 if (cl->is_unroll_only() || cl->trip_count() == 1) { |
366 if (cl->is_unroll_only() || cl->trip_count() == 1) { |
361 return false; |
367 return false; |
362 } |
368 } |
363 } |
369 } |
|
370 |
|
371 Node* test = loop->tail(); |
364 |
372 |
365 while (test != _head) { // Scan till run off top of loop |
373 while (test != _head) { // Scan till run off top of loop |
366 if (test->is_If()) { // Test? |
374 if (test->is_If()) { // Test? |
367 Node *ctrl = phase->get_ctrl(test->in(1)); |
375 Node *ctrl = phase->get_ctrl(test->in(1)); |
368 if (ctrl->is_top()) { |
376 if (ctrl->is_top()) { |
373 test->Opcode() == Op_CountedLoopEnd || |
381 test->Opcode() == Op_CountedLoopEnd || |
374 test->Opcode() == Op_RangeCheck, |
382 test->Opcode() == Op_RangeCheck, |
375 "Check this code when new subtype is added"); |
383 "Check this code when new subtype is added"); |
376 // Condition is not a member of this loop? |
384 // Condition is not a member of this loop? |
377 if (!is_member(phase->get_loop(ctrl)) && is_loop_exit(test)) { |
385 if (!is_member(phase->get_loop(ctrl)) && is_loop_exit(test)) { |
378 return true; // Found reason to peel! |
386 // Found reason to peel! |
|
387 return phase->may_require_nodes(estimate); |
379 } |
388 } |
380 } |
389 } |
381 // Walk up dominators to loop _head looking for a test which is |
390 // Walk up dominators to loop _head looking for a test which is |
382 // executed on every path through the loop. |
391 // executed on every path through the loop. |
383 test = phase->idom(test); |
392 test = phase->idom(test); |
667 uint trip_count = cl->trip_count(); |
676 uint trip_count = cl->trip_count(); |
668 // Note, max_juint is used to indicate unknown trip count. |
677 // Note, max_juint is used to indicate unknown trip count. |
669 assert(trip_count > 1, "one iteration loop should be optimized out already"); |
678 assert(trip_count > 1, "one iteration loop should be optimized out already"); |
670 assert(trip_count < max_juint, "exact trip_count should be less than max_uint."); |
679 assert(trip_count < max_juint, "exact trip_count should be less than max_uint."); |
671 |
680 |
|
681 // If nodes are depleted, some transform has miscalculated its needs. |
|
682 assert(!phase->exceeding_node_budget(), "sanity"); |
|
683 |
672 // Real policy: if we maximally unroll, does it get too big? |
684 // Real policy: if we maximally unroll, does it get too big? |
673 // Allow the unrolled mess to get larger than standard loop |
685 // Allow the unrolled mess to get larger than standard loop |
674 // size. After all, it will no longer be a loop. |
686 // size. After all, it will no longer be a loop. |
675 uint body_size = _body.size(); |
687 uint body_size = _body.size(); |
676 uint unroll_limit = (uint)LoopUnrollLimit * 4; |
688 uint unroll_limit = (uint)LoopUnrollLimit * 4; |
677 assert((intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits"); |
689 assert((intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits"); |
678 if (trip_count > unroll_limit || body_size > unroll_limit) { |
690 if (trip_count > unroll_limit || body_size > unroll_limit) { |
679 return false; |
691 return false; |
680 } |
692 } |
681 |
693 |
682 // Fully unroll a loop with few iterations regardless of the next |

683 // conditions, since the following loop optimizations will split |

684 // such a loop anyway (pre-main-post). |
|
685 if (trip_count <= 3) |
|
686 return true; |
|
687 |
|
688 // Take into account that after unroll conjoined heads and tails will fold, |
694 // Take into account that after unroll conjoined heads and tails will fold, |
689 // otherwise policy_unroll() may allow more unrolling than max unrolling. |
695 // otherwise policy_unroll() may allow more unrolling than max unrolling. |
690 uint new_body_size = EMPTY_LOOP_SIZE + (body_size - EMPTY_LOOP_SIZE) * trip_count; |
696 uint new_body_size = est_loop_clone_sz(trip_count, body_size - EMPTY_LOOP_SIZE); |
691 uint tst_body_size = (new_body_size - EMPTY_LOOP_SIZE) / trip_count + EMPTY_LOOP_SIZE; |
697 |
692 if (body_size != tst_body_size) // Check for int overflow |
698 if (new_body_size == UINT_MAX) { // Check for bad estimate (overflow). |
693 return false; |
699 return false; |
|
700 } |
|
701 |
|
702 // Fully unroll a loop with few iterations regardless of the next conditions, |

703 // since following loop optimizations will split such a loop anyway (pre-main-post). |
|
704 if (trip_count <= 3) { |
|
705 return phase->may_require_nodes(new_body_size); |
|
706 } |
|
707 |
694 if (new_body_size > unroll_limit || |
708 if (new_body_size > unroll_limit || |
695 // Unrolling can result in a large amount of node construction |
709 // Unrolling can result in a large amount of node construction |
696 new_body_size >= phase->C->max_node_limit() - phase->C->live_nodes()) { |
710 phase->exceeding_node_budget(new_body_size)) { |
697 return false; |
711 return false; |
698 } |
712 } |
699 |
713 |
700 // Do not unroll a loop with String intrinsics code. |
714 // Do not unroll a loop with String intrinsics code. |
701 // String intrinsics are large and have loops. |
715 // String intrinsics are large and have loops. |
721 } |
735 } |
722 #endif |
736 #endif |
723 } // switch |
737 } // switch |
724 } |
738 } |
725 |
739 |
726 return true; // Do maximally unroll |
740 return phase->may_require_nodes(new_body_size); |
727 } |
741 } |
728 |
742 |
729 |
743 |
730 //------------------------------policy_unroll---------------------------------- |
744 //------------------------------policy_unroll---------------------------------- |
731 // Return TRUE if the loop should be unrolled, FALSE otherwise. Unroll if |
745 // Return TRUE if the loop should be unrolled, FALSE otherwise. Unroll if the |
732 // the loop is a CountedLoop and the body is small enough. |
746 // loop is a CountedLoop and the body is small enough. |
733 bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) { |
747 bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) { |
734 |
748 |
735 CountedLoopNode *cl = _head->as_CountedLoop(); |
749 CountedLoopNode *cl = _head->as_CountedLoop(); |
736 assert(cl->is_normal_loop() || cl->is_main_loop(), ""); |
750 assert(cl->is_normal_loop() || cl->is_main_loop(), ""); |
737 |
751 |
738 if (!cl->is_valid_counted_loop()) |
752 if (!cl->is_valid_counted_loop()) { |
739 return false; // Malformed counted loop |
753 return false; // Malformed counted loop |
|
754 } |
|
755 |
|
756 // If nodes are depleted, some transform has miscalculated its needs. |
|
757 assert(!phase->exceeding_node_budget(), "sanity"); |
740 |
758 |
741 // Protect against over-unrolling. |
759 // Protect against over-unrolling. |
742 // After split at least one iteration will be executed in pre-loop. |
760 // After split at least one iteration will be executed in pre-loop. |
743 if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false; |
761 if (cl->trip_count() <= (cl->is_normal_loop() ? 2u : 1u)) { |
744 |
762 return false; |
745 _local_loop_unroll_limit = LoopUnrollLimit; |
763 } |
|
764 _local_loop_unroll_limit = LoopUnrollLimit; |
746 _local_loop_unroll_factor = 4; |
765 _local_loop_unroll_factor = 4; |
747 int future_unroll_cnt = cl->unrolled_count() * 2; |
766 int future_unroll_cnt = cl->unrolled_count() * 2; |
748 if (!cl->is_vectorized_loop()) { |
767 if (!cl->is_vectorized_loop()) { |
749 if (future_unroll_cnt > LoopMaxUnroll) return false; |
768 if (future_unroll_cnt > LoopMaxUnroll) return false; |
750 } else { |
769 } else { |
865 |
884 |
866 int slp_max_unroll_factor = cl->slp_max_unroll(); |
885 int slp_max_unroll_factor = cl->slp_max_unroll(); |
867 if ((LoopMaxUnroll < slp_max_unroll_factor) && FLAG_IS_DEFAULT(LoopMaxUnroll) && UseSubwordForMaxVector) { |
886 if ((LoopMaxUnroll < slp_max_unroll_factor) && FLAG_IS_DEFAULT(LoopMaxUnroll) && UseSubwordForMaxVector) { |
868 LoopMaxUnroll = slp_max_unroll_factor; |
887 LoopMaxUnroll = slp_max_unroll_factor; |
869 } |
888 } |
|
889 |
|
890 uint estimate = est_loop_clone_sz(2, body_size); |
|
891 |
870 if (cl->has_passed_slp()) { |
892 if (cl->has_passed_slp()) { |
871 if (slp_max_unroll_factor >= future_unroll_cnt) return true; |
893 if (slp_max_unroll_factor >= future_unroll_cnt) { |
872 // Normal case: loop too big |
894 return phase->may_require_nodes(estimate); |
873 return false; |
895 } |
|
896 return false; // Loop too big. |
874 } |
897 } |
875 |
898 |
876 // Check for being too big |
899 // Check for being too big |
877 if (body_size > (uint)_local_loop_unroll_limit) { |
900 if (body_size > (uint)_local_loop_unroll_limit) { |
878 if ((cl->is_subword_loop() || xors_in_loop >= 4) && body_size < (uint)LoopUnrollLimit * 4) { |
901 if ((cl->is_subword_loop() || xors_in_loop >= 4) && body_size < 4u * LoopUnrollLimit) { |
879 return true; |
902 return phase->may_require_nodes(estimate); |
880 } |
903 } |
881 // Normal case: loop too big |
904 return false; // Loop too big. |
882 return false; |
|
883 } |
905 } |
884 |
906 |
885 if (cl->is_unroll_only()) { |
907 if (cl->is_unroll_only()) { |
886 if (TraceSuperWordLoopUnrollAnalysis) { |
908 if (TraceSuperWordLoopUnrollAnalysis) { |
887 tty->print_cr("policy_unroll passed vector loop(vlen=%d,factor = %d)\n", slp_max_unroll_factor, future_unroll_cnt); |
909 tty->print_cr("policy_unroll passed vector loop(vlen=%d, factor=%d)\n", |
|
910 slp_max_unroll_factor, future_unroll_cnt); |
888 } |
911 } |
889 } |
912 } |
890 |
913 |
891 // Unroll once! (Each trip will soon do double iterations) |
914 // Unroll once! (Each trip will soon do double iterations) |
892 return true; |
915 return phase->may_require_nodes(estimate); |
893 } |
916 } |
894 |
917 |
895 void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_cnt) { |
918 void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_cnt) { |
|
919 |
|
920 // If nodes are depleted, some transform has miscalculated its needs. |
|
921 assert(!phase->exceeding_node_budget(), "sanity"); |
|
922 |
896 // Enable this functionality target by target as needed |
923 // Enable this functionality target by target as needed |
897 if (SuperWordLoopUnrollAnalysis) { |
924 if (SuperWordLoopUnrollAnalysis) { |
898 if (!cl->was_slp_analyzed()) { |
925 if (!cl->was_slp_analyzed()) { |
899 SuperWord sw(phase); |
926 SuperWord sw(phase); |
900 sw.transform_loop(this, false); |
927 sw.transform_loop(this, false); |
934 // Return TRUE if the loop should be range-check-eliminated, FALSE otherwise. |
961 // Return TRUE if the loop should be range-check-eliminated, FALSE otherwise. |
935 // Actually we do iteration-splitting, a more powerful form of RCE. |
962 // Actually we do iteration-splitting, a more powerful form of RCE. |
936 bool IdealLoopTree::policy_range_check(PhaseIdealLoop *phase) const { |
963 bool IdealLoopTree::policy_range_check(PhaseIdealLoop *phase) const { |
937 if (!RangeCheckElimination) return false; |
964 if (!RangeCheckElimination) return false; |
938 |
965 |
|
966 // If nodes are depleted, some transform has miscalculated its needs. |
|
967 assert(!phase->exceeding_node_budget(), "sanity"); |
|
968 |
939 CountedLoopNode *cl = _head->as_CountedLoop(); |
969 CountedLoopNode *cl = _head->as_CountedLoop(); |
940 // If we unrolled with no intention of doing RCE and we later |
970 // If we unrolled with no intention of doing RCE and we later |
941 // changed our minds, we got no pre-loop. Either we need to |
971 // changed our minds, we got no pre-loop. Either we need to |
942 // make a new pre-loop, or we gotta disallow RCE. |
972 // make a new pre-loop, or we gotta disallow RCE. |
943 if (cl->is_main_no_pre_loop()) return false; // Disallowed for now. |
973 if (cl->is_main_no_pre_loop()) return false; // Disallowed for now. |
984 } |
1014 } |
985 |
1015 |
986 if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) { |
1016 if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) { |
987 continue; |
1017 continue; |
988 } |
1018 } |
989 // Yeah! Found a test like 'trip+off vs limit' |
1019 // Found a test like 'trip+off vs limit'. Test is an IfNode, has two |
990 // Test is an IfNode, has 2 projections. If BOTH are in the loop |
1020 // (2) projections. If BOTH are in the loop we need loop unswitching |
991 // we need loop unswitching instead of iteration splitting. |
1021 // instead of iteration splitting. |
992 if (is_loop_exit(iff)) { |
1022 if (is_loop_exit(iff)) { |
993 return true; // Found reason to split iterations |
1023 // Found valid reason to split iterations (if there is room). |
|
1024 // NOTE: Usually a gross overestimate. |
|
1025 return phase->may_require_nodes(est_loop_clone_sz(2, _body.size())); |
994 } |
1026 } |
995 } // End of is IF |
1027 } // End of is IF |
996 } |
1028 } |
997 |
1029 |
998 return false; |
1030 return false; |
1000 |
1032 |
1001 //------------------------------policy_peel_only------------------------------- |
1033 //------------------------------policy_peel_only------------------------------- |
1002 // Return TRUE if the loop should NEVER be RCE'd or aligned, FALSE otherwise. Useful |
1034 // Return TRUE if the loop should NEVER be RCE'd or aligned, FALSE otherwise. Useful |
1003 // for unrolling loops with NO array accesses. |
1035 // for unrolling loops with NO array accesses. |
1004 bool IdealLoopTree::policy_peel_only(PhaseIdealLoop *phase) const { |
1036 bool IdealLoopTree::policy_peel_only(PhaseIdealLoop *phase) const { |
|
1037 |
|
1038 // If nodes are depleted, some transform has miscalculated its needs. |
|
1039 assert(!phase->exceeding_node_budget(), "sanity"); |
|
1040 |
1005 // check for vectorized loops, any peeling done was already applied |
1041 // check for vectorized loops, any peeling done was already applied |
1006 if (_head->is_CountedLoop() && _head->as_CountedLoop()->is_unroll_only()) { |
1042 if (_head->is_CountedLoop() && _head->as_CountedLoop()->is_unroll_only()) { |
1007 return false; |
1043 return false; |
1008 } |
1044 } |
1009 |
1045 |
1483 CountedLoopNode *cl = loop->_head->as_CountedLoop(); |
1519 CountedLoopNode *cl = loop->_head->as_CountedLoop(); |
1484 |
1520 |
1485 // only process vectorized main loops |
1521 // only process vectorized main loops |
1486 if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return; |
1522 if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return; |
1487 |
1523 |
|
1524 if (!may_require_nodes(est_loop_clone_sz(2, loop->_body.size()))) { |
|
1525 return; |
|
1526 } |
1488 int slp_max_unroll_factor = cl->slp_max_unroll(); |
1527 int slp_max_unroll_factor = cl->slp_max_unroll(); |
1489 int cur_unroll = cl->unrolled_count(); |
1528 int cur_unroll = cl->unrolled_count(); |
1490 |
1529 |
1491 if (slp_max_unroll_factor == 0) return; |
1530 if (slp_max_unroll_factor == 0) return; |
1492 |
1531 |
1827 assert(opaq != NULL && opaq->in(1) == limit, "sanity"); |
1866 assert(opaq != NULL && opaq->in(1) == limit, "sanity"); |
1828 |
1867 |
1829 // Verify that policy_unroll result is still valid. |
1868 // Verify that policy_unroll result is still valid. |
1830 const TypeInt* limit_type = _igvn.type(limit)->is_int(); |
1869 const TypeInt* limit_type = _igvn.type(limit)->is_int(); |
1831 assert(stride_con > 0 && ((limit_type->_hi - stride_con) < limit_type->_hi) || |
1870 assert(stride_con > 0 && ((limit_type->_hi - stride_con) < limit_type->_hi) || |
1832 stride_con < 0 && ((limit_type->_lo - stride_con) > limit_type->_lo), "sanity"); |
1871 stride_con < 0 && ((limit_type->_lo - stride_con) > limit_type->_lo), |
|
1872 "sanity"); |
1833 |
1873 |
1834 if (limit->is_Con()) { |
1874 if (limit->is_Con()) { |
1835 // The check in policy_unroll and the assert above guarantee |
1875 // The check in policy_unroll and the assert above guarantee |
1836 // no underflow if limit is constant. |
1876 // no underflow if limit is constant. |
1837 new_limit = _igvn.intcon(limit->get_int() - stride_con); |
1877 new_limit = _igvn.intcon(limit->get_int() - stride_con); |
1896 register_new_node(adj_bool, ctrl); |
1936 register_new_node(adj_bool, ctrl); |
1897 new_limit = new CMoveINode(adj_bool, adj_limit, adj_max, TypeInt::INT); |
1937 new_limit = new CMoveINode(adj_bool, adj_limit, adj_max, TypeInt::INT); |
1898 } |
1938 } |
1899 register_new_node(new_limit, ctrl); |
1939 register_new_node(new_limit, ctrl); |
1900 } |
1940 } |
|
1941 |
1901 assert(new_limit != NULL, ""); |
1942 assert(new_limit != NULL, ""); |
1902 // Replace in loop test. |
1943 // Replace in loop test. |
1903 assert(loop_end->in(1)->in(1) == cmp, "sanity"); |
1944 assert(loop_end->in(1)->in(1) == cmp, "sanity"); |
1904 if (cmp->outcnt() == 1 && loop_end->in(1)->outcnt() == 1) { |
1945 if (cmp->outcnt() == 1 && loop_end->in(1)->outcnt() == 1) { |
1905 // Don't need to create new test since only one user. |
1946 // Don't need to create new test since only one user. |
3133 } |
3173 } |
3134 // Check and remove empty loops (spam micro-benchmarks) |
3174 // Check and remove empty loops (spam micro-benchmarks) |
3135 if (do_remove_empty_loop(phase)) { |
3175 if (do_remove_empty_loop(phase)) { |
3136 return true; // Here we removed an empty loop |
3176 return true; // Here we removed an empty loop |
3137 } |
3177 } |
3138 bool should_peel = policy_peeling(phase); // Should we peel? |
3178 |
3139 |
3179 AutoNodeBudget node_budget(phase); |
|
3180 |
|
3181 bool should_peel = policy_peeling(phase); |
3140 bool should_unswitch = policy_unswitching(phase); |
3182 bool should_unswitch = policy_unswitching(phase); |
3141 |
3183 |
3142 // Non-counted loops may be peeled; exactly 1 iteration is peeled. |
3184 // Non-counted loops may be peeled; exactly 1 iteration is peeled. |
3143 // This removes loop-invariant tests (usually null checks). |
3185 // This removes loop-invariant tests (usually null checks). |
3144 if (!_head->is_CountedLoop()) { // Non-counted loop |
3186 if (!_head->is_CountedLoop()) { // Non-counted loop |
3169 if (cl->is_normal_loop()) { |
3211 if (cl->is_normal_loop()) { |
3170 if (should_unswitch) { |
3212 if (should_unswitch) { |
3171 phase->do_unswitching(this, old_new); |
3213 phase->do_unswitching(this, old_new); |
3172 return true; |
3214 return true; |
3173 } |
3215 } |
3174 bool should_maximally_unroll = policy_maximally_unroll(phase); |
3216 bool should_maximally_unroll = policy_maximally_unroll(phase); |
3175 if (should_maximally_unroll) { |
3217 if (should_maximally_unroll) { |
3176 // Here we did some unrolling and peeling. Eventually we will |
3218 // Here we did some unrolling and peeling. Eventually we will |
3177 // completely unroll this loop and it will no longer be a loop. |
3219 // completely unroll this loop and it will no longer be a loop. |
3178 phase->do_maximally_unroll(this,old_new); |
3220 phase->do_maximally_unroll(this, old_new); |
3179 return true; |
3221 return true; |
3180 } |
3222 } |
3181 } |
|
3182 |
|
3183 // Skip next optimizations if running low on nodes. Note that |
|
3184 // policy_unswitching and policy_maximally_unroll have this check. |
|
3185 int nodes_left = phase->C->max_node_limit() - phase->C->live_nodes(); |
|
3186 if ((int)(2 * _body.size()) > nodes_left) { |
|
3187 return true; |
|
3188 } |
3223 } |
3189 |
3224 |
3190 // Counted loops may be peeled, may need some iterations run up |
3225 // Counted loops may be peeled, may need some iterations run up |
3191 // front for RCE, and may want to align loop refs to a cache |
3226 // front for RCE, and may want to align loop refs to a cache |
3192 // line. Thus we clone a full loop up front whose trip count is |
3227 // line. Thus we clone a full loop up front whose trip count is |
3198 |
3233 |
3199 // A post-loop will finish any odd iterations (leftover after |
3234 // A post-loop will finish any odd iterations (leftover after |
3200 // unrolling), plus any needed for RCE purposes. |
3235 // unrolling), plus any needed for RCE purposes. |
3201 |
3236 |
3202 bool should_unroll = policy_unroll(phase); |
3237 bool should_unroll = policy_unroll(phase); |
3203 |
3238 bool should_rce = policy_range_check(phase); |
3204 bool should_rce = policy_range_check(phase); |
3239 // TODO: Remove align -- not used. |
3205 |
3240 bool should_align = policy_align(phase); |
3206 bool should_align = policy_align(phase); |
3241 |
3207 |
3242 // If not RCE'ing (iteration splitting) or Aligning, then we do not need a |
3208 // If not RCE'ing (iteration splitting) or Aligning, then we do not |
3243 // pre-loop. We may still need to peel an initial iteration but we will not |
3209 // need a pre-loop. We may still need to peel an initial iteration but |
3244 // be needing an unknown number of pre-iterations. |
3210 // we will not be needing an unknown number of pre-iterations. |
|
3211 // |
3245 // |
3212 // Basically, if may_rce_align reports FALSE first time through, |
3246 // Basically, if may_rce_align reports FALSE first time through, we will not |
3213 // we will not be able to later do RCE or Aligning on this loop. |
3247 // be able to later do RCE or Aligning on this loop. |
3214 bool may_rce_align = !policy_peel_only(phase) || should_rce || should_align; |
3248 bool may_rce_align = !policy_peel_only(phase) || should_rce || should_align; |
3215 |
3249 |
3216 // If we have any of these conditions (RCE, alignment, unrolling) met, then |
3250 // If we have any of these conditions (RCE, alignment, unrolling) met, then |
3217 // we switch to the pre-/main-/post-loop model. This model also covers |
3251 // we switch to the pre-/main-/post-loop model. This model also covers |
3218 // peeling. |
3252 // peeling. |
3219 if (should_rce || should_align || should_unroll) { |
3253 if (should_rce || should_align || should_unroll) { |
3220 if (cl->is_normal_loop()) // Convert to 'pre/main/post' loops |
3254 if (cl->is_normal_loop()) { // Convert to 'pre/main/post' loops |
|
3255 if (!phase->may_require_nodes(est_loop_clone_sz(3, _body.size()))) { |
|
3256 return false; |
|
3257 } |
3221 phase->insert_pre_post_loops(this,old_new, !may_rce_align); |
3258 phase->insert_pre_post_loops(this,old_new, !may_rce_align); |
3222 |
3259 } |
3223 // Adjust the pre- and main-loop limits to let the pre and post loops run |
3260 // Adjust the pre- and main-loop limits to let the pre and post loops run |
3224 // with full checks, but the main-loop with no checks. Remove said |
3261 // with full checks, but the main-loop with no checks. Remove said |
3225 // checks from the main body. |
3262 // checks from the main body. |
3226 if (should_rce) { |
3263 if (should_rce) { |
3227 if (phase->do_range_check(this, old_new) != 0) { |
3264 if (phase->do_range_check(this, old_new) != 0) { |
3284 if (_allow_optimizations && is_innermost()) { |
3321 if (_allow_optimizations && is_innermost()) { |
3285 if (!_has_call) { |
3322 if (!_has_call) { |
3286 if (!iteration_split_impl(phase, old_new)) { |
3323 if (!iteration_split_impl(phase, old_new)) { |
3287 return false; |
3324 return false; |
3288 } |
3325 } |
3289 } else if (policy_unswitching(phase)) { |
3326 } else { |
3290 phase->do_unswitching(this, old_new); |
3327 AutoNodeBudget node_budget(phase); |
|
3328 if (policy_unswitching(phase)) { |
|
3329 phase->do_unswitching(this, old_new); |
|
3330 } |
3291 } |
3331 } |
3292 } |
3332 } |
3293 |
3333 |
3294 // Minor offset re-organization to remove loop-fallout uses of |
3334 // Minor offset re-organization to remove loop-fallout uses of |
3295 // trip counter when there was no major reshaping. |
3335 // trip counter when there was no major reshaping. |