7004555: Add new policy for one iteration loops
Summary: Add new policy for one iteration loops (mostly formal pre- loops).
Reviewed-by: never
--- a/hotspot/src/share/vm/opto/loopTransform.cpp Thu Apr 07 21:32:23 2011 -0700
+++ b/hotspot/src/share/vm/opto/loopTransform.cpp Fri Apr 08 14:56:22 2011 -0700
@@ -63,6 +63,46 @@
}
}
+//------------------------------compute_exact_trip_count-----------------------
+// Compute loop exact trip count if possible. Do not recalculate trip count for
+// split loops (pre-main-post) which have their limits and inits behind Opaque node.
+void IdealLoopTree::compute_exact_trip_count( PhaseIdealLoop *phase ) {
+ if (!_head->as_Loop()->is_valid_counted_loop()) {
+ return;
+ }
+ CountedLoopNode* cl = _head->as_CountedLoop();
+ // Trip count may become nonexact for iteration split loops since
+ // RCE modifies limits. Note, _trip_count value is not reset since
+ // it is used to limit unrolling of main loop.
+ cl->set_nonexact_trip_count();
+
+ // Loop's test should be part of loop.
+ if (!phase->is_member(this, phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue))))
+ return; // Infinite loop
+
+#ifdef ASSERT
+ BoolTest::mask bt = cl->loopexit()->test_trip();
+ assert(bt == BoolTest::lt || bt == BoolTest::gt ||
+ bt == BoolTest::ne, "canonical test is expected");
+#endif
+
+ Node* init_n = cl->init_trip();
+ Node* limit_n = cl->limit();
+ if (init_n != NULL && init_n->is_Con() &&
+ limit_n != NULL && limit_n->is_Con()) {
+ // Use longs to avoid integer overflow.
+ int stride_con = cl->stride_con();
+ long init_con = cl->init_trip()->get_int();
+ long limit_con = cl->limit()->get_int();
+ int stride_m = stride_con - (stride_con > 0 ? 1 : -1);
+ long trip_count = (limit_con - init_con + stride_m)/stride_con;
+ if (trip_count > 0 && (julong)trip_count < (julong)max_juint) {
+ // Set exact trip count.
+ cl->set_exact_trip_count((uint)trip_count);
+ }
+ }
+}
+
//------------------------------compute_profile_trip_cnt----------------------------
// Compute loop trip count from profile data as
// (backedge_count + loop_exit_count) / loop_exit_count
@@ -533,29 +573,15 @@
if (!cl->is_valid_counted_loop())
return false; // Malformed counted loop
- Node *init_n = cl->init_trip();
- Node *limit_n = cl->limit();
-
- // Non-constant bounds
- if (init_n == NULL || !init_n->is_Con() ||
- limit_n == NULL || !limit_n->is_Con()) {
+ if (!cl->has_exact_trip_count()) {
+ // Trip count is not exact.
return false;
}
- // Use longs to avoid integer overflow.
- int stride_con = cl->stride_con();
- long init_con = cl->init_trip()->get_int();
- long limit_con = cl->limit()->get_int();
- int stride_m = stride_con - (stride_con > 0 ? 1 : -1);
- long trip_cnt = (limit_con - init_con + stride_m)/stride_con;
- // Note, max_jint is used to indicate unknown trip count.
- if (trip_cnt <= 0 || trip_cnt >= (long)max_jint) {
- // return a false (no maximally unroll) and the regular unroll/peel
- // route will make a small mess which CCP will fold away.
- return false;
- }
- uint trip_count = (uint)trip_cnt;
- cl->set_trip_count(trip_count);
+ uint trip_count = cl->trip_count();
+ // Note, max_juint is used to indicate unknown trip count.
+ assert(trip_count > 1, "one iteration loop should be optimized out already");
+ assert(trip_count < max_juint, "exact trip_count should be less than max_uint.");
// Real policy: if we maximally unroll, does it get too big?
// Allow the unrolled mess to get larger than standard loop
@@ -1096,7 +1122,7 @@
tty->print("Unrolling ");
loop->dump_head();
} else if (TraceLoopOpts) {
- if (loop_head->trip_count() < LoopUnrollLimit) {
+ if (loop_head->trip_count() < (uint)LoopUnrollLimit) {
tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count());
} else {
tty->print("Unroll %d ", loop_head->unrolled_count()*2);
@@ -1802,6 +1828,17 @@
// main and post loops have explicitly created zero trip guard
bool needs_guard = !cl->is_main_loop() && !cl->is_post_loop();
if (needs_guard) {
+ // Skip guard if values not overlap.
+ const TypeInt* init_t = phase->_igvn.type(cl->init_trip())->is_int();
+ const TypeInt* limit_t = phase->_igvn.type(cl->limit())->is_int();
+ int stride_con = cl->stride_con();
+ if (stride_con > 0) {
+ needs_guard = (init_t->_hi >= limit_t->_lo);
+ } else {
+ needs_guard = (init_t->_lo <= limit_t->_hi);
+ }
+ }
+ if (needs_guard) {
// Check for an obvious zero trip guard.
Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->in(LoopNode::EntryControl));
if (inctrl->Opcode() == Op_IfTrue) {
@@ -1846,12 +1883,49 @@
return true;
}
+//------------------------------policy_do_one_iteration_loop-------------------
+// Convert one iteration loop into normal code.
+bool IdealLoopTree::policy_do_one_iteration_loop( PhaseIdealLoop *phase ) {
+ if (!_head->as_Loop()->is_valid_counted_loop())
+ return false; // Only for counted loop
+
+ CountedLoopNode *cl = _head->as_CountedLoop();
+ if (!cl->has_exact_trip_count() || cl->trip_count() != 1) {
+ return false;
+ }
+
+#ifndef PRODUCT
+ if(TraceLoopOpts) {
+ tty->print("OneIteration ");
+ this->dump_head();
+ }
+#endif
+
+ Node *init_n = cl->init_trip();
+#ifdef ASSERT
+ // Loop boundaries should be constant since trip count is exact.
+ assert(init_n->get_int() + cl->stride_con() >= cl->limit()->get_int(), "should be one iteration");
+#endif
+ // Replace the phi at loop head with the value of the init_trip.
+ // Then the CountedLoopEnd will collapse (backedge will not be taken)
+ // and all loop-invariant uses of the exit values will be correct.
+ phase->_igvn.replace_node(cl->phi(), cl->init_trip());
+ phase->C->set_major_progress();
+ return true;
+}
//=============================================================================
//------------------------------iteration_split_impl---------------------------
bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) {
+ // Compute exact loop trip count if possible.
+ compute_exact_trip_count(phase);
+
+ // Convert one iteration loop into normal code.
+ if (policy_do_one_iteration_loop(phase))
+ return true;
+
// Check and remove empty loops (spam micro-benchmarks)
- if( policy_do_remove_empty_loop(phase) )
+ if (policy_do_remove_empty_loop(phase))
return true; // Here we removed an empty loop
bool should_peel = policy_peeling(phase); // Should we peel?
@@ -1860,40 +1934,40 @@
// Non-counted loops may be peeled; exactly 1 iteration is peeled.
// This removes loop-invariant tests (usually null checks).
- if( !_head->is_CountedLoop() ) { // Non-counted loop
+ if (!_head->is_CountedLoop()) { // Non-counted loop
if (PartialPeelLoop && phase->partial_peel(this, old_new)) {
// Partial peel succeeded so terminate this round of loop opts
return false;
}
- if( should_peel ) { // Should we peel?
+ if (should_peel) { // Should we peel?
#ifndef PRODUCT
if (PrintOpto) tty->print_cr("should_peel");
#endif
phase->do_peeling(this,old_new);
- } else if( should_unswitch ) {
+ } else if (should_unswitch) {
phase->do_unswitching(this, old_new);
}
return true;
}
CountedLoopNode *cl = _head->as_CountedLoop();
- if( !cl->loopexit() ) return true; // Ignore various kinds of broken loops
+ if (!cl->loopexit()) return true; // Ignore various kinds of broken loops
// Do nothing special to pre- and post- loops
- if( cl->is_pre_loop() || cl->is_post_loop() ) return true;
+ if (cl->is_pre_loop() || cl->is_post_loop()) return true;
// Compute loop trip count from profile data
compute_profile_trip_cnt(phase);
// Before attempting fancy unrolling, RCE or alignment, see if we want
// to completely unroll this loop or do loop unswitching.
- if( cl->is_normal_loop() ) {
+ if (cl->is_normal_loop()) {
if (should_unswitch) {
phase->do_unswitching(this, old_new);
return true;
}
bool should_maximally_unroll = policy_maximally_unroll(phase);
- if( should_maximally_unroll ) {
+ if (should_maximally_unroll) {
// Here we did some unrolling and peeling. Eventually we will
// completely unroll this loop and it will no longer be a loop.
phase->do_maximally_unroll(this,old_new);
@@ -1937,14 +2011,14 @@
// If we have any of these conditions (RCE, alignment, unrolling) met, then
// we switch to the pre-/main-/post-loop model. This model also covers
// peeling.
- if( should_rce || should_align || should_unroll ) {
- if( cl->is_normal_loop() ) // Convert to 'pre/main/post' loops
+ if (should_rce || should_align || should_unroll) {
+ if (cl->is_normal_loop()) // Convert to 'pre/main/post' loops
phase->insert_pre_post_loops(this,old_new, !may_rce_align);
// Adjust the pre- and main-loop limits to let the pre and post loops run
// with full checks, but the main-loop with no checks. Remove said
// checks from the main body.
- if( should_rce )
+ if (should_rce)
phase->do_range_check(this,old_new);
// Double loop body for unrolling. Adjust the minimum-trip test (will do
@@ -1952,16 +2026,16 @@
// an even number of trips). If we are peeling, we might enable some RCE
// and we'd rather unroll the post-RCE'd loop SO... do not unroll if
// peeling.
- if( should_unroll && !should_peel )
- phase->do_unroll(this,old_new, true);
+ if (should_unroll && !should_peel)
+ phase->do_unroll(this,old_new, true);
// Adjust the pre-loop limits to align the main body
// iterations.
- if( should_align )
+ if (should_align)
Unimplemented();
} else { // Else we have an unchanged counted loop
- if( should_peel ) // Might want to peel but do nothing else
+ if (should_peel) // Might want to peel but do nothing else
phase->do_peeling(this,old_new);
}
return true;
--- a/hotspot/src/share/vm/opto/loopnode.cpp Thu Apr 07 21:32:23 2011 -0700
+++ b/hotspot/src/share/vm/opto/loopnode.cpp Fri Apr 08 14:56:22 2011 -0700
@@ -1450,6 +1450,21 @@
if (_head->is_CountedLoop()) {
CountedLoopNode *cl = _head->as_CountedLoop();
tty->print(" counted");
+
+ Node* init_n = cl->init_trip();
+ if (init_n != NULL && init_n->is_Con())
+ tty->print(" [%d,", cl->init_trip()->get_int());
+ else
+ tty->print(" [int,");
+ Node* limit_n = cl->limit();
+ if (limit_n != NULL && limit_n->is_Con())
+ tty->print("%d),", cl->limit()->get_int());
+ else
+ tty->print("int),");
+ int stride_con = cl->stride_con();
+ if (stride_con > 0) tty->print("+");
+ tty->print("%d", stride_con);
+
if (cl->is_pre_loop ()) tty->print(" pre" );
if (cl->is_main_loop()) tty->print(" main");
if (cl->is_post_loop()) tty->print(" post");
--- a/hotspot/src/share/vm/opto/loopnode.hpp Thu Apr 07 21:32:23 2011 -0700
+++ b/hotspot/src/share/vm/opto/loopnode.hpp Fri Apr 08 14:56:22 2011 -0700
@@ -57,7 +57,12 @@
protected:
short _loop_flags;
// Names for flag bitfields
- enum { pre_post_main=0, inner_loop=8, partial_peel_loop=16, partial_peel_failed=32 };
+ enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
+ MainHasNoPreLoop=4,
+ HasExactTripCount=8,
+ InnerLoop=16,
+ PartialPeelLoop=32,
+ PartialPeelFailed=64 };
char _unswitch_count;
enum { _unswitch_max=3 };
@@ -65,13 +70,13 @@
// Names for edge indices
enum { Self=0, EntryControl, LoopBackControl };
- int is_inner_loop() const { return _loop_flags & inner_loop; }
- void set_inner_loop() { _loop_flags |= inner_loop; }
+ int is_inner_loop() const { return _loop_flags & InnerLoop; }
+ void set_inner_loop() { _loop_flags |= InnerLoop; }
- int is_partial_peel_loop() const { return _loop_flags & partial_peel_loop; }
- void set_partial_peel_loop() { _loop_flags |= partial_peel_loop; }
- int partial_peel_has_failed() const { return _loop_flags & partial_peel_failed; }
- void mark_partial_peel_failed() { _loop_flags |= partial_peel_failed; }
+ int is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
+ void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
+ int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
+ void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
int unswitch_max() { return _unswitch_max; }
int unswitch_count() { return _unswitch_count; }
@@ -137,8 +142,8 @@
// the Main CountedLoop. Used to assert that we understand the graph shape.
node_idx_t _main_idx;
- // Known trip count calculated by policy_maximally_unroll
- int _trip_count;
+ // Known trip count calculated by compute_exact_trip_count()
+ uint _trip_count;
// Expected trip count from profile data
float _profile_trip_cnt;
@@ -152,7 +157,7 @@
public:
CountedLoopNode( Node *entry, Node *backedge )
- : LoopNode(entry, backedge), _trip_count(max_jint),
+ : LoopNode(entry, backedge), _main_idx(0), _trip_count(max_juint),
_profile_trip_cnt(COUNT_UNKNOWN), _unrolled_count_log2(0),
_node_count_before_unroll(0) {
init_class_id(Class_CountedLoop);
@@ -194,13 +199,12 @@
// A 'main' loop that is ONLY unrolled or peeled, never RCE'd or
// Aligned, may be missing it's pre-loop.
- enum { Normal=0, Pre=1, Main=2, Post=3, PrePostFlagsMask=3, Main_Has_No_Pre_Loop=4 };
- int is_normal_loop() const { return (_loop_flags&PrePostFlagsMask) == Normal; }
- int is_pre_loop () const { return (_loop_flags&PrePostFlagsMask) == Pre; }
- int is_main_loop () const { return (_loop_flags&PrePostFlagsMask) == Main; }
- int is_post_loop () const { return (_loop_flags&PrePostFlagsMask) == Post; }
- int is_main_no_pre_loop() const { return _loop_flags & Main_Has_No_Pre_Loop; }
- void set_main_no_pre_loop() { _loop_flags |= Main_Has_No_Pre_Loop; }
+ int is_normal_loop() const { return (_loop_flags&PreMainPostFlagsMask) == Normal; }
+ int is_pre_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Pre; }
+ int is_main_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Main; }
+ int is_post_loop () const { return (_loop_flags&PreMainPostFlagsMask) == Post; }
+ int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; }
+ void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; }
int main_idx() const { return _main_idx; }
@@ -208,10 +212,19 @@
void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; }
void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; }
void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; }
- void set_normal_loop( ) { _loop_flags &= ~PrePostFlagsMask; }
+ void set_normal_loop( ) { _loop_flags &= ~PreMainPostFlagsMask; }
+
+ void set_trip_count(uint tc) { _trip_count = tc; }
+ uint trip_count() { return _trip_count; }
- void set_trip_count(int tc) { _trip_count = tc; }
- int trip_count() { return _trip_count; }
+ bool has_exact_trip_count() const { return (_loop_flags & HasExactTripCount) != 0; }
+ void set_exact_trip_count(uint tc) {
+ _trip_count = tc;
+ _loop_flags |= HasExactTripCount;
+ }
+ void set_nonexact_trip_count() {
+ _loop_flags &= ~HasExactTripCount;
+ }
void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
float profile_trip_cnt() { return _profile_trip_cnt; }
@@ -384,6 +397,9 @@
// Micro-benchmark spamming. Remove empty loops.
bool policy_do_remove_empty_loop( PhaseIdealLoop *phase );
+ // Convert one iteration loop into normal code.
+ bool policy_do_one_iteration_loop( PhaseIdealLoop *phase );
+
// Return TRUE or FALSE if the loop should be peeled or not. Peel if we can
// make some loop-invariant test (usually a null-check) happen before the
// loop.
@@ -412,6 +428,9 @@
// Return TRUE if "iff" is a range check.
bool is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const;
+ // Compute loop exact trip count if possible
+ void compute_exact_trip_count( PhaseIdealLoop *phase );
+
// Compute loop trip count from profile data
void compute_profile_trip_cnt( PhaseIdealLoop *phase );