6384206: Phis which are later unneeded are impairing our ability to inline based on static types
author never
Wed, 17 Sep 2008 12:59:52 -0700
changeset 1399 9648dfd4ce09
parent 1398 342890a5d031
child 1400 afd034bb8c2e
6384206: Phis which are later unneeded are impairing our ability to inline based on static types Reviewed-by: rasbold, jrose
hotspot/src/share/vm/ci/ciMethodBlocks.cpp
hotspot/src/share/vm/ci/ciMethodBlocks.hpp
hotspot/src/share/vm/ci/ciTypeFlow.cpp
hotspot/src/share/vm/ci/ciTypeFlow.hpp
hotspot/src/share/vm/includeDB_compiler2
hotspot/src/share/vm/opto/bytecodeInfo.cpp
hotspot/src/share/vm/opto/cfgnode.cpp
hotspot/src/share/vm/opto/compile.cpp
hotspot/src/share/vm/opto/compile.hpp
hotspot/src/share/vm/opto/doCall.cpp
hotspot/src/share/vm/opto/graphKit.cpp
hotspot/src/share/vm/opto/loopTransform.cpp
hotspot/src/share/vm/opto/loopnode.cpp
hotspot/src/share/vm/opto/loopnode.hpp
hotspot/src/share/vm/opto/loopopts.cpp
hotspot/src/share/vm/opto/matcher.cpp
hotspot/src/share/vm/opto/parse.hpp
hotspot/src/share/vm/opto/parse1.cpp
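For orientation before the per-file diffs: the rewritten ciTypeFlow below replaces the old "simpler-than" work-list ordering with a depth-first numbering pass. Each block receives a pre_order on its first visit and a post_order once all of its successors have been processed, the blocks are chained into a reverse post-order (RPO) list, and a loop tree is built along the way so that loop heads can be cloned for their backedges; per the synopsis, that keeps the loop-entry path's types from being blurred by phis that later turn out to be unneeded. The following is a minimal standalone sketch of the numbering scheme only, using a hypothetical Node type in place of ciTypeFlow::Block; it mirrors the explicit-stack walk of the new df_flow_types() but is not HotSpot code.

#include <algorithm>
#include <vector>

struct Node {
  std::vector<Node*> succs;   // successor blocks
  int pre_order  = -1;        // assigned on first visit ("visited" tag)
  int post_order = -1;        // assigned once all successors are done
};

static void dfs_number(Node* start, std::vector<Node*>& rpo_list) {
  int next_pre = 0, next_post = 0;
  std::vector<Node*> stk;
  stk.push_back(start);
  while (!stk.empty()) {
    Node* n = stk.back();                  // leave the node on the stack
    if (n->pre_order < 0) {
      n->pre_order = next_pre++;           // forward arc: first visit
    } else if (n->post_order < 0) {
      size_t size = stk.size();
      for (Node* s : n->succs)             // push successors not yet visited
        if (s->pre_order < 0) stk.push_back(s);
      if (stk.size() == size) {            // no new children: post-visit now
        stk.pop_back();
        n->post_order = next_post++;
        rpo_list.push_back(n);             // collected in post order
      }
    } else {
      stk.pop_back();                      // duplicate of a post-visited node
    }
  }
  std::reverse(rpo_list.begin(), rpo_list.end());  // RPO = decreasing post_order
}

Iterating blocks in increasing RPO (equivalently, decreasing post_order) is the standard order for a forward data-flow problem: every forward-edge predecessor of a block is flowed before the block itself, so only backedges force re-flows.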
--- a/hotspot/src/share/vm/ci/ciMethodBlocks.cpp	Wed Sep 17 08:29:17 2008 -0700
+++ b/hotspot/src/share/vm/ci/ciMethodBlocks.cpp	Wed Sep 17 12:59:52 2008 -0700
@@ -49,7 +49,7 @@
 // first half.  Returns the range beginning at bci.
 ciBlock *ciMethodBlocks::split_block_at(int bci) {
   ciBlock *former_block = block_containing(bci);
-  ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, this, former_block->start_bci());
+  ciBlock *new_block = new(_arena) ciBlock(_method, _num_blocks++, former_block->start_bci());
   _blocks->append(new_block);
   assert(former_block != NULL, "must not be NULL");
   new_block->set_limit_bci(bci);
@@ -83,7 +83,7 @@
   if (cb == NULL ) {
     // This is our first time visiting this bytecode.  Create
     // a fresh block and assign it this starting point.
-    ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, this, bci);
+    ciBlock *nb = new(_arena) ciBlock(_method, _num_blocks++, bci);
     _blocks->append(nb);
      _bci_to_block[bci] = nb;
     return nb;
@@ -98,6 +98,11 @@
   }
 }
 
+ciBlock *ciMethodBlocks::make_dummy_block() {
+  ciBlock *dum = new(_arena) ciBlock(_method, -1, 0);
+  return dum;
+}
+
 void ciMethodBlocks::do_analysis() {
   ciBytecodeStream s(_method);
   ciBlock *cur_block = block_containing(0);
@@ -253,7 +258,7 @@
   Copy::zero_to_words((HeapWord*) _bci_to_block, b2bsize / sizeof(HeapWord));
 
   // create initial block covering the entire method
-  ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, this, 0);
+  ciBlock *b = new(arena) ciBlock(_method, _num_blocks++, 0);
   _blocks->append(b);
   _bci_to_block[0] = b;
 
@@ -334,7 +339,7 @@
 #endif
 
 
-ciBlock::ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci) :
+ciBlock::ciBlock(ciMethod *method, int index, int start_bci) :
 #ifndef PRODUCT
                          _method(method),
 #endif
--- a/hotspot/src/share/vm/ci/ciMethodBlocks.hpp	Wed Sep 17 08:29:17 2008 -0700
+++ b/hotspot/src/share/vm/ci/ciMethodBlocks.hpp	Wed Sep 17 12:59:52 2008 -0700
@@ -48,6 +48,8 @@
   int num_blocks()  { return _num_blocks;}
   void clear_processed();
 
+  ciBlock *make_dummy_block(); // a block not associated with a bci
+
 #ifndef PRODUCT
   void dump();
 #endif
@@ -81,7 +83,7 @@
     fall_through_bci = -1
   };
 
-  ciBlock(ciMethod *method, int index, ciMethodBlocks *mb, int start_bci);
+  ciBlock(ciMethod *method, int index, int start_bci);
   int start_bci() const         { return _start_bci; }
   int limit_bci() const         { return _limit_bci; }
   int control_bci() const       { return _control_bci; }
@@ -94,7 +96,6 @@
   int ex_limit_bci() const      { return _ex_limit_bci; }
   bool contains(int bci) const { return start_bci() <= bci && bci < limit_bci(); }
 
-
   // flag handling
   bool  processed() const           { return (_flags & Processed) != 0; }
   bool  is_handler() const          { return (_flags & Handler) != 0; }
--- a/hotspot/src/share/vm/ci/ciTypeFlow.cpp	Wed Sep 17 08:29:17 2008 -0700
+++ b/hotspot/src/share/vm/ci/ciTypeFlow.cpp	Wed Sep 17 12:59:52 2008 -0700
@@ -338,8 +338,10 @@
   }
   _trap_bci = -1;
   _trap_index = 0;
+  _def_locals.clear();
 }
 
+
 // ------------------------------------------------------------------
 // ciTypeFlow::get_start_state
 //
@@ -735,7 +737,7 @@
 void ciTypeFlow::StateVector::do_new(ciBytecodeStream* str) {
   bool will_link;
   ciKlass* klass = str->get_klass(will_link);
-  if (!will_link) {
+  if (!will_link || str->is_unresolved_klass()) {
     trap(str, klass, str->get_klass_index());
   } else {
     push_object(klass);
@@ -1268,7 +1270,9 @@
     }
   case Bytecodes::_iinc:
     {
-      check_int(local(str->get_index()));
+      int lnum = str->get_index();
+      check_int(local(lnum));
+      store_to_local(lnum);
       break;
     }
   case Bytecodes::_iload:   load_local_int(str->get_index()); break;
@@ -1506,6 +1510,46 @@
 }
 #endif
 
+
+// ------------------------------------------------------------------
+// ciTypeFlow::SuccIter::next
+//
+void ciTypeFlow::SuccIter::next() {
+  int succ_ct = _pred->successors()->length();
+  int next = _index + 1;
+  if (next < succ_ct) {
+    _index = next;
+    _succ = _pred->successors()->at(next);
+    return;
+  }
+  for (int i = next - succ_ct; i < _pred->exceptions()->length(); i++) {
+    // Do not compile any code for unloaded exception types.
+    // Following compiler passes are responsible for doing this also.
+    ciInstanceKlass* exception_klass = _pred->exc_klasses()->at(i);
+    if (exception_klass->is_loaded()) {
+      _index = next;
+      _succ = _pred->exceptions()->at(i);
+      return;
+    }
+    next++;
+  }
+  _index = -1;
+  _succ = NULL;
+}
+
+// ------------------------------------------------------------------
+// ciTypeFlow::SuccIter::set_succ
+//
+void ciTypeFlow::SuccIter::set_succ(Block* succ) {
+  int succ_ct = _pred->successors()->length();
+  if (_index < succ_ct) {
+    _pred->successors()->at_put(_index, succ);
+  } else {
+    int idx = _index - succ_ct;
+    _pred->exceptions()->at_put(idx, succ);
+  }
+}
+
 // ciTypeFlow::Block
 //
 // A basic block.
@@ -1526,10 +1570,11 @@
   _jsrs = new_jsrs;
   _next = NULL;
   _on_work_list = false;
-  _pre_order = -1; assert(!has_pre_order(), "");
-  _private_copy = false;
+  _backedge_copy = false;
+  _exception_entry = false;
   _trap_bci = -1;
   _trap_index = 0;
+  df_init();
 
   if (CITraceTypeFlow) {
     tty->print_cr(">> Created new block");
@@ -1541,55 +1586,13 @@
 }
 
 // ------------------------------------------------------------------
-// ciTypeFlow::Block::clone_loop_head
-//
-ciTypeFlow::Block*
-ciTypeFlow::Block::clone_loop_head(ciTypeFlow* analyzer,
-                                   int branch_bci,
-                                   ciTypeFlow::Block* target,
-                                   ciTypeFlow::JsrSet* jsrs) {
-  // Loop optimizations are not performed on Tier1 compiles. Do nothing.
-  if (analyzer->env()->comp_level() < CompLevel_full_optimization) {
-    return target;
-  }
-
-  // The current block ends with a branch.
-  //
-  // If the target block appears to be the test-clause of a for loop, and
-  // it is not too large, and it has not yet been cloned, clone it.
-  // The pre-existing copy becomes the private clone used only by
-  // the initial iteration of the loop.  (We know we are simulating
-  // the initial iteration right now, since we have never calculated
-  // successors before for this block.)
-
-  if (branch_bci <= start()
-      && (target->limit() - target->start()) <= CICloneLoopTestLimit
-      && target->private_copy_count() == 0) {
-    // Setting the private_copy bit ensures that the target block cannot be
-    // reached by any other paths, such as fall-in from the loop body.
-    // The private copy will be accessible only on successor lists
-    // created up to this point.
-    target->set_private_copy(true);
-    if (CITraceTypeFlow) {
-      tty->print(">> Cloning a test-clause block ");
-      print_value_on(tty);
-      tty->cr();
-    }
-    // If the target is the current block, then later on a new copy of the
-    // target block will be created when its bytecodes are reached by
-    // an alternate path. (This is the case for loops with the loop
-    // head at the bci-wise bottom of the loop, as with pre-1.4.2 javac.)
-    //
-    // Otherwise, duplicate the target block now and use it immediately.
-    // (The case for loops with the loop head at the bci-wise top of the
-    // loop, as with 1.4.2 javac.)
-    //
-    // In either case, the new copy of the block will remain public.
-    if (target != this) {
-      target = analyzer->block_at(branch_bci, jsrs);
-    }
-  }
-  return target;
+// ciTypeFlow::Block::df_init
+void ciTypeFlow::Block::df_init() {
+  _pre_order = -1; assert(!has_pre_order(), "");
+  _post_order = -1; assert(!has_post_order(), "");
+  _loop = NULL;
+  _irreducible_entry = false;
+  _rpo_next = NULL;
 }
 
 // ------------------------------------------------------------------
@@ -1644,7 +1647,6 @@
       case Bytecodes::_ifnull:       case Bytecodes::_ifnonnull:
         // Our successors are the branch target and the next bci.
         branch_bci = str->get_dest();
-        clone_loop_head(analyzer, branch_bci, this, jsrs);
         _successors =
           new (arena) GrowableArray<Block*>(arena, 2, 0, NULL);
         assert(_successors->length() == IF_NOT_TAKEN, "");
@@ -1658,14 +1660,7 @@
         _successors =
           new (arena) GrowableArray<Block*>(arena, 1, 0, NULL);
         assert(_successors->length() == GOTO_TARGET, "");
-        target = analyzer->block_at(branch_bci, jsrs);
-        // If the target block has not been visited yet, and looks like
-        // a two-way branch, attempt to clone it if it is a loop head.
-        if (target->_successors != NULL
-            && target->_successors->length() == (IF_TAKEN + 1)) {
-          target = clone_loop_head(analyzer, branch_bci, target, jsrs);
-        }
-        _successors->append(target);
+        _successors->append(analyzer->block_at(branch_bci, jsrs));
         break;
 
       case Bytecodes::_jsr:
@@ -1801,65 +1796,60 @@
 }
 
 // ------------------------------------------------------------------
-// ciTypeFlow::Block::is_simpler_than
-//
-// A relation used to order our work list.  We work on a block earlier
-// if it has a smaller jsr stack or it occurs earlier in the program
-// text.
-//
-// Note: maybe we should redo this functionality to make blocks
-// which correspond to exceptions lower priority.
-bool ciTypeFlow::Block::is_simpler_than(ciTypeFlow::Block* other) {
-  if (other == NULL) {
-    return true;
-  } else {
-    int size1 = _jsrs->size();
-    int size2 = other->_jsrs->size();
-    if (size1 < size2) {
-      return true;
-    } else if (size2 < size1) {
-      return false;
-    } else {
-#if 0
-      if (size1 > 0) {
-        int r1 = _jsrs->record_at(0)->return_address();
-        int r2 = _jsrs->record_at(0)->return_address();
-        if (r1 < r2) {
-          return true;
-        } else if (r2 < r1) {
-          return false;
-        } else {
-          int e1 = _jsrs->record_at(0)->return_address();
-          int e2 = _jsrs->record_at(0)->return_address();
-          if (e1 < e2) {
-            return true;
-          } else if (e2 < e1) {
-            return false;
-          }
-        }
-      }
-#endif
-      return (start() <= other->start());
-    }
-  }
+// ciTypeFlow::Block::set_backedge_copy
+// Use this only to make a pre-existing public block into a backedge copy.
+void ciTypeFlow::Block::set_backedge_copy(bool z) {
+  assert(z || (z == is_backedge_copy()), "cannot make a backedge copy public");
+  _backedge_copy = z;
 }
 
 // ------------------------------------------------------------------
-// ciTypeFlow::Block::set_private_copy
-// Use this only to make a pre-existing public block into a private copy.
-void ciTypeFlow::Block::set_private_copy(bool z) {
-  assert(z || (z == is_private_copy()), "cannot make a private copy public");
-  _private_copy = z;
+// ciTypeFlow::Block::is_clonable_exit
+//
+// At most 2 normal successors, one of which continues looping,
+// and all exceptional successors must exit.
+bool ciTypeFlow::Block::is_clonable_exit(ciTypeFlow::Loop* lp) {
+  int normal_cnt  = 0;
+  int in_loop_cnt = 0;
+  for (SuccIter iter(this); !iter.done(); iter.next()) {
+    Block* succ = iter.succ();
+    if (iter.is_normal_ctrl()) {
+      if (++normal_cnt > 2) return false;
+      if (lp->contains(succ->loop())) {
+        if (++in_loop_cnt > 1) return false;
+      }
+    } else {
+      if (lp->contains(succ->loop())) return false;
+    }
+  }
+  return in_loop_cnt == 1;
+}
+
+// ------------------------------------------------------------------
+// ciTypeFlow::Block::looping_succ
+//
+ciTypeFlow::Block* ciTypeFlow::Block::looping_succ(ciTypeFlow::Loop* lp) {
+  assert(successors()->length() <= 2, "at most 2 normal successors");
+  for (SuccIter iter(this); !iter.done(); iter.next()) {
+    Block* succ = iter.succ();
+    if (lp->contains(succ->loop())) {
+      return succ;
+    }
+  }
+  return NULL;
 }
 
 #ifndef PRODUCT
 // ------------------------------------------------------------------
 // ciTypeFlow::Block::print_value_on
 void ciTypeFlow::Block::print_value_on(outputStream* st) const {
-  if (has_pre_order())  st->print("#%-2d ", pre_order());
+  if (has_pre_order()) st->print("#%-2d ", pre_order());
+  if (has_rpo())       st->print("rpo#%-2d ", rpo());
   st->print("[%d - %d)", start(), limit());
+  if (is_loop_head()) st->print(" lphd");
+  if (is_irreducible_entry()) st->print(" irred");
   if (_jsrs->size() > 0) { st->print("/");  _jsrs->print_on(st); }
-  if (is_private_copy())  st->print("/private_copy");
+  if (is_backedge_copy())  st->print("/backedge_copy");
 }
 
 // ------------------------------------------------------------------
@@ -1871,6 +1861,16 @@
   st->print_cr("  ====================================================  ");
   st->print ("  ");
   print_value_on(st);
+  st->print(" Stored locals: "); def_locals()->print_on(st, outer()->method()->max_locals()); tty->cr();
+  if (loop() && loop()->parent() != NULL) {
+    st->print(" loops:");
+    Loop* lp = loop();
+    do {
+      st->print(" %d<-%d", lp->head()->pre_order(),lp->tail()->pre_order());
+      if (lp->is_irreducible()) st->print("(ir)");
+      lp = lp->parent();
+    } while (lp->parent() != NULL);
+  }
   st->cr();
   _state->print_on(st);
   if (_successors == NULL) {
@@ -1907,6 +1907,21 @@
 }
 #endif
 
+#ifndef PRODUCT
+// ------------------------------------------------------------------
+// ciTypeFlow::LocalSet::print_on
+void ciTypeFlow::LocalSet::print_on(outputStream* st, int limit) const {
+  st->print("{");
+  for (int i = 0; i < max; i++) {
+    if (test(i)) st->print(" %d", i);
+  }
+  if (limit > max) {
+    st->print(" %d..%d ", max, limit);
+  }
+  st->print(" }");
+}
+#endif
+
 // ciTypeFlow
 //
 // This is a pass over the bytecodes which computes the following:
@@ -1922,12 +1937,11 @@
   _max_locals = method->max_locals();
   _max_stack = method->max_stack();
   _code_size = method->code_size();
+  _has_irreducible_entry = false;
   _osr_bci = osr_bci;
   _failure_reason = NULL;
   assert(start_bci() >= 0 && start_bci() < code_size() , "correct osr_bci argument");
-
   _work_list = NULL;
-  _next_pre_order = 0;
 
   _ciblock_count = _methodBlocks->num_blocks();
   _idx_to_blocklist = NEW_ARENA_ARRAY(arena(), GrowableArray<Block*>*, _ciblock_count);
@@ -1949,12 +1963,6 @@
   _work_list = next_block->next();
   next_block->set_next(NULL);
   next_block->set_on_work_list(false);
-  if (!next_block->has_pre_order()) {
-    // Assign "pre_order" as each new block is taken from the work list.
-    // This number may be used by following phases to order block visits.
-    assert(!have_block_count(), "must not have mapped blocks yet")
-    next_block->set_pre_order(_next_pre_order++);
-  }
   return next_block;
 }
 
@@ -1962,30 +1970,37 @@
 // ciTypeFlow::add_to_work_list
 //
 // Add a basic block to our work list.
+// The list is kept sorted by decreasing post order (same as increasing RPO)
 void ciTypeFlow::add_to_work_list(ciTypeFlow::Block* block) {
   assert(!block->is_on_work_list(), "must not already be on work list");
 
   if (CITraceTypeFlow) {
-    tty->print(">> Adding block%s ", block->has_pre_order() ? " (again)" : "");
+    tty->print(">> Adding block ");
     block->print_value_on(tty);
     tty->print_cr(" to the work list : ");
   }
 
   block->set_on_work_list(true);
-  if (block->is_simpler_than(_work_list)) {
+
+  // decreasing post order sort
+
+  Block* prev = NULL;
+  Block* current = _work_list;
+  int po = block->post_order();
+  while (current != NULL) {
+    if (!current->has_post_order() || po > current->post_order())
+      break;
+    prev = current;
+    current = current->next();
+  }
+  if (prev == NULL) {
     block->set_next(_work_list);
     _work_list = block;
   } else {
-    Block *temp = _work_list;
-    while (!block->is_simpler_than(temp->next())) {
-      if (CITraceTypeFlow) {
-        tty->print(".");
-      }
-      temp = temp->next();
-    }
-    block->set_next(temp->next());
-    temp->set_next(block);
+    block->set_next(current);
+    prev->set_next(block);
   }
+
   if (CITraceTypeFlow) {
     tty->cr();
   }
@@ -2008,7 +2023,7 @@
   assert(ciblk->start_bci() == bci, "bad ciBlock boundaries");
   Block* block = get_block_for(ciblk->index(), jsrs, option);
 
-  assert(block == NULL? (option == no_create): block->is_private_copy() == (option == create_private_copy), "create option consistent with result");
+  assert(block == NULL? (option == no_create): block->is_backedge_copy() == (option == create_backedge_copy), "create option consistent with result");
 
   if (CITraceTypeFlow) {
     if (block != NULL) {
@@ -2072,8 +2087,9 @@
     }
 
     if (block->meet_exception(exception_klass, state)) {
-      // Block was modified.  Add it to the work list.
-      if (!block->is_on_work_list()) {
+      // Block was modified and has PO.  Add it to the work list.
+      if (block->has_post_order() &&
+          !block->is_on_work_list()) {
         add_to_work_list(block);
       }
     }
@@ -2091,8 +2107,9 @@
   for (int i = 0; i < len; i++) {
     Block* block = successors->at(i);
     if (block->meet(state)) {
-      // Block was modified.  Add it to the work list.
-      if (!block->is_on_work_list()) {
+      // Block was modified and has PO.  Add it to the work list.
+      if (block->has_post_order() &&
+          !block->is_on_work_list()) {
         add_to_work_list(block);
       }
     }
@@ -2133,6 +2150,111 @@
   return true;
 }
 
+// ------------------------------------------------------------------
+// ciTypeFlow::clone_loop_heads
+//
+// Clone the loop heads
+bool ciTypeFlow::clone_loop_heads(Loop* lp, StateVector* temp_vector, JsrSet* temp_set) {
+  bool rslt = false;
+  for (PreorderLoops iter(loop_tree_root()); !iter.done(); iter.next()) {
+    lp = iter.current();
+    Block* head = lp->head();
+    if (lp == loop_tree_root() ||
+        lp->is_irreducible() ||
+        !head->is_clonable_exit(lp))
+      continue;
+
+    // check not already cloned
+    if (head->backedge_copy_count() != 0)
+      continue;
+
+    // check _no_ shared head below us
+    Loop* ch;
+    for (ch = lp->child(); ch != NULL && ch->head() != head; ch = ch->sibling());
+    if (ch != NULL)
+      continue;
+
+    // Clone head
+    Block* new_head = head->looping_succ(lp);
+    Block* clone = clone_loop_head(lp, temp_vector, temp_set);
+    // Update lp's info
+    clone->set_loop(lp);
+    lp->set_head(new_head);
+    lp->set_tail(clone);
+    // And move original head into outer loop
+    head->set_loop(lp->parent());
+
+    rslt = true;
+  }
+  return rslt;
+}
+
+// ------------------------------------------------------------------
+// ciTypeFlow::clone_loop_head
+//
+// Clone lp's head and replace tail's successors with clone.
+//
+//  |
+//  v
+// head <-> body
+//  |
+//  v
+// exit
+//
+// new_head
+//
+//  |
+//  v
+// head ----------\
+//  |             |
+//  |             v
+//  |  clone <-> body
+//  |    |
+//  | /--/
+//  | |
+//  v v
+// exit
+//
+ciTypeFlow::Block* ciTypeFlow::clone_loop_head(Loop* lp, StateVector* temp_vector, JsrSet* temp_set) {
+  Block* head = lp->head();
+  Block* tail = lp->tail();
+  if (CITraceTypeFlow) {
+    tty->print(">> Requesting clone of loop head "); head->print_value_on(tty);
+    tty->print("  for predecessor ");                tail->print_value_on(tty);
+    tty->cr();
+  }
+  Block* clone = block_at(head->start(), head->jsrs(), create_backedge_copy);
+  assert(clone->backedge_copy_count() == 1, "one backedge copy for all back edges");
+
+  assert(!clone->has_pre_order(), "just created");
+  clone->set_next_pre_order();
+
+  // Insert clone after (orig) tail in reverse post order
+  clone->set_rpo_next(tail->rpo_next());
+  tail->set_rpo_next(clone);
+
+  // tail->head becomes tail->clone
+  for (SuccIter iter(tail); !iter.done(); iter.next()) {
+    if (iter.succ() == head) {
+      iter.set_succ(clone);
+      break;
+    }
+  }
+  flow_block(tail, temp_vector, temp_set);
+  if (head == tail) {
+    // For self-loops, clone->head becomes clone->clone
+    flow_block(clone, temp_vector, temp_set);
+    for (SuccIter iter(clone); !iter.done(); iter.next()) {
+      if (iter.succ() == head) {
+        iter.set_succ(clone);
+        break;
+      }
+    }
+  }
+  flow_block(clone, temp_vector, temp_set);
+
+  return clone;
+}
 
 // ------------------------------------------------------------------
 // ciTypeFlow::flow_block
@@ -2159,11 +2281,14 @@
 
   // Grab the state from the current block.
   block->copy_state_into(state);
+  state->def_locals()->clear();
 
   GrowableArray<Block*>*           exceptions = block->exceptions();
   GrowableArray<ciInstanceKlass*>* exc_klasses = block->exc_klasses();
   bool has_exceptions = exceptions->length() > 0;
 
+  bool exceptions_used = false;
+
   ciBytecodeStream str(method());
   str.reset_to_bci(start);
   Bytecodes::Code code;
@@ -2172,6 +2297,7 @@
     // Check for exceptional control flow from this point.
     if (has_exceptions && can_trap(str)) {
       flow_exceptions(exceptions, exc_klasses, state);
+      exceptions_used = true;
     }
     // Apply the effects of the current bytecode to our state.
     bool res = state->apply_one_bytecode(&str);
@@ -2189,9 +2315,14 @@
         block->print_on(tty);
       }
 
+      // Save set of locals defined in this block
+      block->def_locals()->add(state->def_locals());
+
       // Record (no) successors.
       block->successors(&str, state, jsrs);
 
+      assert(!has_exceptions || exceptions_used, "Not removing exceptions");
+
       // Discontinue interpretation of this Block.
       return;
     }
@@ -2202,6 +2333,7 @@
     // Check for exceptional control flow from this point.
     if (has_exceptions && can_trap(str)) {
       flow_exceptions(exceptions, exc_klasses, state);
+      exceptions_used = true;
     }
 
     // Fix the JsrSet to reflect effect of the bytecode.
@@ -2218,11 +2350,306 @@
     successors = block->successors(&str, NULL, NULL);
   }
 
+  // Save set of locals defined in this block
+  block->def_locals()->add(state->def_locals());
+
+  // Remove untaken exception paths
+  if (!exceptions_used)
+    exceptions->clear();
+
   // Pass our state to successors.
   flow_successors(successors, state);
 }
 
 // ------------------------------------------------------------------
+// ciTypeFlow::PostorderLoops::next
+//
+// Advance to the next loop using a postorder, left-to-right traversal of the loop tree.
+void ciTypeFlow::PostorderLoops::next() {
+  assert(!done(), "must not be done.");
+  if (_current->sibling() != NULL) {
+    _current = _current->sibling();
+    while (_current->child() != NULL) {
+      _current = _current->child();
+    }
+  } else {
+    _current = _current->parent();
+  }
+}
+
+// ------------------------------------------------------------------
+// ciTypeFlow::PreorderLoops::next
+//
+// Advance to the next loop using a preorder, left-to-right traversal of the loop tree.
+void ciTypeFlow::PreorderLoops::next() {
+  assert(!done(), "must not be done.");
+  if (_current->child() != NULL) {
+    _current = _current->child();
+  } else if (_current->sibling() != NULL) {
+    _current = _current->sibling();
+  } else {
+    while (_current != _root && _current->sibling() == NULL) {
+      _current = _current->parent();
+    }
+    if (_current == _root) {
+      _current = NULL;
+      assert(done(), "must be done.");
+    } else {
+      assert(_current->sibling() != NULL, "must be more to do");
+      _current = _current->sibling();
+    }
+  }
+}
+
+// ------------------------------------------------------------------
+// ciTypeFlow::Loop::sorted_merge
+//
+// Merge the branch lp into this branch, sorting on the loop head
+// pre_orders. Returns the leaf of the merged branch.
+// Child and sibling pointers will be setup later.
+// Sort is (looking from leaf towards the root)
+//  descending on primary key: loop head's pre_order, and
+//  ascending  on secondary key: loop tail's pre_order.
+ciTypeFlow::Loop* ciTypeFlow::Loop::sorted_merge(Loop* lp) {
+  Loop* leaf = this;
+  Loop* prev = NULL;
+  Loop* current = leaf;
+  while (lp != NULL) {
+    int lp_pre_order = lp->head()->pre_order();
+    // Find insertion point for "lp"
+    while (current != NULL) {
+      if (current == lp)
+        return leaf; // Already in list
+      if (current->head()->pre_order() < lp_pre_order)
+        break;
+      if (current->head()->pre_order() == lp_pre_order &&
+          current->tail()->pre_order() > lp->tail()->pre_order()) {
+        break;
+      }
+      prev = current;
+      current = current->parent();
+    }
+    Loop* next_lp = lp->parent(); // Save future list of items to insert
+    // Insert lp before current
+    lp->set_parent(current);
+    if (prev != NULL) {
+      prev->set_parent(lp);
+    } else {
+      leaf = lp;
+    }
+    prev = lp;     // Inserted item is new prev[ious]
+    lp = next_lp;  // Next item to insert
+  }
+  return leaf;
+}
+
+// ------------------------------------------------------------------
+// ciTypeFlow::build_loop_tree
+//
+// Incrementally build loop tree.
+void ciTypeFlow::build_loop_tree(Block* blk) {
+  assert(!blk->is_post_visited(), "precondition");
+  Loop* innermost = NULL; // merge of loop tree branches over all successors
+
+  for (SuccIter iter(blk); !iter.done(); iter.next()) {
+    Loop*  lp   = NULL;
+    Block* succ = iter.succ();
+    if (!succ->is_post_visited()) {
+      // Found backedge since predecessor post visited, but successor is not
+      assert(succ->pre_order() <= blk->pre_order(), "should be backedge");
+
+      // Create a LoopNode to mark this loop.
+      lp = new (arena()) Loop(succ, blk);
+      if (succ->loop() == NULL)
+        succ->set_loop(lp);
+      // succ->loop will be updated to innermost loop on a later call, when blk==succ
+
+    } else {  // Nested loop
+      lp = succ->loop();
+
+      // If succ is loop head, find outer loop.
+      while (lp != NULL && lp->head() == succ) {
+        lp = lp->parent();
+      }
+      if (lp == NULL) {
+        // Infinite loop: its parent is the root
+        lp = loop_tree_root();
+      }
+    }
+
+    // Check for irreducible loop.
+    // Successor has already been visited. If the successor's loop head
+    // has already been post-visited, then this is another entry into the loop.
+    while (lp->head()->is_post_visited() && lp != loop_tree_root()) {
+      _has_irreducible_entry = true;
+      lp->set_irreducible(succ);
+      if (!succ->is_on_work_list()) {
+        // Assume irreducible entries need more data flow
+        add_to_work_list(succ);
+      }
+      lp = lp->parent();
+      assert(lp != NULL, "nested loop must have parent by now");
+    }
+
+    // Merge loop tree branch for all successors.
+    innermost = innermost == NULL ? lp : innermost->sorted_merge(lp);
+
+  } // end loop
+
+  if (innermost == NULL) {
+    assert(blk->successors()->length() == 0, "CFG exit");
+    blk->set_loop(loop_tree_root());
+  } else if (innermost->head() == blk) {
+    // If loop header, complete the tree pointers
+    if (blk->loop() != innermost) {
+#ifdef ASSERT
+      assert(blk->loop()->head() == innermost->head(), "same head");
+      Loop* dl;
+      for (dl = innermost; dl != NULL && dl != blk->loop(); dl = dl->parent());
+      assert(dl == blk->loop(), "blk->loop() already in innermost list");
+#endif
+      blk->set_loop(innermost);
+    }
+    innermost->def_locals()->add(blk->def_locals());
+    Loop* l = innermost;
+    Loop* p = l->parent();
+    while (p && l->head() == blk) {
+      l->set_sibling(p->child());  // Put self on parent's 'next child'
+      p->set_child(l);             // Make self the first child of parent
+      p->def_locals()->add(l->def_locals());
+      l = p;                       // Walk up the parent chain
+      p = l->parent();
+    }
+  } else {
+    blk->set_loop(innermost);
+    innermost->def_locals()->add(blk->def_locals());
+  }
+}
+
+// ------------------------------------------------------------------
+// ciTypeFlow::Loop::contains
+//
+// Returns true if lp is a nested loop or this loop itself.
+bool ciTypeFlow::Loop::contains(ciTypeFlow::Loop* lp) const {
+  assert(lp != NULL, "");
+  if (this == lp || head() == lp->head()) return true;
+  int depth1 = depth();
+  int depth2 = lp->depth();
+  if (depth1 > depth2)
+    return false;
+  while (depth1 < depth2) {
+    depth2--;
+    lp = lp->parent();
+  }
+  return this == lp;
+}
+
+// ------------------------------------------------------------------
+// ciTypeFlow::Loop::depth
+//
+// Loop depth
+int ciTypeFlow::Loop::depth() const {
+  int dp = 0;
+  for (Loop* lp = this->parent(); lp != NULL; lp = lp->parent())
+    dp++;
+  return dp;
+}
+
+#ifndef PRODUCT
+// ------------------------------------------------------------------
+// ciTypeFlow::Loop::print
+void ciTypeFlow::Loop::print(outputStream* st, int indent) const {
+  for (int i = 0; i < indent; i++) st->print(" ");
+  st->print("%d<-%d %s",
+            is_root() ? 0 : this->head()->pre_order(),
+            is_root() ? 0 : this->tail()->pre_order(),
+            is_irreducible()?" irr":"");
+  st->print(" defs: ");
+  def_locals()->print_on(st, _head->outer()->method()->max_locals());
+  st->cr();
+  for (Loop* ch = child(); ch != NULL; ch = ch->sibling())
+    ch->print(st, indent+2);
+}
+#endif
+
+// ------------------------------------------------------------------
+// ciTypeFlow::df_flow_types
+//
+// Perform the depth first type flow analysis. Helper for flow_types.
+void ciTypeFlow::df_flow_types(Block* start,
+                               bool do_flow,
+                               StateVector* temp_vector,
+                               JsrSet* temp_set) {
+  int dft_len = 100;
+  GrowableArray<Block*> stk(arena(), dft_len, 0, NULL);
+
+  ciBlock* dummy = _methodBlocks->make_dummy_block();
+  JsrSet* root_set = new JsrSet(NULL, 0);
+  Block* root_head = new (arena()) Block(this, dummy, root_set);
+  Block* root_tail = new (arena()) Block(this, dummy, root_set);
+  root_head->set_pre_order(0);
+  root_head->set_post_order(0);
+  root_tail->set_pre_order(max_jint);
+  root_tail->set_post_order(max_jint);
+  set_loop_tree_root(new (arena()) Loop(root_head, root_tail));
+
+  stk.push(start);
+
+  _next_pre_order = 0;  // initialize pre_order counter
+  _rpo_list = NULL;
+  int next_po = 0;      // initialize post_order counter
+
+  // Compute RPO and the control flow graph
+  int size;
+  while ((size = stk.length()) > 0) {
+    Block* blk = stk.top(); // Leave node on stack
+    if (!blk->is_visited()) {
+      // forward arc in graph
+      assert (!blk->has_pre_order(), "");
+      blk->set_next_pre_order();
+
+      if (_next_pre_order >= MaxNodeLimit / 2) {
+        // Too many basic blocks.  Bail out.
+        // This can happen when try/finally constructs are nested to depth N,
+        // and there is O(2**N) cloning of jsr bodies.  See bug 4697245!
+        // "MaxNodeLimit / 2" is used because probably the parser will
+        // generate at least twice that many nodes and bail out.
+        record_failure("too many basic blocks");
+        return;
+      }
+      if (do_flow) {
+        flow_block(blk, temp_vector, temp_set);
+        if (failing()) return; // Watch for bailouts.
+      }
+    } else if (!blk->is_post_visited()) {
+      // cross or back arc
+      for (SuccIter iter(blk); !iter.done(); iter.next()) {
+        Block* succ = iter.succ();
+        if (!succ->is_visited()) {
+          stk.push(succ);
+        }
+      }
+      if (stk.length() == size) {
+        // There were no additional children, post visit node now
+        stk.pop(); // Remove node from stack
+
+        build_loop_tree(blk);
+        blk->set_post_order(next_po++);   // Assign post order
+        prepend_to_rpo_list(blk);
+        assert(blk->is_post_visited(), "");
+
+        if (blk->is_loop_head() && !blk->is_on_work_list()) {
+          // Assume loop heads need more data flow
+          add_to_work_list(blk);
+        }
+      }
+    } else {
+      stk.pop(); // Remove post-visited node from stack
+    }
+  }
+}
+
+// ------------------------------------------------------------------
 // ciTypeFlow::flow_types
 //
 // Perform the type flow analysis, creating and cloning Blocks as
@@ -2233,91 +2660,93 @@
   JsrSet* temp_set = new JsrSet(NULL, 16);
 
   // Create the method entry block.
-  Block* block = block_at(start_bci(), temp_set);
-  block->set_pre_order(_next_pre_order++);
-  assert(block->is_start(), "start block must have order #0");
+  Block* start = block_at(start_bci(), temp_set);
 
   // Load the initial state into it.
   const StateVector* start_state = get_start_state();
   if (failing())  return;
-  block->meet(start_state);
-  add_to_work_list(block);
+  start->meet(start_state);
+
+  // Depth first visit
+  df_flow_types(start, true /*do flow*/, temp_vector, temp_set);
 
-  // Trickle away.
-  while (!work_list_empty()) {
-    Block* block = work_list_next();
-    flow_block(block, temp_vector, temp_set);
+  if (failing())  return;
+  assert(_rpo_list == start, "must be start");
 
+  // Any loops found?
+  if (loop_tree_root()->child() != NULL &&
+      env()->comp_level() >= CompLevel_full_optimization) {
+      // Loop optimizations are not performed on Tier1 compiles.
+
+    bool changed = clone_loop_heads(loop_tree_root(), temp_vector, temp_set);
 
-    // NodeCountCutoff is the number of nodes at which the parser
-    // will bail out.  Probably if we already have lots of BBs,
-    // the parser will generate at least twice that many nodes and bail out.
-    // Therefore, this is a conservatively large limit at which to
-    // bail out in the pre-parse typeflow pass.
-    int block_limit = MaxNodeLimit / 2;
+    // If some loop heads were cloned, recompute postorder and loop tree
+    if (changed) {
+      loop_tree_root()->set_child(NULL);
+      for (Block* blk = _rpo_list; blk != NULL;) {
+        Block* next = blk->rpo_next();
+        blk->df_init();
+        blk = next;
+      }
+      df_flow_types(start, false /*no flow*/, temp_vector, temp_set);
+    }
+  }
 
-    if (_next_pre_order >= block_limit) {
-      // Too many basic blocks.  Bail out.
-      //
-      // This can happen when try/finally constructs are nested to depth N,
-      // and there is O(2**N) cloning of jsr bodies.  See bug 4697245!
-      record_failure("too many basic blocks");
-      return;
-    }
+  if (CITraceTypeFlow) {
+    tty->print_cr("\nLoop tree");
+    loop_tree_root()->print();
+  }
+
+  // Continue flow analysis until fixed point reached
+
+  debug_only(int max_block = _next_pre_order;)
 
-    // Watch for bailouts.
-    if (failing())  return;
+  while (!work_list_empty()) {
+    Block* blk = work_list_next();
+    assert (blk->has_post_order(), "post order assigned above");
+
+    flow_block(blk, temp_vector, temp_set);
+
+    assert (max_block == _next_pre_order, "no new blocks");
+    assert (!failing(), "no more bailouts");
   }
 }
 
 // ------------------------------------------------------------------
 // ciTypeFlow::map_blocks
 //
-// Create the block map, which indexes blocks in pre_order.
+// Create the block map, which indexes blocks in reverse post-order.
 void ciTypeFlow::map_blocks() {
   assert(_block_map == NULL, "single initialization");
-  int pre_order_limit = _next_pre_order;
-  _block_map = NEW_ARENA_ARRAY(arena(), Block*, pre_order_limit);
-  assert(pre_order_limit == block_count(), "");
-  int po;
-  for (po = 0; po < pre_order_limit; po++) {
-    debug_only(_block_map[po] = NULL);
+  int block_ct = _next_pre_order;
+  _block_map = NEW_ARENA_ARRAY(arena(), Block*, block_ct);
+  assert(block_ct == block_count(), "");
+
+  Block* blk = _rpo_list;
+  for (int m = 0; m < block_ct; m++) {
+    int rpo = blk->rpo();
+    assert(rpo == m, "should be sequential");
+    _block_map[rpo] = blk;
+    blk = blk->rpo_next();
   }
-  ciMethodBlocks *mblks = _methodBlocks;
-  ciBlock* current = NULL;
-  int limit_bci = code_size();
-  for (int bci = 0; bci < limit_bci; bci++) {
-    ciBlock* ciblk = mblks->block_containing(bci);
-    if (ciblk != NULL && ciblk != current) {
-      current = ciblk;
-      int curidx = ciblk->index();
-      int block_count = (_idx_to_blocklist[curidx] == NULL) ? 0 : _idx_to_blocklist[curidx]->length();
-      for (int i = 0; i < block_count; i++) {
-        Block* block = _idx_to_blocklist[curidx]->at(i);
-        if (!block->has_pre_order())  continue;
-        int po = block->pre_order();
-        assert(_block_map[po] == NULL, "unique ref to block");
-        assert(0 <= po && po < pre_order_limit, "");
-        _block_map[po] = block;
-      }
-    }
-  }
-  for (po = 0; po < pre_order_limit; po++) {
-    assert(_block_map[po] != NULL, "must not drop any blocks");
-    Block* block = _block_map[po];
+  assert(blk == NULL, "should be done");
+
+  for (int j = 0; j < block_ct; j++) {
+    assert(_block_map[j] != NULL, "must not drop any blocks");
+    Block* block = _block_map[j];
     // Remove dead blocks from successor lists:
     for (int e = 0; e <= 1; e++) {
       GrowableArray<Block*>* l = e? block->exceptions(): block->successors();
-      for (int i = 0; i < l->length(); i++) {
-        Block* s = l->at(i);
-        if (!s->has_pre_order()) {
+      for (int k = 0; k < l->length(); k++) {
+        Block* s = l->at(k);
+        if (!s->has_post_order()) {
           if (CITraceTypeFlow) {
             tty->print("Removing dead %s successor of #%d: ", (e? "exceptional":  "normal"), block->pre_order());
             s->print_value_on(tty);
             tty->cr();
           }
           l->remove(s);
-          --i;
+          --k;
         }
       }
     }
@@ -2329,7 +2758,7 @@
 //
 // Find a block with this ciBlock which has a compatible JsrSet.
 // If no such block exists, create it, unless the option is no_create.
-// If the option is create_private_copy, always create a fresh private copy.
+// If the option is create_backedge_copy, always create a fresh backedge copy.
 ciTypeFlow::Block* ciTypeFlow::get_block_for(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs, CreateOption option) {
   Arena* a = arena();
   GrowableArray<Block*>* blocks = _idx_to_blocklist[ciBlockIndex];
@@ -2342,11 +2771,11 @@
     _idx_to_blocklist[ciBlockIndex] = blocks;
   }
 
-  if (option != create_private_copy) {
+  if (option != create_backedge_copy) {
     int len = blocks->length();
     for (int i = 0; i < len; i++) {
       Block* block = blocks->at(i);
-      if (!block->is_private_copy() && block->is_compatible_with(jsrs)) {
+      if (!block->is_backedge_copy() && block->is_compatible_with(jsrs)) {
         return block;
       }
     }
@@ -2357,15 +2786,15 @@
 
   // We did not find a compatible block.  Create one.
   Block* new_block = new (a) Block(this, _methodBlocks->block(ciBlockIndex), jsrs);
-  if (option == create_private_copy)  new_block->set_private_copy(true);
+  if (option == create_backedge_copy)  new_block->set_backedge_copy(true);
   blocks->append(new_block);
   return new_block;
 }
 
 // ------------------------------------------------------------------
-// ciTypeFlow::private_copy_count
+// ciTypeFlow::backedge_copy_count
 //
-int ciTypeFlow::private_copy_count(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs) const {
+int ciTypeFlow::backedge_copy_count(int ciBlockIndex, ciTypeFlow::JsrSet* jsrs) const {
   GrowableArray<Block*>* blocks = _idx_to_blocklist[ciBlockIndex];
 
   if (blocks == NULL) {
@@ -2376,7 +2805,7 @@
   int len = blocks->length();
   for (int i = 0; i < len; i++) {
     Block* block = blocks->at(i);
-    if (block->is_private_copy() && block->is_compatible_with(jsrs)) {
+    if (block->is_backedge_copy() && block->is_compatible_with(jsrs)) {
       count++;
     }
   }
@@ -2405,10 +2834,12 @@
   if (failing()) {
     return;
   }
+
+  map_blocks();
+
   if (CIPrintTypeFlow || CITraceTypeFlow) {
-    print_on(tty);
+    rpo_print_on(tty);
   }
-  map_blocks();
 }
 
 // ------------------------------------------------------------------
@@ -2466,4 +2897,19 @@
   st->print_cr("********************************************************");
   st->cr();
 }
+
+void ciTypeFlow::rpo_print_on(outputStream* st) const {
+  st->print_cr("********************************************************");
+  st->print   ("TypeFlow for ");
+  method()->name()->print_symbol_on(st);
+  int limit_bci = code_size();
+  st->print_cr("  %d bytes", limit_bci);
+  for (Block* blk = _rpo_list; blk != NULL; blk = blk->rpo_next()) {
+    blk->print_on(st);
+    st->print_cr("--------------------------------------------------------");
+    st->cr();
+  }
+  st->print_cr("********************************************************");
+  st->cr();
+}
 #endif
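The reworked ciTypeFlow::add_to_work_list() above keeps the work list sorted by decreasing post_order, i.e. increasing reverse post order, instead of the old is_simpler_than() ranking. A minimal standalone sketch of that insertion, with a hypothetical WorkItem type standing in for ciTypeFlow::Block, is:

struct WorkItem {
  int       post_order;   // -1 while not yet assigned
  WorkItem* next;
};

static void insert_by_decreasing_post_order(WorkItem*& head, WorkItem* item) {
  WorkItem* prev = nullptr;
  WorkItem* cur  = head;
  const int po = item->post_order;
  // Walk past entries whose post order is known and not smaller than ours;
  // stop at the first entry with a smaller (or still unassigned) post order.
  while (cur != nullptr && cur->post_order >= 0 && cur->post_order >= po) {
    prev = cur;
    cur  = cur->next;
  }
  item->next = cur;
  if (prev == nullptr) head = item;
  else                 prev->next = item;
}

Popping the list head therefore always yields the pending block that comes earliest in RPO, which matches the flow order established by df_flow_types().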
--- a/hotspot/src/share/vm/ci/ciTypeFlow.hpp	Wed Sep 17 08:29:17 2008 -0700
+++ b/hotspot/src/share/vm/ci/ciTypeFlow.hpp	Wed Sep 17 12:59:52 2008 -0700
@@ -34,11 +34,13 @@
   int _max_locals;
   int _max_stack;
   int _code_size;
+  bool      _has_irreducible_entry;
 
   const char* _failure_reason;
 
 public:
   class StateVector;
+  class Loop;
   class Block;
 
   // Build a type flow analyzer
@@ -55,6 +57,7 @@
   int       max_stack() const  { return _max_stack; }
   int       max_cells() const  { return _max_locals + _max_stack; }
   int       code_size() const  { return _code_size; }
+  bool      has_irreducible_entry() const { return _has_irreducible_entry; }
 
   // Represents information about an "active" jsr call.  This
   // class represents a call to the routine at some entry address
@@ -125,6 +128,19 @@
     void print_on(outputStream* st) const PRODUCT_RETURN;
   };
 
+  class LocalSet VALUE_OBJ_CLASS_SPEC {
+  private:
+    enum Constants { max = 63 };
+    uint64_t _bits;
+  public:
+    LocalSet() : _bits(0) {}
+    void add(uint32_t i)        { if (i < (uint32_t)max) _bits |=  (1LL << i); }
+    void add(LocalSet* ls)      { _bits |= ls->_bits; }
+    bool test(uint32_t i) const { return i < (uint32_t)max ? (_bits>>i)&1U : true; }
+    void clear()                { _bits = 0; }
+    void print_on(outputStream* st, int limit) const  PRODUCT_RETURN;
+  };
+
   // Used as a combined index for locals and temps
   enum Cell {
     Cell_0, Cell_max = INT_MAX
@@ -142,6 +158,8 @@
     int         _trap_bci;
     int         _trap_index;
 
+    LocalSet    _def_locals;  // For entire block
+
     static ciType* type_meet_internal(ciType* t1, ciType* t2, ciTypeFlow* analyzer);
 
   public:
@@ -181,6 +199,9 @@
     int         monitor_count() const  { return _monitor_count; }
     void    set_monitor_count(int mc)  { _monitor_count = mc; }
 
+    LocalSet* def_locals() { return &_def_locals; }
+    const LocalSet* def_locals() const { return &_def_locals; }
+
     static Cell start_cell()           { return (Cell)0; }
     static Cell next_cell(Cell c)      { return (Cell)(((int)c) + 1); }
     Cell        limit_cell() const {
@@ -250,6 +271,10 @@
       return type->basic_type() == T_DOUBLE;
     }
 
+    void store_to_local(int lnum) {
+      _def_locals.add((uint) lnum);
+    }
+
     void      push_translate(ciType* type);
 
     void      push_int() {
@@ -358,6 +383,7 @@
              "must be reference type or return address");
       overwrite_local_double_long(index);
       set_type_at(local(index), type);
+      store_to_local(index);
     }
 
     void load_local_double(int index) {
@@ -376,6 +402,8 @@
       overwrite_local_double_long(index);
       set_type_at(local(index), type);
       set_type_at(local(index+1), type2);
+      store_to_local(index);
+      store_to_local(index+1);
     }
 
     void load_local_float(int index) {
@@ -388,6 +416,7 @@
       assert(is_float(type), "must be float type");
       overwrite_local_double_long(index);
       set_type_at(local(index), type);
+      store_to_local(index);
     }
 
     void load_local_int(int index) {
@@ -400,6 +429,7 @@
       assert(is_int(type), "must be int type");
       overwrite_local_double_long(index);
       set_type_at(local(index), type);
+      store_to_local(index);
     }
 
     void load_local_long(int index) {
@@ -418,6 +448,8 @@
       overwrite_local_double_long(index);
       set_type_at(local(index), type);
       set_type_at(local(index+1), type2);
+      store_to_local(index);
+      store_to_local(index+1);
     }
 
     // Stop interpretation of this path with a trap.
@@ -450,13 +482,31 @@
   };
 
   // Parameter for "find_block" calls:
-  // Describes the difference between a public and private copy.
+  // Describes the difference between a public and backedge copy.
   enum CreateOption {
     create_public_copy,
-    create_private_copy,
+    create_backedge_copy,
     no_create
   };
 
+  // Successor iterator
+  class SuccIter : public StackObj {
+  private:
+    Block* _pred;
+    int    _index;
+    Block* _succ;
+  public:
+    SuccIter()                        : _pred(NULL), _index(-1), _succ(NULL) {}
+    SuccIter(Block* pred)             : _pred(pred), _index(-1), _succ(NULL) { next(); }
+    int    index()     { return _index; }
+    Block* pred()      { return _pred; }           // Return predecessor
+    bool   done()      { return _index < 0; }      // Finished?
+    Block* succ()      { return _succ; }           // Return current successor
+    void   next();                                 // Advance
+    void   set_succ(Block* succ);                  // Update current successor
+    bool   is_normal_ctrl() { return index() < _pred->successors()->length(); }
+  };
+
   // A basic block
   class Block : public ResourceObj {
   private:
@@ -470,15 +520,24 @@
     int                              _trap_bci;
     int                              _trap_index;
 
-    // A reasonable approximation to pre-order, provided.to the client.
+    // pre_order, assigned at first visit. Used as block ID and "visited" tag
     int                              _pre_order;
 
-    // Has this block been cloned for some special purpose?
-    bool                             _private_copy;
+    // A post-order, used to compute the reverse post order (RPO) provided to the client
+    int                              _post_order;  // used to compute rpo
+
+    // Has this block been cloned for a loop backedge?
+    bool                             _backedge_copy;
 
     // A pointer used for our internal work list
-    Block*                 _next;
-    bool                   _on_work_list;
+    Block*                           _next;
+    bool                             _on_work_list;      // on the work list
+    Block*                           _rpo_next;          // Reverse post order list
+
+    // Loop info
+    Loop*                            _loop;              // nearest loop
+    bool                             _irreducible_entry; // entry to irreducible loop
+    bool                             _exception_entry;   // entry to exception handler
 
     ciBlock*     ciblock() const     { return _ciblock; }
     StateVector* state() const     { return _state; }
@@ -504,10 +563,11 @@
     int start() const         { return _ciblock->start_bci(); }
     int limit() const         { return _ciblock->limit_bci(); }
     int control() const       { return _ciblock->control_bci(); }
+    JsrSet* jsrs() const      { return _jsrs; }
 
-    bool    is_private_copy() const       { return _private_copy; }
-    void   set_private_copy(bool z);
-    int        private_copy_count() const { return outer()->private_copy_count(ciblock()->index(), _jsrs); }
+    bool    is_backedge_copy() const       { return _backedge_copy; }
+    void   set_backedge_copy(bool z);
+    int        backedge_copy_count() const { return outer()->backedge_copy_count(ciblock()->index(), _jsrs); }
 
     // access to entry state
     int     stack_size() const         { return _state->stack_size(); }
@@ -515,6 +575,20 @@
     ciType* local_type_at(int i) const { return _state->local_type_at(i); }
     ciType* stack_type_at(int i) const { return _state->stack_type_at(i); }
 
+    // Data flow on locals
+    bool is_invariant_local(uint v) const {
+      assert(is_loop_head(), "only loop heads");
+      // Find outermost loop with same loop head
+      Loop* lp = loop();
+      while (lp->parent() != NULL) {
+        if (lp->parent()->head() != lp->head()) break;
+        lp = lp->parent();
+      }
+      return !lp->def_locals()->test(v);
+    }
+    LocalSet* def_locals() { return _state->def_locals(); }
+    const LocalSet* def_locals() const { return _state->def_locals(); }
+
     // Get the successors for this Block.
     GrowableArray<Block*>* successors(ciBytecodeStream* str,
                                       StateVector* state,
@@ -524,13 +598,6 @@
       return _successors;
     }
 
-    // Helper function for "successors" when making private copies of
-    // loop heads for C2.
-    Block * clone_loop_head(ciTypeFlow* analyzer,
-                            int branch_bci,
-                            Block* target,
-                            JsrSet* jsrs);
-
     // Get the exceptional successors for this Block.
     GrowableArray<Block*>* exceptions() {
       if (_exceptions == NULL) {
@@ -584,17 +651,126 @@
     bool   is_on_work_list() const  { return _on_work_list; }
 
     bool   has_pre_order() const  { return _pre_order >= 0; }
-    void   set_pre_order(int po)  { assert(!has_pre_order() && po >= 0, ""); _pre_order = po; }
+    void   set_pre_order(int po)  { assert(!has_pre_order(), ""); _pre_order = po; }
     int    pre_order() const      { assert(has_pre_order(), ""); return _pre_order; }
+    void   set_next_pre_order()   { set_pre_order(outer()->inc_next_pre_order()); }
     bool   is_start() const       { return _pre_order == outer()->start_block_num(); }
 
-    // A ranking used in determining order within the work list.
-    bool   is_simpler_than(Block* other);
+    // Reverse post order
+    void   df_init();
+    bool   has_post_order() const { return _post_order >= 0; }
+    void   set_post_order(int po) { assert(!has_post_order() && po >= 0, ""); _post_order = po; }
+    void   reset_post_order(int o){ _post_order = o; }
+    int    post_order() const     { assert(has_post_order(), ""); return _post_order; }
+
+    bool   has_rpo() const        { return has_post_order() && outer()->have_block_count(); }
+    int    rpo() const            { assert(has_rpo(), ""); return outer()->block_count() - post_order() - 1; }
+    void   set_rpo_next(Block* b) { _rpo_next = b; }
+    Block* rpo_next()             { return _rpo_next; }
+
+    // Loops
+    Loop*  loop() const                  { return _loop; }
+    void   set_loop(Loop* lp)            { _loop = lp; }
+    bool   is_loop_head() const          { return _loop && _loop->head() == this; }
+    void   set_irreducible_entry(bool c) { _irreducible_entry = c; }
+    bool   is_irreducible_entry() const  { return _irreducible_entry; }
+    bool   is_visited() const            { return has_pre_order(); }
+    bool   is_post_visited() const       { return has_post_order(); }
+    bool   is_clonable_exit(Loop* lp);
+    Block* looping_succ(Loop* lp);       // Successor inside of loop
+    bool   is_single_entry_loop_head() const {
+      if (!is_loop_head()) return false;
+      for (Loop* lp = loop(); lp != NULL && lp->head() == this; lp = lp->parent())
+        if (lp->is_irreducible()) return false;
+      return true;
+    }
 
     void   print_value_on(outputStream* st) const PRODUCT_RETURN;
     void   print_on(outputStream* st) const       PRODUCT_RETURN;
   };
 
+  // Loop
+  class Loop : public ResourceObj {
+  private:
+    Loop* _parent;
+    Loop* _sibling;  // List of siblings, null terminated
+    Loop* _child;    // Head of child list threaded thru sibling pointer
+    Block* _head;    // Head of loop
+    Block* _tail;    // Tail of loop
+    bool   _irreducible;
+    LocalSet _def_locals;
+
+  public:
+    Loop(Block* head, Block* tail) :
+      _head(head),   _tail(tail),
+      _parent(NULL), _sibling(NULL), _child(NULL),
+      _irreducible(false), _def_locals() {}
+
+    Loop* parent()  const { return _parent; }
+    Loop* sibling() const { return _sibling; }
+    Loop* child()   const { return _child; }
+    Block* head()   const { return _head; }
+    Block* tail()   const { return _tail; }
+    void set_parent(Loop* p)  { _parent = p; }
+    void set_sibling(Loop* s) { _sibling = s; }
+    void set_child(Loop* c)   { _child = c; }
+    void set_head(Block* hd)  { _head = hd; }
+    void set_tail(Block* tl)  { _tail = tl; }
+
+    int depth() const;              // nesting depth
+
+    // Returns true if lp is a nested loop or us.
+    bool contains(Loop* lp) const;
+    bool contains(Block* blk) const { return contains(blk->loop()); }
+
+    // Data flow on locals
+    LocalSet* def_locals() { return &_def_locals; }
+    const LocalSet* def_locals() const { return &_def_locals; }
+
+    // Merge the branch lp into this branch, sorting on the loop head
+    // pre_orders. Returns the new branch.
+    Loop* sorted_merge(Loop* lp);
+
+    // Mark non-single entry to loop
+    void set_irreducible(Block* entry) {
+      _irreducible = true;
+      entry->set_irreducible_entry(true);
+    }
+    bool is_irreducible() const { return _irreducible; }
+
+    bool is_root() const { return _tail->pre_order() == max_jint; }
+
+    void print(outputStream* st = tty, int indent = 0) const PRODUCT_RETURN;
+  };
+
+  // Postorder iteration over the loop tree.
+  class PostorderLoops : public StackObj {
+  private:
+    Loop* _root;
+    Loop* _current;
+  public:
+    PostorderLoops(Loop* root) : _root(root), _current(root) {
+      while (_current->child() != NULL) {
+        _current = _current->child();
+      }
+    }
+    bool done() { return _current == NULL; }  // Finished iterating?
+    void next();                            // Advance to next loop
+    Loop* current() { return _current; }      // Return current loop.
+  };
+
+  // Preorder iteration over the loop tree.
+  class PreorderLoops : public StackObj {
+  private:
+    Loop* _root;
+    Loop* _current;
+  public:
+    PreorderLoops(Loop* root) : _root(root), _current(root) {}
+    bool done() { return _current == NULL; }  // Finished iterating?
+    void next();                            // Advance to next loop
+    Loop* current() { return _current; }      // Return current loop.
+  };
+
   // Standard indexes of successors, for various bytecodes.
   enum {
     FALL_THROUGH   = 0,  // normal control
@@ -619,6 +795,12 @@
   // Tells if a given instruction is able to generate an exception edge.
   bool can_trap(ciBytecodeStream& str);
 
+  // Clone the loop heads. Returns true if any cloning occurred.
+  bool clone_loop_heads(Loop* lp, StateVector* temp_vector, JsrSet* temp_set);
+
+  // Clone lp's head and replace tail's successors with clone.
+  Block* clone_loop_head(Loop* lp, StateVector* temp_vector, JsrSet* temp_set);
+
 public:
   // Return the block beginning at bci which has a JsrSet compatible
   // with jsrs.
@@ -627,8 +809,8 @@
   // block factory
   Block* get_block_for(int ciBlockIndex, JsrSet* jsrs, CreateOption option = create_public_copy);
 
-  // How many of the blocks have the private_copy bit set?
-  int private_copy_count(int ciBlockIndex, JsrSet* jsrs) const;
+  // How many of the blocks have the backedge_copy bit set?
+  int backedge_copy_count(int ciBlockIndex, JsrSet* jsrs) const;
 
   // Return an existing block containing bci which has a JsrSet compatible
   // with jsrs, or NULL if there is none.
@@ -651,11 +833,18 @@
                                       return _block_map[po]; }
   Block* start_block() const        { return pre_order_at(start_block_num()); }
   int start_block_num() const       { return 0; }
+  Block* rpo_at(int rpo) const      { assert(0 <= rpo && rpo < block_count(), "out of bounds");
+                                      return _block_map[rpo]; }
+  int next_pre_order()              { return _next_pre_order; }
+  int inc_next_pre_order()          { return _next_pre_order++; }
 
 private:
   // A work list used during flow analysis.
   Block* _work_list;
 
+  // List of blocks in reverse post order
+  Block* _rpo_list;
+
   // Next Block::_pre_order.  After mapping, doubles as block_count.
   int _next_pre_order;
 
@@ -668,6 +857,15 @@
   // Add a basic block to our work list.
   void add_to_work_list(Block* block);
 
+  // Prepend a basic block to rpo list.
+  void prepend_to_rpo_list(Block* blk) {
+    blk->set_rpo_next(_rpo_list);
+    _rpo_list = blk;
+  }
+
+  // Root of the loop tree
+  Loop* _loop_tree_root;
+
   // State used for make_jsr_record
   int _jsr_count;
   GrowableArray<JsrRecord*>* _jsr_records;
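
The prepend_to_rpo_list helper above exploits a standard property: prepending each block to a singly linked list at the moment its depth-first walk finishes yields the blocks in reverse post-order, and in a reducible CFG every predecessor other than a backedge predecessor then appears earlier in that order. A minimal standalone sketch of the idea, using a toy Node type and dfs routine rather than ciTypeFlow blocks:

  #include <cstdio>
  #include <vector>

  struct Node {
    int id;
    std::vector<Node*> succs;
    bool visited = false;
    Node* rpo_next = nullptr;       // toy analogue of Block::set_rpo_next
  };

  static Node* rpo_list = nullptr;  // toy analogue of ciTypeFlow::_rpo_list

  static void dfs(Node* n) {
    n->visited = true;
    for (Node* s : n->succs) {
      if (!s->visited) dfs(s);
    }
    // Post-order finish time: prepending here builds the list in
    // reverse post-order with no second pass.
    n->rpo_next = rpo_list;
    rpo_list = n;
  }

  int main() {
    // Diamond CFG: 0 -> {1, 2}, 1 -> 3, 2 -> 3
    Node n0{0}, n1{1}, n2{2}, n3{3};
    n0.succs = {&n1, &n2};
    n1.succs = {&n3};
    n2.succs = {&n3};
    dfs(&n0);
    for (Node* n = rpo_list; n != nullptr; n = n->rpo_next) {
      printf("%d ", n->id);         // prints 0 before 1 and 2, and 3 last
    }
    printf("\n");
    return 0;
  }

The same ordering property is what lets Parse::Block::is_SEL_backedge, further down in this change, recognize a backedge simply by comparing rpo numbers.
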
@@ -677,6 +875,9 @@
   // does not already exist.
   JsrRecord* make_jsr_record(int entry_address, int return_address);
 
+  void  set_loop_tree_root(Loop* ltr) { _loop_tree_root = ltr; }
+  Loop* loop_tree_root()              { return _loop_tree_root; }
+
 private:
   // Get the initial state for start_bci:
   const StateVector* get_start_state();
@@ -703,6 +904,15 @@
   // necessary.
   void flow_types();
 
+  // Perform the depth-first type flow analysis. Helper for flow_types.
+  void df_flow_types(Block* start,
+                     bool do_flow,
+                     StateVector* temp_vector,
+                     JsrSet* temp_set);
+
+  // Incrementally build loop tree.
+  void build_loop_tree(Block* blk);
+
   // Create the block map, which indexes blocks in pre_order.
   void map_blocks();
 
@@ -711,4 +921,6 @@
   void do_flow();
 
   void print_on(outputStream* st) const PRODUCT_RETURN;
+
+  void rpo_print_on(outputStream* st) const PRODUCT_RETURN;
 };
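
The loop-tree iterators declared above follow the usual done()/next()/current() protocol. A short sketch of a postorder walk (innermost loops first), assuming a ciTypeFlow* named flow with access to the loop_tree_root() accessor, e.g. from inside a ciTypeFlow member; the loop body is illustrative only:

  // Visit loops innermost-first, e.g. to push def_locals information
  // from nested loops out to their parents.
  for (ciTypeFlow::PostorderLoops iter(flow->loop_tree_root());
       !iter.done();
       iter.next()) {
    ciTypeFlow::Loop* lp = iter.current();
    if (lp->is_root())  continue;   // skip the artificial root loop if desired
    // ... examine lp->head(), lp->depth(), lp->def_locals(), etc.
  }

PreorderLoops is driven the same way when parents must be visited before their children.
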
--- a/hotspot/src/share/vm/includeDB_compiler2	Wed Sep 17 08:29:17 2008 -0700
+++ b/hotspot/src/share/vm/includeDB_compiler2	Wed Sep 17 12:59:52 2008 -0700
@@ -582,6 +582,7 @@
 loopTransform.cpp                       addnode.hpp
 loopTransform.cpp                       allocation.inline.hpp
 loopTransform.cpp                       connode.hpp
+loopTransform.cpp                       compileLog.hpp
 loopTransform.cpp                       divnode.hpp
 loopTransform.cpp                       loopnode.hpp
 loopTransform.cpp                       mulnode.hpp
@@ -597,6 +598,7 @@
 loopnode.cpp                            allocation.inline.hpp
 loopnode.cpp                            callnode.hpp
 loopnode.cpp                            ciMethodData.hpp
+loopnode.cpp                            compileLog.hpp
 loopnode.cpp                            connode.hpp
 loopnode.cpp                            divnode.hpp
 loopnode.cpp                            loopnode.hpp
--- a/hotspot/src/share/vm/opto/bytecodeInfo.cpp	Wed Sep 17 08:29:17 2008 -0700
+++ b/hotspot/src/share/vm/opto/bytecodeInfo.cpp	Wed Sep 17 12:59:52 2008 -0700
@@ -25,19 +25,6 @@
 #include "incls/_precompiled.incl"
 #include "incls/_bytecodeInfo.cpp.incl"
 
-// These variables are declared in parse1.cpp
-extern int  explicit_null_checks_inserted;
-extern int  explicit_null_checks_elided;
-extern int  explicit_null_checks_inserted_old;
-extern int  explicit_null_checks_elided_old;
-extern int  nodes_created_old;
-extern int  nodes_created;
-extern int  methods_parsed_old;
-extern int  methods_parsed;
-extern int  methods_seen;
-extern int  methods_seen_old;
-
-
 //=============================================================================
 //------------------------------InlineTree-------------------------------------
 InlineTree::InlineTree( Compile* c, const InlineTree *caller_tree, ciMethod* callee, JVMState* caller_jvms, int caller_bci, float site_invoke_ratio )
@@ -517,27 +504,3 @@
   }
   return iltp;
 }
-
-// ----------------------------------------------------------------------------
-#ifndef PRODUCT
-
-static void per_method_stats() {
-  // Compute difference between this method's cumulative totals and old totals
-  int explicit_null_checks_cur = explicit_null_checks_inserted - explicit_null_checks_inserted_old;
-  int elided_null_checks_cur = explicit_null_checks_elided - explicit_null_checks_elided_old;
-
-  // Print differences
-  if( explicit_null_checks_cur )
-    tty->print_cr("XXX Explicit NULL checks inserted: %d", explicit_null_checks_cur);
-  if( elided_null_checks_cur )
-    tty->print_cr("XXX Explicit NULL checks removed at parse time: %d", elided_null_checks_cur);
-
-  // Store the current cumulative totals
-  nodes_created_old = nodes_created;
-  methods_parsed_old = methods_parsed;
-  methods_seen_old = methods_seen;
-  explicit_null_checks_inserted_old = explicit_null_checks_inserted;
-  explicit_null_checks_elided_old = explicit_null_checks_elided;
-}
-
-#endif
--- a/hotspot/src/share/vm/opto/cfgnode.cpp	Wed Sep 17 08:29:17 2008 -0700
+++ b/hotspot/src/share/vm/opto/cfgnode.cpp	Wed Sep 17 12:59:52 2008 -0700
@@ -1665,7 +1665,11 @@
             // compress paths and change unreachable cycles to TOP
             // If not, we can update the input infinitely along a MergeMem cycle
             // Equivalent code is in MemNode::Ideal_common
-            Node         *m  = phase->transform(n);
+            Node *m  = phase->transform(n);
+            if (outcnt() == 0) {  // Above transform() may kill us!
+              progress = phase->C->top();
+              break;
+            }
             // If tranformed to a MergeMem, get the desired slice
             // Otherwise the returned node represents memory for every slice
             Node *new_mem = (m->is_MergeMem()) ?
--- a/hotspot/src/share/vm/opto/compile.cpp	Wed Sep 17 08:29:17 2008 -0700
+++ b/hotspot/src/share/vm/opto/compile.cpp	Wed Sep 17 12:59:52 2008 -0700
@@ -467,6 +467,7 @@
     }
   }
   set_print_assembly(print_opto_assembly);
+  set_parsed_irreducible_loop(false);
 #endif
 
   if (ProfileTraps) {
@@ -550,6 +551,8 @@
       rethrow_exceptions(kit.transfer_exceptions_into_jvms());
     }
 
+    print_method("Before RemoveUseless");
+
     // Remove clutter produced by parsing.
     if (!failing()) {
       ResourceMark rm;
@@ -615,8 +618,6 @@
   if (failing())  return;
   NOT_PRODUCT( verify_graph_edges(); )
 
-  print_method("Before Matching");
-
 #ifndef PRODUCT
   if (PrintIdeal) {
     ttyLocker ttyl;  // keep the following output all in one block
@@ -720,6 +721,7 @@
   TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false);
   TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false);
   set_print_assembly(PrintFrameConverterAssembly);
+  set_parsed_irreducible_loop(false);
 #endif
   CompileWrapper cw(this);
   Init(/*AliasLevel=*/ 0);
--- a/hotspot/src/share/vm/opto/compile.hpp	Wed Sep 17 08:29:17 2008 -0700
+++ b/hotspot/src/share/vm/opto/compile.hpp	Wed Sep 17 12:59:52 2008 -0700
@@ -160,6 +160,7 @@
   bool                  _print_assembly;        // True if we should dump assembly code for this compilation
 #ifndef PRODUCT
   bool                  _trace_opto_output;
+  bool                  _parsed_irreducible_loop; // True if ciTypeFlow detected irreducible loops during parsing
 #endif
 
   // Compilation environment.
@@ -319,6 +320,8 @@
   }
 #ifndef PRODUCT
   bool          trace_opto_output() const       { return _trace_opto_output; }
+  bool              parsed_irreducible_loop() const { return _parsed_irreducible_loop; }
+  void          set_parsed_irreducible_loop(bool z) { _parsed_irreducible_loop = z; }
 #endif
 
   void begin_method() {
--- a/hotspot/src/share/vm/opto/doCall.cpp	Wed Sep 17 08:29:17 2008 -0700
+++ b/hotspot/src/share/vm/opto/doCall.cpp	Wed Sep 17 12:59:52 2008 -0700
@@ -795,7 +795,7 @@
 
     ciInstanceKlass *ikl = receiver_type->klass()->as_instance_klass();
     if (ikl->is_loaded() && ikl->is_initialized() && !ikl->is_interface() &&
-        (ikl == actual_receiver || ikl->is_subclass_of(actual_receiver))) {
+        (ikl == actual_receiver || ikl->is_subtype_of(actual_receiver))) {
       // ikl is a same or better type than the original actual_receiver,
       // e.g. static receiver from bytecodes.
       actual_receiver = ikl;
--- a/hotspot/src/share/vm/opto/graphKit.cpp	Wed Sep 17 08:29:17 2008 -0700
+++ b/hotspot/src/share/vm/opto/graphKit.cpp	Wed Sep 17 12:59:52 2008 -0700
@@ -587,7 +587,7 @@
 #ifdef ASSERT
   _bci    = kit->bci();
   Parse* parser = kit->is_Parse();
-  int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->pre_order();
+  int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->rpo();
   _block  = block;
 #endif
 }
@@ -596,7 +596,7 @@
 #ifdef ASSERT
   assert(kit->bci() == _bci, "bci must not shift");
   Parse* parser = kit->is_Parse();
-  int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->pre_order();
+  int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->rpo();
   assert(block == _block,    "block must not shift");
 #endif
   kit->set_map(_map);
--- a/hotspot/src/share/vm/opto/loopTransform.cpp	Wed Sep 17 08:29:17 2008 -0700
+++ b/hotspot/src/share/vm/opto/loopTransform.cpp	Wed Sep 17 12:59:52 2008 -0700
@@ -1012,6 +1012,8 @@
     if (!has_ctrl(old))
       set_loop(nnn, loop);
   }
+
+  loop->record_for_igvn();
 }
 
 //------------------------------do_maximally_unroll----------------------------
--- a/hotspot/src/share/vm/opto/loopnode.cpp	Wed Sep 17 08:29:17 2008 -0700
+++ b/hotspot/src/share/vm/opto/loopnode.cpp	Wed Sep 17 12:59:52 2008 -0700
@@ -1279,7 +1279,7 @@
     // Visit all children, looking for Phis
     for (DUIterator i = cl->outs(); cl->has_out(i); i++) {
       Node *out = cl->out(i);
-      if (!out->is_Phi())  continue; // Looking for phis
+      if (!out->is_Phi() || out == phi)  continue; // Looking for other phis
       PhiNode* phi2 = out->as_Phi();
       Node *incr2 = phi2->in( LoopNode::LoopBackControl );
       // Look for induction variables of the form:  X += constant
@@ -1388,6 +1388,37 @@
 
 #endif
 
+static void log_loop_tree(IdealLoopTree* root, IdealLoopTree* loop, CompileLog* log) {
+  if (loop == root) {
+    if (loop->_child != NULL) {
+      log->begin_head("loop_tree");
+      log->end_head();
+      log_loop_tree(root, loop->_child, log);
+      log->tail("loop_tree");
+      assert(loop->_next == NULL, "the root loop should have no siblings");
+    }
+  } else {
+    Node* head = loop->_head;
+    log->begin_head("loop");
+    log->print(" idx='%d' ", head->_idx);
+    if (loop->_irreducible) log->print("irreducible='1' ");
+    if (head->is_Loop()) {
+      if (head->as_Loop()->is_inner_loop()) log->print("inner_loop='1' ");
+      if (head->as_Loop()->is_partial_peel_loop()) log->print("partial_peel_loop='1' ");
+    }
+    if (head->is_CountedLoop()) {
+      CountedLoopNode* cl = head->as_CountedLoop();
+      if (cl->is_pre_loop())  log->print("pre_loop='%d' ",  cl->main_idx());
+      if (cl->is_main_loop()) log->print("main_loop='%d' ", cl->_idx);
+      if (cl->is_post_loop()) log->print("post_loop='%d' ",  cl->main_idx());
+    }
+    log->end_head();
+    if( loop->_child ) log_loop_tree(root, loop->_child, log);
+    log->tail("loop");
+    if( loop->_next  ) log_loop_tree(root, loop->_next, log);
+  }
+}
+
 //=============================================================================
 //------------------------------PhaseIdealLoop---------------------------------
 // Create a PhaseLoop.  Build the ideal Loop tree.  Map each Ideal Node to
@@ -1624,10 +1655,13 @@
   // Cleanup any modified bits
   _igvn.optimize();
 
-  // Do not repeat loop optimizations if irreducible loops are present
-  // by claiming no-progress.
-  if( _has_irreducible_loops )
-    C->clear_major_progress();
+  // disable assert until issue with split_flow_path is resolved (6742111)
+  // assert(!_has_irreducible_loops || C->parsed_irreducible_loop() || C->is_osr_compilation(),
+  //        "shouldn't introduce irreducible loops");
+
+  if (C->log() != NULL) {
+    log_loop_tree(_ltree_root, _ltree_root, C->log());
+  }
 }
 
 #ifndef PRODUCT
@@ -2732,11 +2766,7 @@
 }
 
 void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const {
-
-  // Indent by loop nesting depth
-  for( uint x = 0; x < loop->_nest; x++ )
-    tty->print("  ");
-  tty->print_cr("---- Loop N%d-N%d ----", loop->_head->_idx,loop->_tail->_idx);
+  loop->dump_head();
 
   // Now scan for CFG nodes in the same loop
   for( uint j=idx; j > 0;  j-- ) {
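
Given the element and attribute names emitted by log_loop_tree above, the loop tree recorded in the compilation log for a method with one counted loop nested inside an outer loop would look roughly like this (node indices are made up, and only the attributes that apply get printed):

  <loop_tree>
  <loop idx='27' >
  <loop idx='41' inner_loop='1' >
  </loop>
  </loop>
  </loop_tree>

Nesting and sibling order mirror the _child/_next links of the IdealLoopTree, so tools reading the LogCompilation output can reconstruct the loop structure.
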
--- a/hotspot/src/share/vm/opto/loopnode.hpp	Wed Sep 17 08:29:17 2008 -0700
+++ b/hotspot/src/share/vm/opto/loopnode.hpp	Wed Sep 17 12:59:52 2008 -0700
@@ -192,6 +192,8 @@
   int is_main_no_pre_loop() const { return _loop_flags & Main_Has_No_Pre_Loop; }
   void set_main_no_pre_loop() { _loop_flags |= Main_Has_No_Pre_Loop; }
 
+  int main_idx() const { return _main_idx; }
+
 
   void set_pre_loop  (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; }
   void set_main_loop (                     ) { assert(is_normal_loop(),""); _loop_flags |= Main;                         }
--- a/hotspot/src/share/vm/opto/loopopts.cpp	Wed Sep 17 08:29:17 2008 -0700
+++ b/hotspot/src/share/vm/opto/loopopts.cpp	Wed Sep 17 12:59:52 2008 -0700
@@ -2667,6 +2667,10 @@
   // Fix this by adjusting to use the post-increment trip counter.
   Node *phi = cl->phi();
   if( !phi ) return;            // Dead infinite loop
+
+  // Shape messed up, probably by iteration_split_impl
+  if (phi->in(LoopNode::LoopBackControl) != cl->incr()) return;
+
   bool progress = true;
   while (progress) {
     progress = false;
--- a/hotspot/src/share/vm/opto/matcher.cpp	Wed Sep 17 08:29:17 2008 -0700
+++ b/hotspot/src/share/vm/opto/matcher.cpp	Wed Sep 17 12:59:52 2008 -0700
@@ -273,7 +273,7 @@
   find_shared( C->root() );
   find_shared( C->top() );
 
-  C->print_method("Before Matching", 2);
+  C->print_method("Before Matching");
 
   // Swap out to old-space; emptying new-space
   Arena *old = C->node_arena()->move_contents(C->old_arena());
--- a/hotspot/src/share/vm/opto/parse.hpp	Wed Sep 17 08:29:17 2008 -0700
+++ b/hotspot/src/share/vm/opto/parse.hpp	Wed Sep 17 12:59:52 2008 -0700
@@ -167,9 +167,19 @@
 
     int start() const                      { return flow()->start(); }
     int limit() const                      { return flow()->limit(); }
-    int pre_order() const                  { return flow()->pre_order(); }
+    int rpo() const                        { return flow()->rpo(); }
     int start_sp() const                   { return flow()->stack_size(); }
 
+    bool is_loop_head() const              { return flow()->is_loop_head(); }
+    bool is_SEL_head() const               { return flow()->is_single_entry_loop_head(); }
+    bool is_SEL_backedge(Block* pred) const { return is_SEL_head() && pred->rpo() >= rpo(); }
+    bool is_invariant_local(uint i) const  {
+      const JVMState* jvms = start_map()->jvms();
+      if (!jvms->is_loc(i)) return false;
+      return flow()->is_invariant_local(i - jvms->locoff());
+    }
+    bool can_elide_SEL_phi(uint i) const  { assert(is_SEL_head(), "must be a single-entry loop head"); return is_invariant_local(i); }
+
     const Type* peek(int off=0) const      { return stack_type_at(start_sp() - (off+1)); }
 
     const Type* stack_type_at(int i) const;
@@ -305,7 +315,7 @@
   //            entry_bci()     -- see osr_bci, etc.
 
   ciTypeFlow*   flow()          const { return _flow; }
-  //            blocks()        -- see pre_order_at, start_block, etc.
+  //            blocks()        -- see rpo_at, start_block, etc.
   int           block_count()   const { return _block_count; }
 
   GraphKit&     exits()               { return _exits; }
@@ -330,12 +340,12 @@
   // Must this parse be aborted?
   bool failing()                { return C->failing(); }
 
-  Block* pre_order_at(int po) {
-    assert(0 <= po && po < _block_count, "oob");
-    return &_blocks[po];
+  Block* rpo_at(int rpo) {
+    assert(0 <= rpo && rpo < _block_count, "oob");
+    return &_blocks[rpo];
   }
   Block* start_block() {
-    return pre_order_at(flow()->start_block()->pre_order());
+    return rpo_at(flow()->start_block()->rpo());
   }
   // Can return NULL if the flow pass did not complete a block.
   Block* successor_for_bci(int bci) {
@@ -359,9 +369,6 @@
   // Parse all the basic blocks.
   void do_all_blocks();
 
-  // Helper for do_all_blocks; makes one pass in pre-order.
-  void visit_blocks();
-
   // Parse the current basic block
   void do_one_block();
 
--- a/hotspot/src/share/vm/opto/parse1.cpp	Wed Sep 17 08:29:17 2008 -0700
+++ b/hotspot/src/share/vm/opto/parse1.cpp	Wed Sep 17 12:59:52 2008 -0700
@@ -29,17 +29,17 @@
 // the most. Some of the non-static variables are needed in bytecodeInfo.cpp
 // and eventually should be encapsulated in a proper class (gri 8/18/98).
 
-int nodes_created              = 0; int nodes_created_old              = 0;
-int methods_parsed             = 0; int methods_parsed_old             = 0;
-int methods_seen               = 0; int methods_seen_old               = 0;
+int nodes_created              = 0;
+int methods_parsed             = 0;
+int methods_seen               = 0;
+int blocks_parsed              = 0;
+int blocks_seen                = 0;
 
-int explicit_null_checks_inserted = 0, explicit_null_checks_inserted_old = 0;
-int explicit_null_checks_elided   = 0, explicit_null_checks_elided_old   = 0;
+int explicit_null_checks_inserted = 0;
+int explicit_null_checks_elided   = 0;
 int all_null_checks_found         = 0, implicit_null_checks              = 0;
 int implicit_null_throws          = 0;
 
-int parse_idx = 0;
-size_t parse_arena = 0;
 int reclaim_idx  = 0;
 int reclaim_in   = 0;
 int reclaim_node = 0;
@@ -61,6 +61,7 @@
   tty->cr();
   if (methods_seen != methods_parsed)
     tty->print_cr("Reasons for parse failures (NOT cumulative):");
+  tty->print_cr("Blocks parsed: %d  Blocks seen: %d", blocks_parsed, blocks_seen);
 
   if( explicit_null_checks_inserted )
     tty->print_cr("%d original NULL checks - %d elided (%2d%%); optimizer leaves %d,", explicit_null_checks_inserted, explicit_null_checks_elided, (100*explicit_null_checks_elided)/explicit_null_checks_inserted, all_null_checks_found);
@@ -373,6 +374,12 @@
     C->record_method_not_compilable_all_tiers(_flow->failure_reason());
   }
 
+#ifndef PRODUCT
+  if (_flow->has_irreducible_entry()) {
+    C->set_parsed_irreducible_loop(true);
+  }
+#endif
+
   if (_expected_uses <= 0) {
     _prof_factor = 1;
   } else {
@@ -556,118 +563,93 @@
   set_map(entry_map);
   do_exits();
 
-  // Collect a few more statistics.
-  parse_idx += C->unique();
-  parse_arena += C->node_arena()->used();
-
   if (log)  log->done("parse nodes='%d' memory='%d'",
                       C->unique(), C->node_arena()->used());
 }
 
 //---------------------------do_all_blocks-------------------------------------
 void Parse::do_all_blocks() {
-  _blocks_merged = 0;
-  _blocks_parsed = 0;
+  bool has_irreducible = flow()->has_irreducible_entry();
+
+  // Walk over all blocks in Reverse Post-Order.
+  while (true) {
+    bool progress = false;
+    for (int rpo = 0; rpo < block_count(); rpo++) {
+      Block* block = rpo_at(rpo);
+
+      if (block->is_parsed()) continue;
 
-  int old_blocks_merged = -1;
-  int old_blocks_parsed = -1;
+      if (!block->is_merged()) {
+        // Dead block, no state reaches this block
+        continue;
+      }
 
-  for (int tries = 0; ; tries++) {
-    visit_blocks();
-    if (failing())  return; // Check for bailout
+      // Prepare to parse this block.
+      load_state_from(block);
+
+      if (stopped()) {
+        // Block is dead.
+        continue;
+      }
+
+      blocks_parsed++;
 
-    // No need for a work list.  The outer loop is hardly ever repeated.
-    // The following loop traverses the blocks in a reasonable pre-order,
-    // as produced by the ciTypeFlow pass.
+      progress = true;
+      if (block->is_loop_head() || block->is_handler() || (has_irreducible && !block->is_ready())) {
+        // Not all preds have been parsed.  We must build phis everywhere.
+        // (Note that dead locals do not get phis built, ever.)
+        ensure_phis_everywhere();
+
+        // Leave behind an undisturbed copy of the map, for future merges.
+        set_map(clone_map());
+      }
 
-    // This loop can be taken more than once if there are two entries to
-    // a loop (irreduceable CFG), and the edge which ciTypeFlow chose
-    // as the first predecessor to the loop goes dead in the parser,
-    // due to parse-time optimization.  (Could happen with obfuscated code.)
+      if (control()->is_Region() && !block->is_loop_head() && !has_irreducible && !block->is_handler()) {
+        // In the absence of irreducible loops, the Region and Phis
+        // associated with a merge that doesn't involve a backedge can
+        // be simplified now since the RPO parsing order guarantees
+        // that any path which was supposed to reach here has already
+        // been parsed or must be dead.
+        Node* c = control();
+        Node* result = _gvn.transform_no_reclaim(control());
+        if (c != result && TraceOptoParse) {
+          tty->print_cr("Block #%d replace %d with %d", block->rpo(), c->_idx, result->_idx);
+        }
+        if (result != top()) {
+          record_for_igvn(result);
+        }
+      }
 
-    // Look for progress, or the lack of it:
-    if (_blocks_parsed == block_count()) {
-      // That's all, folks.
-      if (TraceOptoParse) {
-        tty->print_cr("All blocks parsed.");
-      }
+      // Parse the block.
+      do_one_block();
+
+      // Check for bailouts.
+      if (failing())  return;
+    }
+
+    // With irreducible loops, multiple passes might be necessary to parse everything.
+    if (!has_irreducible || !progress) {
       break;
     }
+  }
 
-    // How much work was done this time around?
-    int new_blocks_merged = _blocks_merged - old_blocks_merged;
-    int new_blocks_parsed = _blocks_parsed - old_blocks_parsed;
-    if (new_blocks_merged == 0) {
-      if (TraceOptoParse) {
-        tty->print_cr("All live blocks parsed; %d dead blocks.", block_count() - _blocks_parsed);
-      }
-      // No new blocks have become parseable.  Some blocks are just dead.
-      break;
-    }
-    assert(new_blocks_parsed > 0, "must make progress");
-    assert(tries < block_count(), "the pre-order cannot be this bad!");
-
-    old_blocks_merged = _blocks_merged;
-    old_blocks_parsed = _blocks_parsed;
-  }
+  blocks_seen += block_count();
 
 #ifndef PRODUCT
   // Make sure there are no half-processed blocks remaining.
   // Every remaining unprocessed block is dead and may be ignored now.
-  for (int po = 0; po < block_count(); po++) {
-    Block* block = pre_order_at(po);
+  for (int rpo = 0; rpo < block_count(); rpo++) {
+    Block* block = rpo_at(rpo);
     if (!block->is_parsed()) {
       if (TraceOptoParse) {
-        tty->print("Skipped dead block %d at bci:%d", po, block->start());
-        assert(!block->is_merged(), "no half-processed blocks");
+        tty->print_cr("Skipped dead block %d at bci:%d", rpo, block->start());
       }
+      assert(!block->is_merged(), "no half-processed blocks");
     }
   }
 #endif
 }
 
-//---------------------------visit_blocks--------------------------------------
-void Parse::visit_blocks() {
-  // Walk over all blocks, parsing every one that has been reached (merged).
-  for (int po = 0; po < block_count(); po++) {
-    Block* block = pre_order_at(po);
-
-    if (block->is_parsed()) {
-      // Do not parse twice.
-      continue;
-    }
-
-    if (!block->is_merged()) {
-      // No state on this block.  It had not yet been reached.
-      // Delay reaching it until later.
-      continue;
-    }
-
-    // Prepare to parse this block.
-    load_state_from(block);
-
-    if (stopped()) {
-      // Block is dead.
-      continue;
-    }
-
-    if (!block->is_ready() || block->is_handler()) {
-      // Not all preds have been parsed.  We must build phis everywhere.
-      // (Note that dead locals do not get phis built, ever.)
-      ensure_phis_everywhere();
-
-      // Leave behind an undisturbed copy of the map, for future merges.
-      set_map(clone_map());
-    }
-
-    // Ready or not, parse the block.
-    do_one_block();
-
-    // Check for bailouts.
-    if (failing())  return;
-  }
-}
-
 //-------------------------------build_exits----------------------------------
 // Build normal and exceptional exit merge points.
 void Parse::build_exits() {
@@ -1134,24 +1116,24 @@
   _blocks = NEW_RESOURCE_ARRAY(Block, _block_count);
   Copy::zero_to_bytes(_blocks, sizeof(Block)*_block_count);
 
-  int po;
+  int rpo;
 
   // Initialize the structs.
-  for (po = 0; po < block_count(); po++) {
-    Block* block = pre_order_at(po);
-    block->init_node(this, po);
+  for (rpo = 0; rpo < block_count(); rpo++) {
+    Block* block = rpo_at(rpo);
+    block->init_node(this, rpo);
   }
 
   // Collect predecessor and successor information.
-  for (po = 0; po < block_count(); po++) {
-    Block* block = pre_order_at(po);
+  for (rpo = 0; rpo < block_count(); rpo++) {
+    Block* block = rpo_at(rpo);
     block->init_graph(this);
   }
 }
 
 //-------------------------------init_node-------------------------------------
-void Parse::Block::init_node(Parse* outer, int po) {
-  _flow = outer->flow()->pre_order_at(po);
+void Parse::Block::init_node(Parse* outer, int rpo) {
+  _flow = outer->flow()->rpo_at(rpo);
   _pred_count = 0;
   _preds_parsed = 0;
   _count = 0;
@@ -1177,7 +1159,7 @@
   int p = 0;
   for (int i = 0; i < ns+ne; i++) {
     ciTypeFlow::Block* tf2 = (i < ns) ? tfs->at(i) : tfe->at(i-ns);
-    Block* block2 = outer->pre_order_at(tf2->pre_order());
+    Block* block2 = outer->rpo_at(tf2->rpo());
     _successors[i] = block2;
 
     // Accumulate pred info for the other block, too.
@@ -1368,10 +1350,11 @@
     int nt = b->all_successors();
 
     tty->print("Parsing block #%d at bci [%d,%d), successors: ",
-                  block()->pre_order(), block()->start(), block()->limit());
+                  block()->rpo(), block()->start(), block()->limit());
     for (int i = 0; i < nt; i++) {
-      tty->print((( i < ns) ? " %d" : " %d(e)"), b->successor_at(i)->pre_order());
+      tty->print((( i < ns) ? " %d" : " %d(e)"), b->successor_at(i)->rpo());
     }
+    if (b->is_loop_head()) tty->print("  lphd");
     tty->print_cr("");
   }
 
@@ -1501,7 +1484,7 @@
 #ifndef PRODUCT
   Block* b = block();
   int trap_bci = b->flow()->has_trap()? b->flow()->trap_bci(): -1;
-  tty->print_cr("### Missing successor at bci:%d for block #%d (trap_bci:%d)", target_bci, b->pre_order(), trap_bci);
+  tty->print_cr("### Missing successor at bci:%d for block #%d (trap_bci:%d)", target_bci, b->rpo(), trap_bci);
 #endif
   ShouldNotReachHere();
 }
@@ -1509,7 +1492,7 @@
 //--------------------------merge_common---------------------------------------
 void Parse::merge_common(Parse::Block* target, int pnum) {
   if (TraceOptoParse) {
-    tty->print("Merging state at block #%d bci:%d", target->pre_order(), target->start());
+    tty->print("Merging state at block #%d bci:%d", target->rpo(), target->start());
   }
 
   // Zap extra stack slots to top
@@ -1534,6 +1517,7 @@
     // which must not be allowed into this block's map.)
     if (pnum > PhiNode::Input         // Known multiple inputs.
         || target->is_handler()       // These have unpredictable inputs.
+        || target->is_loop_head()     // Known multiple inputs
         || control()->is_Region()) {  // We must hide this guy.
       // Add a Region to start the new basic block.  Phis will be added
       // later lazily.
@@ -1575,15 +1559,21 @@
 
     // Compute where to merge into
     // Merge incoming control path
-    r->set_req(pnum, newin->control());
+    r->init_req(pnum, newin->control());
 
     if (pnum == 1) {            // Last merge for this Region?
-      _gvn.transform_no_reclaim(r);
+      if (!block()->flow()->is_irreducible_entry()) {
+        Node* result = _gvn.transform_no_reclaim(r);
+        if (r != result && TraceOptoParse) {
+          tty->print_cr("Block #%d replace %d with %d", block()->rpo(), r->_idx, result->_idx);
+        }
+      }
       record_for_igvn(r);
     }
 
     // Update all the non-control inputs to map:
     assert(TypeFunc::Parms == newin->jvms()->locoff(), "parser map should contain only youngest jvms");
+    bool check_elide_phi = target->is_SEL_backedge(save_block);
     for (uint j = 1; j < newin->req(); j++) {
       Node* m = map()->in(j);   // Current state of target.
       Node* n = newin->in(j);   // Incoming change to target state.
@@ -1603,7 +1593,11 @@
           merge_memory_edges(n->as_MergeMem(), pnum, nophi);
           continue;
         default:                // All normal stuff
-          if (phi == NULL)  phi = ensure_phi(j, nophi);
+          if (phi == NULL) {
+            if (!check_elide_phi || !target->can_elide_SEL_phi(j)) {
+              phi = ensure_phi(j, nophi);
+            }
+          }
           break;
         }
       }
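
The phi elision above is the heart of the fix for the synopsis: a phi created at a loop head for a loop-invariant local widens the local's type to the merge of its inputs, which can turn an exact receiver type back into the declared type and defeat inlining based on static types. A rough analogy in ordinary C++ (made-up Base/Derived/loop_sum names, not HotSpot code) of what the parser now preserves for a virtual call inside a loop:

  #include <cstdio>

  struct Base    { virtual int f() const { return 0; } virtual ~Base() {} };
  struct Derived : Base { int f() const override { return 1; } };

  int loop_sum(int n) {
    Derived d;
    Base* p = &d;                   // exact dynamic type is known here
    int sum = 0;
    for (int i = 0; i < n; i++) {
      // 'p' is loop-invariant.  If it were nevertheless merged through a
      // loop-head phi typed only as Base*, the call below could not be
      // devirtualized from the static type alone.  Skipping the phi keeps
      // the precise type Derived* alive around the loop.
      sum += p->f();
    }
    return sum;
  }

  int main() {
    printf("%d\n", loop_sum(3));    // prints 3
    return 0;
  }
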
@@ -1736,9 +1730,13 @@
   uint nof_monitors = map()->jvms()->nof_monitors();
 
   assert(TypeFunc::Parms == map()->jvms()->locoff(), "parser map should contain only youngest jvms");
+  bool check_elide_phi = block()->is_SEL_head();
   for (uint i = TypeFunc::Parms; i < monoff; i++) {
-    ensure_phi(i);
+    if (!check_elide_phi || !block()->can_elide_SEL_phi(i)) {
+      ensure_phi(i);
+    }
   }
+
   // Even monitors need Phis, though they are well-structured.
   // This is true for OSR methods, and also for the rare cases where
   // a monitor object is the subject of a replace_in_map operation.