hotspot/src/share/vm/opto/block.cpp
changeset 1498 346bf226078e
parent 1217 5eb97f366a6a
child 2030 39d55e4534b4
--- a/hotspot/src/share/vm/opto/block.cpp	Thu Oct 30 17:08:48 2008 -0700
+++ b/hotspot/src/share/vm/opto/block.cpp	Thu Nov 06 14:59:10 2008 -0800
@@ -57,6 +57,14 @@
   _blocks[i] = b;
 }
 
+#ifndef PRODUCT
+void Block_List::print() {
+  for (uint i=0; i < size(); i++) {
+    tty->print("B%d ", _blocks[i]->_pre_order);
+  }
+  tty->print("size = %d\n", size());
+}
+#endif
 
 //=============================================================================
 
@@ -66,6 +74,12 @@
   // Check for Start block
   if( _pre_order == 1 ) return InteriorEntryAlignment;
   // Check for loop alignment
+  if (has_loop_alignment())  return loop_alignment();
+
+  return 1;                     // no particular alignment
+}
+
+uint Block::compute_loop_alignment() {
   Node *h = head();
   if( h->is_Loop() && h->as_Loop()->is_inner_loop() )  {
     // Pre- and post-loops have low trip count so do not bother with
@@ -83,13 +97,15 @@
     }
     return OptoLoopAlignment; // Otherwise align loop head
   }
+
   return 1;                     // no particular alignment
 }
 
 //-----------------------------------------------------------------------------
 // Compute the size of first 'inst_cnt' instructions in this block.
 // Return the number of instructions left to compute if the block has
-// less then 'inst_cnt' instructions.
+// less then 'inst_cnt' instructions. Stop, and return 0 if sum_size
+// exceeds OptoLoopAlignment.
 uint Block::compute_first_inst_size(uint& sum_size, uint inst_cnt,
                                     PhaseRegAlloc* ra) {
   uint last_inst = _nodes.size();
@@ -307,6 +323,8 @@
     tty->print("\tLoop: B%d-B%d ", bhead->_pre_order, bx->_pre_order);
     // Dump any loop-specific bits, especially for CountedLoops.
     loop->dump_spec(tty);
+  } else if (has_loop_alignment()) {
+    tty->print(" top-of-loop");
   }
   tty->print(" Freq: %g",_freq);
   if( Verbose || WizardMode ) {
@@ -509,9 +527,11 @@
   int branch_idx = b->_nodes.size() - b->_num_succs-1;
   if( branch_idx < 1 ) return false;
   Node *bra = b->_nodes[branch_idx];
-  if( bra->is_Catch() ) return true;
+  if( bra->is_Catch() )
+    return true;
   if( bra->is_Mach() ) {
-    if( bra->is_MachNullCheck() ) return true;
+    if( bra->is_MachNullCheck() )
+      return true;
     int iop = bra->as_Mach()->ideal_Opcode();
     if( iop == Op_FastLock || iop == Op_FastUnlock )
       return true;
@@ -557,10 +577,10 @@
     dead->_nodes[k]->del_req(j);
 }
 
-//------------------------------MoveToNext-------------------------------------
+//------------------------------move_to_next-----------------------------------
 // Helper function to move block bx to the slot following b_index. Return
 // true if the move is successful, otherwise false
-bool PhaseCFG::MoveToNext(Block* bx, uint b_index) {
+bool PhaseCFG::move_to_next(Block* bx, uint b_index) {
   if (bx == NULL) return false;
 
   // Return false if bx is already scheduled.
@@ -591,9 +611,9 @@
   return true;
 }
 
-//------------------------------MoveToEnd--------------------------------------
+//------------------------------move_to_end------------------------------------
 // Move empty and uncommon blocks to the end.
-void PhaseCFG::MoveToEnd(Block *b, uint i) {
+void PhaseCFG::move_to_end(Block *b, uint i) {
   int e = b->is_Empty();
   if (e != Block::not_empty) {
     if (e == Block::empty_with_goto) {
@@ -609,15 +629,31 @@
   _blocks.push(b);
 }
 
-//------------------------------RemoveEmpty------------------------------------
-// Remove empty basic blocks and useless branches.
-void PhaseCFG::RemoveEmpty() {
+//---------------------------set_loop_alignment--------------------------------
+// Set loop alignment for every block
+void PhaseCFG::set_loop_alignment() {
+  uint last = _num_blocks;
+  assert( _blocks[0] == _broot, "" );
+
+  for (uint i = 1; i < last; i++ ) {
+    Block *b = _blocks[i];
+    if (b->head()->is_Loop()) {
+      b->set_loop_alignment(b);
+    }
+  }
+}
+
+//-----------------------------remove_empty------------------------------------
+// Make empty basic blocks to be "connector" blocks, Move uncommon blocks
+// to the end.
+void PhaseCFG::remove_empty() {
   // Move uncommon blocks to the end
   uint last = _num_blocks;
-  uint i;
   assert( _blocks[0] == _broot, "" );
-  for( i = 1; i < last; i++ ) {
+
+  for (uint i = 1; i < last; i++) {
     Block *b = _blocks[i];
+    if (b->is_connector()) break;
 
     // Check for NeverBranch at block end.  This needs to become a GOTO to the
     // true target.  NeverBranch are treated as a conditional branch that
@@ -629,37 +665,40 @@
       convert_NeverBranch_to_Goto(b);
 
     // Look for uncommon blocks and move to end.
-    if( b->is_uncommon(_bbs) ) {
-      MoveToEnd(b, i);
-      last--;                   // No longer check for being uncommon!
-      if( no_flip_branch(b) ) { // Fall-thru case must follow?
-        b = _blocks[i];         // Find the fall-thru block
-        MoveToEnd(b, i);
-        last--;
+    if (!C->do_freq_based_layout()) {
+      if( b->is_uncommon(_bbs) ) {
+        move_to_end(b, i);
+        last--;                   // No longer check for being uncommon!
+        if( no_flip_branch(b) ) { // Fall-thru case must follow?
+          b = _blocks[i];         // Find the fall-thru block
+          move_to_end(b, i);
+          last--;
+        }
+        i--;                      // backup block counter post-increment
       }
-      i--;                      // backup block counter post-increment
     }
   }
 
-  // Remove empty blocks
-  uint j1;
+  // Move empty blocks to the end
   last = _num_blocks;
-  for( i=0; i < last; i++ ) {
+  for (uint i = 1; i < last; i++) {
     Block *b = _blocks[i];
-    if (i > 0) {
-      if (b->is_Empty() != Block::not_empty) {
-        MoveToEnd(b, i);
-        last--;
-        i--;
-      }
+    if (b->is_Empty() != Block::not_empty) {
+      move_to_end(b, i);
+      last--;
+      i--;
     }
   } // End of for all blocks
+}
 
+//-----------------------------fixup_flow--------------------------------------
+// Fix up the final control flow for basic blocks.
+void PhaseCFG::fixup_flow() {
   // Fixup final control flow for the blocks.  Remove jump-to-next
   // block.  If neither arm of a IF follows the conditional branch, we
   // have to add a second jump after the conditional.  We place the
   // TRUE branch target in succs[0] for both GOTOs and IFs.
-  for( i=0; i < _num_blocks; i++ ) {
+  for (uint i=0; i < _num_blocks; i++) {
     Block *b = _blocks[i];
     b->_pre_order = i;          // turn pre-order into block-index
 
@@ -700,7 +739,7 @@
         }
       }
       // Remove all CatchProjs
-      for (j1 = 0; j1 < b->_num_succs; j1++) b->_nodes.pop();
+      for (uint j1 = 0; j1 < b->_num_succs; j1++) b->_nodes.pop();
 
     } else if (b->_num_succs == 1) {
       // Block ends in a Goto?
@@ -730,8 +769,7 @@
       // successors after the current one, provided that the
       // successor was previously unscheduled, but moveable
       // (i.e., all paths to it involve a branch).
-      if( bnext != bs0 && bnext != bs1 ) {
-
+      if( !C->do_freq_based_layout() && bnext != bs0 && bnext != bs1 ) {
         // Choose the more common successor based on the probability
         // of the conditional branch.
         Block *bx = bs0;
@@ -751,9 +789,9 @@
         }
 
         // Attempt the more common successor first
-        if (MoveToNext(bx, i)) {
+        if (move_to_next(bx, i)) {
           bnext = bx;
-        } else if (MoveToNext(by, i)) {
+        } else if (move_to_next(by, i)) {
           bnext = by;
         }
       }
@@ -774,10 +812,8 @@
         // Flip projection for each target
         { ProjNode *tmp = proj0; proj0 = proj1; proj1 = tmp; }
 
-      } else if( bnext == bs1 ) { // Fall-thru is already in succs[1]
-
-      } else {                  // Else need a double-branch
-
+      } else if( bnext != bs1 ) {
+        // Need a double-branch
         // The existing conditional branch need not change.
         // Add a unconditional branch to the false target.
         // Alas, it must appear in its own block and adding a
@@ -786,8 +822,9 @@
       }
 
       // Make sure we TRUE branch to the target
-      if( proj0->Opcode() == Op_IfFalse )
+      if( proj0->Opcode() == Op_IfFalse ) {
         iff->negate();
+      }
 
       b->_nodes.pop();          // Remove IfFalse & IfTrue projections
       b->_nodes.pop();
@@ -796,9 +833,7 @@
       // Multi-exit block, e.g. a switch statement
       // But we don't need to do anything here
     }
-
   } // End of for all blocks
-
 }
 
 
@@ -905,7 +940,7 @@
   // Force the Union-Find mapping to be at least this large
   extend(max,0);
   // Initialize to be the ID mapping.
-  for( uint i=0; i<_max; i++ ) map(i,i);
+  for( uint i=0; i<max; i++ ) map(i,i);
 }
 
 //------------------------------Find_compress----------------------------------
@@ -937,7 +972,6 @@
   if( idx >= _max ) return idx;
   uint next = lookup(idx);
   while( next != idx ) {        // Scan chain of equivalences
-    assert( next < idx, "always union smaller" );
     idx = next;                 // until find a fixed-point
     next = lookup(idx);
   }
@@ -956,3 +990,491 @@
   assert( src < dst, "always union smaller" );
   map(dst,src);
 }
+
+#ifndef PRODUCT
+static void edge_dump(GrowableArray<CFGEdge *> *edges) {
+  tty->print_cr("---- Edges ----");
+  for (int i = 0; i < edges->length(); i++) {
+    CFGEdge *e = edges->at(i);
+    if (e != NULL) {
+      edges->at(i)->dump();
+    }
+  }
+}
+
+static void trace_dump(Trace *traces[], int count) {
+  tty->print_cr("---- Traces ----");
+  for (int i = 0; i < count; i++) {
+    Trace *tr = traces[i];
+    if (tr != NULL) {
+      tr->dump();
+    }
+  }
+}
+
+void Trace::dump( ) const {
+  tty->print_cr("Trace (freq %f)", first_block()->_freq);
+  for (Block *b = first_block(); b != NULL; b = next(b)) {
+    tty->print("  B%d", b->_pre_order);
+    if (b->head()->is_Loop()) {
+      tty->print(" (L%d)", b->compute_loop_alignment());
+    }
+    if (b->has_loop_alignment()) {
+      tty->print(" (T%d)", b->code_alignment());
+    }
+  }
+  tty->cr();
+}
+
+void CFGEdge::dump( ) const {
+  tty->print(" B%d  -->  B%d  Freq: %f  out:%3d%%  in:%3d%%  State: ",
+             from()->_pre_order, to()->_pre_order, freq(), _from_pct, _to_pct);
+  switch(state()) {
+  case connected:
+    tty->print("connected");
+    break;
+  case open:
+    tty->print("open");
+    break;
+  case interior:
+    tty->print("interior");
+    break;
+  }
+  if (infrequent()) {
+    tty->print("  infrequent");
+  }
+  tty->cr();
+}
+#endif
+
+//=============================================================================
+
+//------------------------------edge_order-------------------------------------
+// Comparison function for edges
+static int edge_order(CFGEdge **e0, CFGEdge **e1) {
+  float freq0 = (*e0)->freq();
+  float freq1 = (*e1)->freq();
+  if (freq0 != freq1) {
+    return freq0 > freq1 ? -1 : 1;
+  }
+
+  int dist0 = (*e0)->to()->_rpo - (*e0)->from()->_rpo;
+  int dist1 = (*e1)->to()->_rpo - (*e1)->from()->_rpo;
+
+  return dist1 - dist0;
+}
+
+//------------------------------trace_frequency_order--------------------------
+// Comparison function for edges
+static int trace_frequency_order(const void *p0, const void *p1) {
+  Trace *tr0 = *(Trace **) p0;
+  Trace *tr1 = *(Trace **) p1;
+  Block *b0 = tr0->first_block();
+  Block *b1 = tr1->first_block();
+
+  // The trace of connector blocks goes at the end;
+  // we only expect one such trace
+  if (b0->is_connector() != b1->is_connector()) {
+    return b1->is_connector() ? -1 : 1;
+  }
+
+  // Pull more frequently executed blocks to the beginning
+  float freq0 = b0->_freq;
+  float freq1 = b1->_freq;
+  if (freq0 != freq1) {
+    return freq0 > freq1 ? -1 : 1;
+  }
+
+  int diff = tr0->first_block()->_rpo - tr1->first_block()->_rpo;
+
+  return diff;
+}
+
+//------------------------------find_edges-------------------------------------
+// Find edges of interest, i.e, those which can fall through. Presumes that
+// edges which don't fall through are of low frequency and can be generally
+// ignored.  Initialize the list of traces.
+void PhaseBlockLayout::find_edges()
+{
+  // Walk the blocks, creating edges and Traces
+  uint i;
+  Trace *tr = NULL;
+  for (i = 0; i < _cfg._num_blocks; i++) {
+    Block *b = _cfg._blocks[i];
+    tr = new Trace(b, next, prev);
+    traces[tr->id()] = tr;
+
+    // All connector blocks should be at the end of the list
+    if (b->is_connector()) break;
+
+    // If this block and the next one have a one-to-one successor
+    // predecessor relationship, simply append the next block
+    int nfallthru = b->num_fall_throughs();
+    while (nfallthru == 1 &&
+           b->succ_fall_through(0)) {
+      Block *n = b->_succs[0];
+
+      // Skip over single-entry connector blocks, we don't want to
+      // add them to the trace.
+      while (n->is_connector() && n->num_preds() == 1) {
+        n = n->_succs[0];
+      }
+
+      // We see a merge point, so stop search for the next block
+      if (n->num_preds() != 1) break;
+
+      i++;
+      assert(n = _cfg._blocks[i], "expecting next block");
+      tr->append(n);
+      uf->map(n->_pre_order, tr->id());
+      traces[n->_pre_order] = NULL;
+      nfallthru = b->num_fall_throughs();
+      b = n;
+    }
+
+    if (nfallthru > 0) {
+      // Create a CFGEdge for each outgoing
+      // edge that could be a fall-through.
+      for (uint j = 0; j < b->_num_succs; j++ ) {
+        if (b->succ_fall_through(j)) {
+          Block *target = b->non_connector_successor(j);
+          float freq = b->_freq * b->succ_prob(j);
+          int from_pct = (int) ((100 * freq) / b->_freq);
+          int to_pct = (int) ((100 * freq) / target->_freq);
+          edges->append(new CFGEdge(b, target, freq, from_pct, to_pct));
+        }
+      }
+    }
+  }
+
+  // Group connector blocks into one trace
+  for (i++; i < _cfg._num_blocks; i++) {
+    Block *b = _cfg._blocks[i];
+    assert(b->is_connector(), "connector blocks at the end");
+    tr->append(b);
+    uf->map(b->_pre_order, tr->id());
+    traces[b->_pre_order] = NULL;
+  }
+}
+
+//------------------------------union_traces----------------------------------
+// Union two traces together in uf, and null out the trace in the list
+void PhaseBlockLayout::union_traces(Trace* updated_trace, Trace* old_trace)
+{
+  uint old_id = old_trace->id();
+  uint updated_id = updated_trace->id();
+
+  uint lo_id = updated_id;
+  uint hi_id = old_id;
+
+  // If from is greater than to, swap values to meet
+  // UnionFind guarantee.
+  if (updated_id > old_id) {
+    lo_id = old_id;
+    hi_id = updated_id;
+
+    // Fix up the trace ids
+    traces[lo_id] = traces[updated_id];
+    updated_trace->set_id(lo_id);
+  }
+
+  // Union the lower with the higher and remove the pointer
+  // to the higher.
+  uf->Union(lo_id, hi_id);
+  traces[hi_id] = NULL;
+}
+
+//------------------------------grow_traces-------------------------------------
+// Append traces together via the most frequently executed edges
+void PhaseBlockLayout::grow_traces()
+{
+  // Order the edges, and drive the growth of Traces via the most
+  // frequently executed edges.
+  edges->sort(edge_order);
+  for (int i = 0; i < edges->length(); i++) {
+    CFGEdge *e = edges->at(i);
+
+    if (e->state() != CFGEdge::open) continue;
+
+    Block *src_block = e->from();
+    Block *targ_block = e->to();
+
+    // Don't grow traces along backedges?
+    if (!BlockLayoutRotateLoops) {
+      if (targ_block->_rpo <= src_block->_rpo) {
+        targ_block->set_loop_alignment(targ_block);
+        continue;
+      }
+    }
+
+    Trace *src_trace = trace(src_block);
+    Trace *targ_trace = trace(targ_block);
+
+    // If the edge in question can join two traces at their ends,
+    // append one trace to the other.
+   if (src_trace->last_block() == src_block) {
+      if (src_trace == targ_trace) {
+        e->set_state(CFGEdge::interior);
+        if (targ_trace->backedge(e)) {
+          // Reset i to catch any newly eligible edge
+          // (Or we could remember the first "open" edge, and reset there)
+          i = 0;
+        }
+      } else if (targ_trace->first_block() == targ_block) {
+        e->set_state(CFGEdge::connected);
+        src_trace->append(targ_trace);
+        union_traces(src_trace, targ_trace);
+      }
+    }
+  }
+}
+
+//------------------------------merge_traces-----------------------------------
+// Embed one trace into another, if the fork or join points are sufficiently
+// balanced.
+void PhaseBlockLayout::merge_traces(bool fall_thru_only)
+{
+  // Walk the edge list a another time, looking at unprocessed edges.
+  // Fold in diamonds
+  for (int i = 0; i < edges->length(); i++) {
+    CFGEdge *e = edges->at(i);
+
+    if (e->state() != CFGEdge::open) continue;
+    if (fall_thru_only) {
+      if (e->infrequent()) continue;
+    }
+
+    Block *src_block = e->from();
+    Trace *src_trace = trace(src_block);
+    bool src_at_tail = src_trace->last_block() == src_block;
+
+    Block *targ_block  = e->to();
+    Trace *targ_trace  = trace(targ_block);
+    bool targ_at_start = targ_trace->first_block() == targ_block;
+
+    if (src_trace == targ_trace) {
+      // This may be a loop, but we can't do much about it.
+      e->set_state(CFGEdge::interior);
+      continue;
+    }
+
+    if (fall_thru_only) {
+      // If the edge links the middle of two traces, we can't do anything.
+      // Mark the edge and continue.
+      if (!src_at_tail & !targ_at_start) {
+        continue;
+      }
+
+      // Don't grow traces along backedges?
+      if (!BlockLayoutRotateLoops && (targ_block->_rpo <= src_block->_rpo)) {
+          continue;
+      }
+
+      // If both ends of the edge are available, why didn't we handle it earlier?
+      assert(src_at_tail ^ targ_at_start, "Should have caught this edge earlier.");
+
+      if (targ_at_start) {
+        // Insert the "targ" trace in the "src" trace if the insertion point
+        // is a two way branch.
+        // Better profitability check possible, but may not be worth it.
+        // Someday, see if the this "fork" has an associated "join";
+        // then make a policy on merging this trace at the fork or join.
+        // For example, other things being equal, it may be better to place this
+        // trace at the join point if the "src" trace ends in a two-way, but
+        // the insertion point is one-way.
+        assert(src_block->num_fall_throughs() == 2, "unexpected diamond");
+        e->set_state(CFGEdge::connected);
+        src_trace->insert_after(src_block, targ_trace);
+        union_traces(src_trace, targ_trace);
+      } else if (src_at_tail) {
+        if (src_trace != trace(_cfg._broot)) {
+          e->set_state(CFGEdge::connected);
+          targ_trace->insert_before(targ_block, src_trace);
+          union_traces(targ_trace, src_trace);
+        }
+      }
+    } else if (e->state() == CFGEdge::open) {
+      // Append traces, even without a fall-thru connection.
+      // But leave root entry at the begining of the block list.
+      if (targ_trace != trace(_cfg._broot)) {
+        e->set_state(CFGEdge::connected);
+        src_trace->append(targ_trace);
+        union_traces(src_trace, targ_trace);
+      }
+    }
+  }
+}
+
+//----------------------------reorder_traces-----------------------------------
+// Order the sequence of the traces in some desirable way, and fixup the
+// jumps at the end of each block.
+void PhaseBlockLayout::reorder_traces(int count)
+{
+  ResourceArea *area = Thread::current()->resource_area();
+  Trace ** new_traces = NEW_ARENA_ARRAY(area, Trace *, count);
+  Block_List worklist;
+  int new_count = 0;
+
+  // Compact the traces.
+  for (int i = 0; i < count; i++) {
+    Trace *tr = traces[i];
+    if (tr != NULL) {
+      new_traces[new_count++] = tr;
+    }
+  }
+
+  // The entry block should be first on the new trace list.
+  Trace *tr = trace(_cfg._broot);
+  assert(tr == new_traces[0], "entry trace misplaced");
+
+  // Sort the new trace list by frequency
+  qsort(new_traces + 1, new_count - 1, sizeof(new_traces[0]), trace_frequency_order);
+
+  // Patch up the successor blocks
+  _cfg._blocks.reset();
+  _cfg._num_blocks = 0;
+  for (int i = 0; i < new_count; i++) {
+    Trace *tr = new_traces[i];
+    if (tr != NULL) {
+      tr->fixup_blocks(_cfg);
+    }
+  }
+}
+
+//------------------------------PhaseBlockLayout-------------------------------
+// Order basic blocks based on frequency
+PhaseBlockLayout::PhaseBlockLayout(PhaseCFG &cfg) :
+  Phase(BlockLayout),
+  _cfg(cfg)
+{
+  ResourceMark rm;
+  ResourceArea *area = Thread::current()->resource_area();
+
+  // List of traces
+  int size = _cfg._num_blocks + 1;
+  traces = NEW_ARENA_ARRAY(area, Trace *, size);
+  memset(traces, 0, size*sizeof(Trace*));
+  next = NEW_ARENA_ARRAY(area, Block *, size);
+  memset(next,   0, size*sizeof(Block *));
+  prev = NEW_ARENA_ARRAY(area, Block *, size);
+  memset(prev  , 0, size*sizeof(Block *));
+
+  // List of edges
+  edges = new GrowableArray<CFGEdge*>;
+
+  // Mapping block index --> block_trace
+  uf = new UnionFind(size);
+  uf->reset(size);
+
+  // Find edges and create traces.
+  find_edges();
+
+  // Grow traces at their ends via most frequent edges.
+  grow_traces();
+
+  // Merge one trace into another, but only at fall-through points.
+  // This may make diamonds and other related shapes in a trace.
+  merge_traces(true);
+
+  // Run merge again, allowing two traces to be catenated, even if
+  // one does not fall through into the other. This appends loosely
+  // related traces to be near each other.
+  merge_traces(false);
+
+  // Re-order all the remaining traces by frequency
+  reorder_traces(size);
+
+  assert(_cfg._num_blocks >= (uint) (size - 1), "number of blocks can not shrink");
+}
+
+
+//------------------------------backedge---------------------------------------
+// Edge e completes a loop in a trace. If the target block is head of the
+// loop, rotate the loop block so that the loop ends in a conditional branch.
+bool Trace::backedge(CFGEdge *e) {
+  bool loop_rotated = false;
+  Block *src_block  = e->from();
+  Block *targ_block    = e->to();
+
+  assert(last_block() == src_block, "loop discovery at back branch");
+  if (first_block() == targ_block) {
+    if (BlockLayoutRotateLoops && last_block()->num_fall_throughs() < 2) {
+      // Find the last block in the trace that has a conditional
+      // branch.
+      Block *b;
+      for (b = last_block(); b != NULL; b = prev(b)) {
+        if (b->num_fall_throughs() == 2) {
+          break;
+        }
+      }
+
+      if (b != last_block() && b != NULL) {
+        loop_rotated = true;
+
+        // Rotate the loop by doing two-part linked-list surgery.
+        append(first_block());
+        break_loop_after(b);
+      }
+    }
+
+    // Backbranch to the top of a trace
+    // Scroll foward through the trace from the targ_block. If we find
+    // a loop head before another loop top, use the the loop head alignment.
+    for (Block *b = targ_block; b != NULL; b = next(b)) {
+      if (b->has_loop_alignment()) {
+        break;
+      }
+      if (b->head()->is_Loop()) {
+        targ_block = b;
+        break;
+      }
+    }
+
+    first_block()->set_loop_alignment(targ_block);
+
+  } else {
+    // Backbranch into the middle of a trace
+    targ_block->set_loop_alignment(targ_block);
+  }
+
+  return loop_rotated;
+}
+
+//------------------------------fixup_blocks-----------------------------------
+// push blocks onto the CFG list
+// ensure that blocks have the correct two-way branch sense
+void Trace::fixup_blocks(PhaseCFG &cfg) {
+  Block *last = last_block();
+  for (Block *b = first_block(); b != NULL; b = next(b)) {
+    cfg._blocks.push(b);
+    cfg._num_blocks++;
+    if (!b->is_connector()) {
+      int nfallthru = b->num_fall_throughs();
+      if (b != last) {
+        if (nfallthru == 2) {
+          // Ensure that the sense of the branch is correct
+          Block *bnext = next(b);
+          Block *bs0 = b->non_connector_successor(0);
+
+          MachNode *iff = b->_nodes[b->_nodes.size()-3]->as_Mach();
+          ProjNode *proj0 = b->_nodes[b->_nodes.size()-2]->as_Proj();
+          ProjNode *proj1 = b->_nodes[b->_nodes.size()-1]->as_Proj();
+
+          if (bnext == bs0) {
+            // Fall-thru case in succs[0], should be in succs[1]
+
+            // Flip targets in _succs map
+            Block *tbs0 = b->_succs[0];
+            Block *tbs1 = b->_succs[1];
+            b->_succs.map( 0, tbs1 );
+            b->_succs.map( 1, tbs0 );
+
+            // Flip projections to match targets
+            b->_nodes.map(b->_nodes.size()-2, proj1);
+            b->_nodes.map(b->_nodes.size()-1, proj0);
+          }
+        }
+      }
+    }
+  }
+}