hotspot/src/share/vm/opto/gcm.cpp
changeset 22828 17ecb098bc1e
parent 22807 1cf02ef734e2
parent 19330 49d6711171e6
child 22838 82c7497fbad4
--- a/hotspot/src/share/vm/opto/gcm.cpp	Thu Aug 22 09:39:54 2013 -0700
+++ b/hotspot/src/share/vm/opto/gcm.cpp	Thu Sep 05 11:04:39 2013 -0700
@@ -70,7 +70,7 @@
 // are in b also.
 void PhaseCFG::schedule_node_into_block( Node *n, Block *b ) {
   // Set the basic block of n and add n to b.
-  _bbs.map(n->_idx, b);
+  map_node_to_block(n, b);
   b->add_inst(n);
 
   // After Matching, nearly any old Node may have projections trailing it.
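
Context for the mapping hunks: _bbs was a bare Block_Array indexed by a node's _idx, and this changeset hides it behind PhaseCFG accessors (map_node_to_block, get_block_for_node, has_block, unmap_node_from_block). Below is a minimal self-contained sketch of that accessor pattern; the toy types and the vector storage are assumptions of the sketch, not HotSpot's actual Block_Array:

    #include <vector>

    struct Node  { unsigned _idx; };
    struct Block { };

    // Toy stand-in for PhaseCFG's node-to-block map, sketching the
    // accessors this changeset introduces (storage layout is assumed).
    class ToyCFG {
      std::vector<Block*> _node_to_block;  // indexed by Node::_idx
    public:
      void map_node_to_block(const Node* n, Block* b) {
        if (n->_idx >= _node_to_block.size()) {
          _node_to_block.resize(n->_idx + 1, nullptr);
        }
        _node_to_block[n->_idx] = b;
      }
      Block* get_block_for_node(const Node* n) const {
        return n->_idx < _node_to_block.size() ? _node_to_block[n->_idx] : nullptr;
      }
      bool has_block(const Node* n) const {
        return get_block_for_node(n) != nullptr;
      }
      void unmap_node_from_block(const Node* n) {
        map_node_to_block(n, nullptr);
      }
    };
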
@@ -79,11 +79,12 @@
   for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
     Node*  use  = n->fast_out(i);
     if (use->is_Proj()) {
-      Block* buse = _bbs[use->_idx];
+      Block* buse = get_block_for_node(use);
       if (buse != b) {              // In wrong block?
-        if (buse != NULL)
+        if (buse != NULL) {
           buse->find_remove(use);   // Remove from wrong block
-        _bbs.map(use->_idx, b);     // Re-insert in this block
+        }
+        map_node_to_block(use, b);
         b->add_inst(use);
       }
     }
@@ -101,7 +102,7 @@
   if (p != NULL && p != n) {    // Control from a block projection?
     assert(!n->pinned() || n->is_MachConstantBase(), "only pinned MachConstantBase node is expected here");
     // Find trailing Region
-    Block *pb = _bbs[in0->_idx]; // Block-projection already has basic block
+    Block *pb = get_block_for_node(in0); // Block-projection already has basic block
     uint j = 0;
     if (pb->_num_succs != 1) {  // More than 1 successor?
       // Search for successor
@@ -124,26 +125,30 @@
 
 //------------------------------schedule_pinned_nodes--------------------------
 // Set the basic block for Nodes pinned into blocks
-void PhaseCFG::schedule_pinned_nodes( VectorSet &visited ) {
+void PhaseCFG::schedule_pinned_nodes(VectorSet &visited) {
   // Allocate node stack of size C->unique()+8 to avoid frequent realloc
-  GrowableArray <Node *> spstack(C->unique()+8);
+  GrowableArray<Node*> spstack(C->unique() + 8);
   spstack.push(_root);
-  while ( spstack.is_nonempty() ) {
-    Node *n = spstack.pop();
-    if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited
-      if( n->pinned() && !_bbs.lookup(n->_idx) ) {  // Pinned?  Nail it down!
-        assert( n->in(0), "pinned Node must have Control" );
+  while (spstack.is_nonempty()) {
+    Node* node = spstack.pop();
+    if (!visited.test_set(node->_idx)) { // Test node and flag it as visited
+      if (node->pinned() && !has_block(node)) {  // Pinned?  Nail it down!
+        assert(node->in(0), "pinned Node must have Control");
         // Before setting block replace block_proj control edge
-        replace_block_proj_ctrl(n);
-        Node *input = n->in(0);
-        while( !input->is_block_start() )
+        replace_block_proj_ctrl(node);
+        Node* input = node->in(0);
+        while (!input->is_block_start()) {
           input = input->in(0);
-        Block *b = _bbs[input->_idx];  // Basic block of controlling input
-        schedule_node_into_block(n, b);
+        }
+        Block* block = get_block_for_node(input); // Basic block of controlling input
+        schedule_node_into_block(node, block);
       }
-      for( int i = n->req() - 1; i >= 0; --i ) {  // For all inputs
-        if( n->in(i) != NULL )
-          spstack.push(n->in(i));
+
+      // Process all inputs that are non-NULL
+      for (int i = node->req() - 1; i >= 0; --i) {
+        if (node->in(i) != NULL) {
+          spstack.push(node->in(i));
+        }
       }
     }
   }
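
The placement rule in the hunk above: a pinned node lands in the block of its controlling input, found by chasing in(0) edges until a block-start node. A toy model of that walk (the single control pointer and the block_start flag are simplifications assumed by the sketch):

    #include <cassert>

    struct ToyNode {
      ToyNode* control;      // stands in for n->in(0)
      bool     block_start;  // stands in for is_block_start()
    };

    // Mirrors the while-loop in schedule_pinned_nodes(): climb the control
    // chain to the node that starts a basic block (one must exist above).
    ToyNode* find_block_start(ToyNode* pinned) {
      assert(pinned->control != nullptr && "pinned Node must have Control");
      ToyNode* input = pinned->control;
      while (!input->block_start) {
        input = input->control;
      }
      return input;
    }
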
@@ -153,7 +158,7 @@
 // Assert that new input b2 is dominated by all previous inputs.
 // Check this by seeing that it is dominated by b1, the deepest
 // input observed until b2.
-static void assert_dom(Block* b1, Block* b2, Node* n, Block_Array &bbs) {
+static void assert_dom(Block* b1, Block* b2, Node* n, const PhaseCFG* cfg) {
   if (b1 == NULL)  return;
   assert(b1->_dom_depth < b2->_dom_depth, "sanity");
   Block* tmp = b2;
@@ -166,7 +171,7 @@
     for (uint j=0; j<n->len(); j++) { // For all inputs
       Node* inn = n->in(j); // Get input
       if (inn == NULL)  continue;  // Ignore NULL, missing inputs
-      Block* inb = bbs[inn->_idx];
+      Block* inb = cfg->get_block_for_node(inn);
       tty->print("B%d idom=B%d depth=%2d ",inb->_pre_order,
                  inb->_idom ? inb->_idom->_pre_order : 0, inb->_dom_depth);
       inn->dump();
@@ -178,20 +183,20 @@
 }
 #endif
 
-static Block* find_deepest_input(Node* n, Block_Array &bbs) {
+static Block* find_deepest_input(Node* n, const PhaseCFG* cfg) {
   // Find the last input dominated by all other inputs.
   Block* deepb           = NULL;        // Deepest block so far
   int    deepb_dom_depth = 0;
   for (uint k = 0; k < n->len(); k++) { // For all inputs
     Node* inn = n->in(k);               // Get input
     if (inn == NULL)  continue;         // Ignore NULL, missing inputs
-    Block* inb = bbs[inn->_idx];
+    Block* inb = cfg->get_block_for_node(inn);
     assert(inb != NULL, "must already have scheduled this input");
     if (deepb_dom_depth < (int) inb->_dom_depth) {
       // The new inb must be dominated by the previous deepb.
       // The various inputs must be linearly ordered in the dom
       // tree, or else there will not be a unique deepest block.
-      DEBUG_ONLY(assert_dom(deepb, inb, n, bbs));
+      DEBUG_ONLY(assert_dom(deepb, inb, n, cfg));
       deepb = inb;                      // Save deepest block
       deepb_dom_depth = deepb->_dom_depth;
     }
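
find_deepest_input() leans on the invariant checked by assert_dom(): the inputs' blocks form a chain in the dominator tree, so the block with the greatest _dom_depth is dominated by all the others and is the earliest block where every input is available. A self-contained sketch of the selection (toy types; the chain invariant is assumed rather than asserted):

    #include <vector>

    struct ToyBlock { int dom_depth; };  // depth in the dominator tree

    // Pick the input block with the greatest dominator depth. If the blocks
    // are linearly ordered in the dominator tree, this block is dominated by
    // every other input block, so the node may legally be placed there.
    ToyBlock* deepest_input_block(const std::vector<ToyBlock*>& inputs) {
      ToyBlock* deepest = nullptr;
      int deepest_depth = 0;
      for (ToyBlock* b : inputs) {
        if (b != nullptr && b->dom_depth > deepest_depth) {
          deepest = b;
          deepest_depth = b->dom_depth;
        }
      }
      return deepest;
    }
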
@@ -207,32 +212,29 @@
 // which all their inputs occur.
 bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) {
   // Allocate stack with enough space to avoid frequent realloc
-  Node_Stack nstack(roots.Size() + 8); // (unique >> 1) + 24 from Java2D stats
-  // roots.push(_root); _root will be processed among C->top() inputs
+  Node_Stack nstack(roots.Size() + 8);
+  // _root will be processed among C->top() inputs
   roots.push(C->top());
   visited.set(C->top()->_idx);
 
   while (roots.size() != 0) {
-    // Use local variables nstack_top_n & nstack_top_i to cache values
-    // on stack's top.
+    // Use local variables parent_node & input_index to cache values
+    // at the stack's top.
-    Node *nstack_top_n = roots.pop();
-    uint  nstack_top_i = 0;
-//while_nstack_nonempty:
+    Node* parent_node = roots.pop();
+    uint  input_index = 0;
+
     while (true) {
-      // Get parent node and next input's index from stack's top.
-      Node *n = nstack_top_n;
-      uint  i = nstack_top_i;
-
-      if (i == 0) {
+      if (input_index == 0) {
         // Fixup some control.  Constants without control get attached
         // to root and nodes that use is_block_proj() nodes should be attached
         // to the region that starts their block.
-        const Node *in0 = n->in(0);
-        if (in0 != NULL) {              // Control-dependent?
-          replace_block_proj_ctrl(n);
-        } else {               // n->in(0) == NULL
-          if (n->req() == 1) { // This guy is a constant with NO inputs?
-            n->set_req(0, _root);
+        const Node* control_input = parent_node->in(0);
+        if (control_input != NULL) {
+          replace_block_proj_ctrl(parent_node);
+        } else {
+          // Is this a constant with NO inputs?
+          if (parent_node->req() == 1) {
+            parent_node->set_req(0, _root);
           }
         }
       }
@@ -241,37 +243,47 @@
       // input is already in a block we quit following inputs (to avoid
       // cycles). Instead we put that Node on a worklist to be handled
       // later (since ITS inputs may not have a block yet).
-      bool done = true;              // Assume all n's inputs will be processed
-      while (i < n->len()) {         // For all inputs
-        Node *in = n->in(i);         // Get input
-        ++i;
-        if (in == NULL) continue;    // Ignore NULL, missing inputs
+
+      // Assume all n's inputs will be processed
+      bool done = true;
+
+      while (input_index < parent_node->len()) {
+        Node* in = parent_node->in(input_index++);
+        if (in == NULL) {
+          continue;
+        }
+
         int is_visited = visited.test_set(in->_idx);
-        if (!_bbs.lookup(in->_idx)) { // Missing block selection?
+        if (!has_block(in)) {
           if (is_visited) {
-            // assert( !visited.test(in->_idx), "did not schedule early" );
             return false;
           }
-          nstack.push(n, i);         // Save parent node and next input's index.
-          nstack_top_n = in;         // Process current input now.
-          nstack_top_i = 0;
-          done = false;              // Not all n's inputs processed.
-          break; // continue while_nstack_nonempty;
-        } else if (!is_visited) {    // Input not yet visited?
-          roots.push(in);            // Visit this guy later, using worklist
+          // Save parent node and next input's index.
+          nstack.push(parent_node, input_index);
+          // Process current input now.
+          parent_node = in;
+          input_index = 0;
+          // Not all n's inputs processed.
+          done = false;
+          break;
+        } else if (!is_visited) {
+          // Visit this guy later, using worklist
+          roots.push(in);
         }
       }
+
       if (done) {
         // All of n's inputs have been processed, complete post-processing.
 
         // Some instructions are pinned into a block.  These include Region,
         // Phi, Start, Return, and other control-dependent instructions and
         // any projections which depend on them.
-        if (!n->pinned()) {
+        if (!parent_node->pinned()) {
           // Set earliest legal block.
-          _bbs.map(n->_idx, find_deepest_input(n, _bbs));
+          Block* earliest_block = find_deepest_input(parent_node, this);
+          map_node_to_block(parent_node, earliest_block);
         } else {
-          assert(_bbs[n->_idx] == _bbs[n->in(0)->_idx], "Pinned Node should be at the same block as its control edge");
+          assert(get_block_for_node(parent_node) == get_block_for_node(parent_node->in(0)), "Pinned Node should be at the same block as its control edge");
         }
 
         if (nstack.is_empty()) {
@@ -280,12 +292,12 @@
           break;
         }
         // Get saved parent node and next input's index.
-        nstack_top_n = nstack.node();
-        nstack_top_i = nstack.index();
+        parent_node = nstack.node();
+        input_index = nstack.index();
         nstack.pop();
-      } //    if (done)
-    }   // while (true)
-  }     // while (roots.size() != 0)
+      }
+    }
+  }
   return true;
 }
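
The restructured loop above is an iterative depth-first walk: instead of recursing, it keeps a (parent node, next input index) pair and saves it on Node_Stack when descending into an input. A generic self-contained sketch of the pattern (toy adjacency-list graph; the real code also bails out on cycles and defers already-placed inputs to the roots worklist):

    #include <cstdio>
    #include <stack>
    #include <utility>
    #include <vector>

    struct Graph { std::vector<std::vector<int>> inputs; };  // inputs[v] = inputs of v

    // Post-order over a node's transitive inputs without recursion: the
    // stack remembers the parent and where to resume among its inputs.
    void postorder_inputs(const Graph& g, int root, std::vector<bool>& visited) {
      std::stack<std::pair<int, unsigned>> stk;  // (parent_node, input_index)
      int parent = root;
      unsigned index = 0;
      visited[root] = true;
      while (true) {
        bool done = true;
        while (index < g.inputs[parent].size()) {
          int in = g.inputs[parent][index++];
          if (!visited[in]) {
            visited[in] = true;
            stk.push({parent, index});  // resume the parent here later
            parent = in;                // descend into the input now
            index = 0;
            done = false;
            break;
          }
        }
        if (done) {
          std::printf("post-process node %d\n", parent);  // schedule point in the real code
          if (stk.empty()) break;
          parent = stk.top().first;
          index  = stk.top().second;
          stk.pop();
        }
      }
    }
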
 
@@ -317,8 +329,8 @@
 // The definition must dominate the use, so move the LCA upward in the
 // dominator tree to dominate the use.  If the use is a phi, adjust
 // the LCA only with the phi input paths which actually use this def.
-static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, Block_Array &bbs) {
-  Block* buse = bbs[use->_idx];
+static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, const PhaseCFG* cfg) {
+  Block* buse = cfg->get_block_for_node(use);
   if (buse == NULL)    return LCA;   // Unused killing Projs have no use block
   if (!use->is_Phi())  return buse->dom_lca(LCA);
   uint pmax = use->req();       // Number of Phi inputs
@@ -333,7 +345,7 @@
   // more than once.
   for (uint j=1; j<pmax; j++) { // For all inputs
     if (use->in(j) == def) {    // Found matching input?
-      Block* pred = bbs[buse->pred(j)->_idx];
+      Block* pred = cfg->get_block_for_node(buse->pred(j));
       LCA = pred->dom_lca(LCA);
     }
   }
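
raise_LCA_above_use() builds on dom_lca(), the least common ancestor of two blocks in the dominator tree. A minimal sketch of the usual depth-equalizing walk (toy block type; HotSpot's actual dom_lca lives elsewhere and may be implemented differently):

    struct DomBlock {
      DomBlock* idom;       // immediate dominator
      int       dom_depth;  // depth in the dominator tree
    };

    // Classic tree LCA with parent pointers and depths: lift the deeper
    // block until both depths match, then lift both in lockstep.
    DomBlock* dom_lca(DomBlock* a, DomBlock* b) {
      while (a->dom_depth > b->dom_depth) a = a->idom;
      while (b->dom_depth > a->dom_depth) b = b->idom;
      while (a != b) { a = a->idom; b = b->idom; }
      return a;
    }
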
@@ -346,8 +358,7 @@
 // which are marked with the given index.  Return the LCA (in the dom tree)
 // of all marked blocks.  If there are none marked, return the original
 // LCA.
-static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark,
-                                    Block* early, Block_Array &bbs) {
+static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark, Block* early, const PhaseCFG* cfg) {
   Block_List worklist;
   worklist.push(LCA);
   while (worklist.size() > 0) {
@@ -370,7 +381,7 @@
     } else {
       // Keep searching through this block's predecessors.
       for (uint j = 1, jmax = mid->num_preds(); j < jmax; j++) {
-        Block* mid_parent = bbs[ mid->pred(j)->_idx ];
+        Block* mid_parent = cfg->get_block_for_node(mid->pred(j));
         worklist.push(mid_parent);
       }
     }
@@ -388,7 +399,7 @@
 // be earlier (at a shallower dom_depth) than the true schedule_early
 // point of the node. We compute this earlier block as a more permissive
 // site for anti-dependency insertion, but only if subsume_loads is enabled.
-static Block* memory_early_block(Node* load, Block* early, Block_Array &bbs) {
+static Block* memory_early_block(Node* load, Block* early, const PhaseCFG* cfg) {
   Node* base;
   Node* index;
   Node* store = load->in(MemNode::Memory);
@@ -416,12 +427,12 @@
     Block* deepb           = NULL;        // Deepest block so far
     int    deepb_dom_depth = 0;
     for (int i = 0; i < mem_inputs_length; i++) {
-      Block* inb = bbs[mem_inputs[i]->_idx];
+      Block* inb = cfg->get_block_for_node(mem_inputs[i]);
       if (deepb_dom_depth < (int) inb->_dom_depth) {
         // The new inb must be dominated by the previous deepb.
         // The various inputs must be linearly ordered in the dom
         // tree, or else there will not be a unique deepest block.
-        DEBUG_ONLY(assert_dom(deepb, inb, load, bbs));
+        DEBUG_ONLY(assert_dom(deepb, inb, load, cfg));
         deepb = inb;                      // Save deepest block
         deepb_dom_depth = deepb->_dom_depth;
       }
@@ -492,14 +503,14 @@
   // and other inputs are first available.  (Computed by schedule_early.)
   // For normal loads, 'early' is the shallowest place (dom graph wise)
   // to look for anti-deps between this load and any store.
-  Block* early = _bbs[load_index];
+  Block* early = get_block_for_node(load);
 
   // If we are subsuming loads, compute an "early" block that only considers
   // memory or address inputs. This block may be different than the
   // schedule_early block in that it could be at an even shallower depth in the
   // dominator tree, and allow for a broader discovery of anti-dependences.
   if (C->subsume_loads()) {
-    early = memory_early_block(load, early, _bbs);
+    early = memory_early_block(load, early, this);
   }
 
   ResourceArea *area = Thread::current()->resource_area();
@@ -623,7 +634,7 @@
     // or else observe that 'store' is all the way up in the
     // earliest legal block for 'load'.  In the latter case,
     // immediately insert an anti-dependence edge.
-    Block* store_block = _bbs[store->_idx];
+    Block* store_block = get_block_for_node(store);
     assert(store_block != NULL, "unused killing projections skipped above");
 
     if (store->is_Phi()) {
@@ -641,7 +652,7 @@
       for (uint j = PhiNode::Input, jmax = store->req(); j < jmax; j++) {
         if (store->in(j) == mem) {   // Found matching input?
           DEBUG_ONLY(found_match = true);
-          Block* pred_block = _bbs[store_block->pred(j)->_idx];
+          Block* pred_block = get_block_for_node(store_block->pred(j));
           if (pred_block != early) {
             // If any predecessor of the Phi matches the load's "early block",
             // we do not need a precedence edge between the Phi and 'load'
@@ -715,7 +726,7 @@
   // preventing the load from sinking past any block containing
   // a store that may invalidate the memory state required by 'load'.
   if (must_raise_LCA)
-    LCA = raise_LCA_above_marks(LCA, load->_idx, early, _bbs);
+    LCA = raise_LCA_above_marks(LCA, load->_idx, early, this);
   if (LCA == early)  return LCA;
 
   // Insert anti-dependence edges from 'load' to each store
@@ -724,7 +735,7 @@
   if (LCA->raise_LCA_mark() == load_index) {
     while (non_early_stores.size() > 0) {
       Node* store = non_early_stores.pop();
-      Block* store_block = _bbs[store->_idx];
+      Block* store_block = get_block_for_node(store);
       if (store_block == LCA) {
         // add anti_dependence from store to load in its own block
         assert(store != load->in(0), "dependence cycle found");
@@ -758,7 +769,7 @@
 
 public:
   // Constructor for the iterator
-  Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs);
+  Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, PhaseCFG &cfg);
 
   // Postincrement operator to iterate over the nodes
   Node *next();
@@ -766,12 +777,12 @@
 private:
   VectorSet   &_visited;
   Node_List   &_stack;
-  Block_Array &_bbs;
+  PhaseCFG &_cfg;
 };
 
 // Constructor for the Node_Backward_Iterator
-Node_Backward_Iterator::Node_Backward_Iterator( Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs )
-  : _visited(visited), _stack(stack), _bbs(bbs) {
+Node_Backward_Iterator::Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, PhaseCFG &cfg)
+  : _visited(visited), _stack(stack), _cfg(cfg) {
   // The stack should contain exactly the root
   stack.clear();
   stack.push(root);
@@ -801,8 +812,8 @@
     _visited.set(self->_idx);
 
     // Now schedule all uses as late as possible.
-    uint src     = self->is_Proj() ? self->in(0)->_idx : self->_idx;
-    uint src_rpo = _bbs[src]->_rpo;
+    const Node* src = self->is_Proj() ? self->in(0) : self;
+    uint src_rpo = _cfg.get_block_for_node(src)->_rpo;
 
     // Schedule all nodes in a post-order visit
     Node *unvisited = NULL;  // Unvisited anti-dependent Node, if any
@@ -818,7 +829,7 @@
 
       // do not traverse backward control edges
       Node *use = n->is_Proj() ? n->in(0) : n;
-      uint use_rpo = _bbs[use->_idx]->_rpo;
+      uint use_rpo = _cfg.get_block_for_node(use)->_rpo;
 
       if ( use_rpo < src_rpo )
         continue;
@@ -850,13 +861,13 @@
 
-//------------------------------ComputeLatenciesBackwards----------------------
+//------------------------------compute_latencies_backwards--------------------
 // Compute the latency of all the instructions.
-void PhaseCFG::ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack) {
+void PhaseCFG::compute_latencies_backwards(VectorSet &visited, Node_List &stack) {
 #ifndef PRODUCT
   if (trace_opto_pipelining())
     tty->print("\n#---- ComputeLatenciesBackwards ----\n");
 #endif
 
-  Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
+  Node_Backward_Iterator iter((Node *)_root, visited, stack, *this);
   Node *n;
 
   // Walk over all the nodes from last to first
@@ -873,31 +884,34 @@
   // Set the latency for this instruction
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
-    tty->print("# latency_to_inputs: node_latency[%d] = %d for node",
-               n->_idx, _node_latency->at_grow(n->_idx));
+    tty->print("# latency_to_inputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
     dump();
   }
 #endif
 
-  if (n->is_Proj())
+  if (n->is_Proj()) {
     n = n->in(0);
+  }
 
-  if (n->is_Root())
+  if (n->is_Root()) {
     return;
+  }
 
   uint nlen = n->len();
-  uint use_latency = _node_latency->at_grow(n->_idx);
-  uint use_pre_order = _bbs[n->_idx]->_pre_order;
+  uint use_latency = get_latency_for_node(n);
+  uint use_pre_order = get_block_for_node(n)->_pre_order;
 
-  for ( uint j=0; j<nlen; j++ ) {
+  for (uint j = 0; j < nlen; j++) {
     Node *def = n->in(j);
 
-    if (!def || def == n)
+    if (!def || def == n) {
       continue;
+    }
 
     // Walk backwards thru projections
-    if (def->is_Proj())
+    if (def->is_Proj()) {
       def = def->in(0);
+    }
 
 #ifndef PRODUCT
     if (trace_opto_pipelining()) {
@@ -907,25 +921,23 @@
 #endif
 
     // If the defining block is not known, assume it is ok
-    Block *def_block = _bbs[def->_idx];
+    Block *def_block = get_block_for_node(def);
     uint def_pre_order = def_block ? def_block->_pre_order : 0;
 
-    if ( (use_pre_order <  def_pre_order) ||
-         (use_pre_order == def_pre_order && n->is_Phi()) )
+    if ((use_pre_order <  def_pre_order) || (use_pre_order == def_pre_order && n->is_Phi())) {
       continue;
+    }
 
     uint delta_latency = n->latency(j);
     uint current_latency = delta_latency + use_latency;
 
-    if (_node_latency->at_grow(def->_idx) < current_latency) {
-      _node_latency->at_put_grow(def->_idx, current_latency);
+    if (get_latency_for_node(def) < current_latency) {
+      set_latency_for_node(def, current_latency);
     }
 
 #ifndef PRODUCT
     if (trace_opto_pipelining()) {
-      tty->print_cr("#      %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d",
-                    use_latency, j, delta_latency, current_latency, def->_idx,
-                    _node_latency->at_grow(def->_idx));
+      tty->print_cr("#      %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", use_latency, j, delta_latency, current_latency, def->_idx, get_latency_for_node(def));
     }
 #endif
   }
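
The update in the hunk above is a backward relaxation: a def must be ready at least the edge latency before its latest use, i.e. latency[def] = max(latency[def], latency[use] + edge_latency). A tiny self-contained illustration with made-up numbers (the real per-edge cost comes from n->latency(j) and the pipeline description):

    #include <cstdio>
    #include <vector>

    int main() {
      // latency[i]: distance, in cycles, from node i to the end of the schedule.
      std::vector<unsigned> latency = {0, 0, 3};  // node 2 is a use with latency 3
      unsigned use = 2, def = 1;
      unsigned edge_latency = 2;  // assumed cost of the def->use edge
      unsigned current_latency = latency[use] + edge_latency;
      if (latency[def] < current_latency) {
        latency[def] = current_latency;  // same max-update as latency_to_inputs()
      }
      std::printf("node_latency[%u] = %u\n", def, latency[def]);  // prints 5
      return 0;
    }
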
@@ -935,10 +947,11 @@
 // Compute the latency of a specific use
 int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) {
   // If self-reference, return no latency
-  if (use == n || use->is_Root())
+  if (use == n || use->is_Root()) {
     return 0;
+  }
 
-  uint def_pre_order = _bbs[def->_idx]->_pre_order;
+  uint def_pre_order = get_block_for_node(def)->_pre_order;
   uint latency = 0;
 
   // If the use is not a projection, then it is simple...
@@ -950,7 +963,7 @@
     }
 #endif
 
-    uint use_pre_order = _bbs[use->_idx]->_pre_order;
+    uint use_pre_order = get_block_for_node(use)->_pre_order;
 
     if (use_pre_order < def_pre_order)
       return 0;
@@ -959,7 +972,7 @@
       return 0;
 
     uint nlen = use->len();
-    uint nl = _node_latency->at_grow(use->_idx);
+    uint nl = get_latency_for_node(use);
 
     for ( uint j=0; j<nlen; j++ ) {
       if (use->in(j) == n) {
@@ -994,8 +1007,7 @@
   // Set the latency for this instruction
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
-    tty->print("# latency_from_outputs: node_latency[%d] = %d for node",
-               n->_idx, _node_latency->at_grow(n->_idx));
+    tty->print("# latency_from_outputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
     dump();
   }
 #endif
@@ -1008,7 +1020,7 @@
     if (latency < l) latency = l;
   }
 
-  _node_latency->at_put_grow(n->_idx, latency);
+  set_latency_for_node(n, latency);
 }
 
 //------------------------------hoist_to_cheaper_block-------------------------
@@ -1018,11 +1030,11 @@
   const double delta = 1+PROB_UNLIKELY_MAG(4);
   Block* least       = LCA;
   double least_freq  = least->_freq;
-  uint target        = _node_latency->at_grow(self->_idx);
-  uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx);
-  uint end_latency   = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx);
+  uint target        = get_latency_for_node(self);
+  uint start_latency = get_latency_for_node(LCA->_nodes[0]);
+  uint end_latency   = get_latency_for_node(LCA->_nodes[LCA->end_idx()]);
   bool in_latency    = (target <= start_latency);
-  const Block* root_block = _bbs[_root->_idx];
+  const Block* root_block = get_block_for_node(_root);
 
   // Turn off latency scheduling if scheduling is just plain off
   if (!C->do_scheduling())
@@ -1037,8 +1049,7 @@
 
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
-    tty->print("# Find cheaper block for latency %d: ",
-      _node_latency->at_grow(self->_idx));
+    tty->print("# Find cheaper block for latency %d: ", get_latency_for_node(self));
     self->dump();
     tty->print_cr("#   B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
       LCA->_pre_order,
@@ -1067,9 +1078,9 @@
     if (mach && LCA == root_block)
       break;
 
-    uint start_lat = _node_latency->at_grow(LCA->_nodes[0]->_idx);
+    uint start_lat = get_latency_for_node(LCA->_nodes[0]);
     uint end_idx   = LCA->end_idx();
-    uint end_lat   = _node_latency->at_grow(LCA->_nodes[end_idx]->_idx);
+    uint end_lat   = get_latency_for_node(LCA->_nodes[end_idx]);
     double LCA_freq = LCA->_freq;
 #ifndef PRODUCT
     if (trace_opto_pipelining()) {
@@ -1111,7 +1122,7 @@
       tty->print_cr("#  Change latency for [%4d] from %d to %d", self->_idx, target, end_latency);
     }
 #endif
-    _node_latency->at_put_grow(self->_idx, end_latency);
+    set_latency_for_node(self, end_latency);
     partial_latency_of_defs(self);
   }
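
hoist_to_cheaper_block() walks from the LCA up the dominator tree toward the earliest legal block and keeps the block with the lowest execution frequency; the delta factor above keeps it from moving a node for a negligible gain. A stripped-down sketch of that search (toy types; the real code also weighs start/end latencies, MachNode restrictions, and the root block):

    struct FreqBlock {
      FreqBlock* idom;  // walk upward through the dominator tree
      double     freq;  // estimated execution frequency
    };

    // Visit LCA, its dominators, and finally 'early'; return the block with
    // the cheapest frequency. 'delta' (> 1) demands a real improvement.
    FreqBlock* cheaper_block(FreqBlock* lca, FreqBlock* early, double delta) {
      FreqBlock* least = lca;
      double least_freq = lca->freq;
      for (FreqBlock* b = lca; b != nullptr; b = (b == early) ? nullptr : b->idom) {
        if (b->freq * delta < least_freq) {
          least = b;
          least_freq = b->freq;
        }
      }
      return least;
    }
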
 
@@ -1130,12 +1141,12 @@
     tty->print("\n#---- schedule_late ----\n");
 #endif
 
-  Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
+  Node_Backward_Iterator iter((Node *)_root, visited, stack, *this);
   Node *self;
 
   // Walk over all the nodes from last to first
   while (self = iter.next()) {
-    Block* early = _bbs[self->_idx];   // Earliest legal placement
+    Block* early = get_block_for_node(self); // Earliest legal placement
 
     if (self->is_top()) {
       // Top node goes in bb #2 with other constants.
@@ -1183,7 +1194,7 @@
       for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) {
         // For all uses, find LCA
         Node* use = self->fast_out(i);
-        LCA = raise_LCA_above_use(LCA, use, self, _bbs);
+        LCA = raise_LCA_above_use(LCA, use, self, this);
       }
     }  // (Hide defs of imax, i from rest of block.)
 
@@ -1191,7 +1202,7 @@
     // requirement for correctness but it reduces useless
     // interference between temps and other nodes.
     if (mach != NULL && mach->is_MachTemp()) {
-      _bbs.map(self->_idx, LCA);
+      map_node_to_block(self, LCA);
       LCA->add_inst(self);
       continue;
     }
@@ -1257,7 +1268,7 @@
 } // end ScheduleLate
 
-//------------------------------GlobalCodeMotion-------------------------------
+//------------------------------global_code_motion-----------------------------
-void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_list ) {
+void PhaseCFG::global_code_motion() {
   ResourceMark rm;
 
 #ifndef PRODUCT
@@ -1266,22 +1277,23 @@
   }
 #endif
 
-  // Initialize the bbs.map for things on the proj_list
-  uint i;
-  for( i=0; i < proj_list.size(); i++ )
-    _bbs.map(proj_list[i]->_idx, NULL);
+  // Initialize the node-to-block mapping for things on the proj_list
+  for (uint i = 0; i < _matcher.number_of_projections(); i++) {
+    unmap_node_from_block(_matcher.get_projection(i));
+  }
 
   // Set the basic block for Nodes pinned into blocks
-  Arena *a = Thread::current()->resource_area();
-  VectorSet visited(a);
-  schedule_pinned_nodes( visited );
+  Arena* arena = Thread::current()->resource_area();
+  VectorSet visited(arena);
+  schedule_pinned_nodes(visited);
 
   // Find the earliest Block any instruction can be placed in.  Some
   // instructions are pinned into Blocks.  Unpinned instructions can
   // appear in last block in which all their inputs occur.
   visited.Clear();
-  Node_List stack(a);
-  stack.map( (unique >> 1) + 16, NULL); // Pre-grow the list
+  Node_List stack(arena);
+  // Pre-grow the list
+  stack.map((C->unique() >> 1) + 16, NULL);
   if (!schedule_early(visited, stack)) {
     // Bailout without retry
     C->record_method_not_compilable("early schedule failed");
@@ -1289,29 +1301,25 @@
   }
 
   // Build Def-Use edges.
-  proj_list.push(_root);        // Add real root as another root
-  proj_list.pop();
-
   // Compute the latency information (via backwards walk) for all the
   // instructions in the graph
   _node_latency = new GrowableArray<uint>(); // resource_area allocation
 
-  if( C->do_scheduling() )
-    ComputeLatenciesBackwards(visited, stack);
+  if (C->do_scheduling()) {
+    compute_latencies_backwards(visited, stack);
+  }
 
   // Now schedule all codes as LATE as possible.  This is the LCA in the
   // dominator tree of all USES of a value.  Pick the block with the least
   // loop nesting depth that is lowest in the dominator tree.
   // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() )
   schedule_late(visited, stack);
-  if( C->failing() ) {
+  if (C->failing()) {
     // schedule_late fails only when graph is incorrect.
     assert(!VerifyGraphEdges, "verification should have failed");
     return;
   }
 
-  unique = C->unique();
-
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
     tty->print("\n---- Detect implicit null checks ----\n");
@@ -1334,10 +1342,11 @@
     // By reversing the loop direction we get a very minor gain on mpegaudio.
     // Feel free to revert to a forward loop for clarity.
-    // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) {
+    // for (int i = 0; i < (int)_matcher._null_check_tests.size(); i += 2) {
-    for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) {
-      Node *proj = matcher._null_check_tests[i  ];
-      Node *val  = matcher._null_check_tests[i+1];
-      _bbs[proj->_idx]->implicit_null_check(this, proj, val, allowed_reasons);
+    for (int i = _matcher._null_check_tests.size() - 2; i >= 0; i -= 2) {
+      Node* proj = _matcher._null_check_tests[i];
+      Node* val  = _matcher._null_check_tests[i + 1];
+      Block* block = get_block_for_node(proj);
+      block->implicit_null_check(this, proj, val, allowed_reasons);
       // The implicit_null_check will only perform the transformation
       // if the null branch is truly uncommon, *and* it leads to an
       // uncommon trap.  Combined with the too_many_traps guards
@@ -1354,11 +1363,11 @@
 
   // Schedule locally.  Right now a simple topological sort.
   // Later, do a real latency aware scheduler.
-  uint max_idx = C->unique();
-  GrowableArray<int> ready_cnt(max_idx, max_idx, -1);
+  GrowableArray<int> ready_cnt(C->unique(), C->unique(), -1);
   visited.Clear();
-  for (i = 0; i < _num_blocks; i++) {
-    if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) {
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    if (!block->schedule_local(this, _matcher, ready_cnt, visited)) {
       if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
         C->record_method_not_compilable("local schedule failed");
       }
@@ -1368,14 +1377,17 @@
 
   // If we inserted any instructions between a Call and its CatchNode,
   // clone the instructions on all paths below the Catch.
-  for( i=0; i < _num_blocks; i++ )
-    _blocks[i]->call_catch_cleanup(_bbs, C);
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    block->call_catch_cleanup(this, C);
+  }
 
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
     tty->print("\n---- After GlobalCodeMotion ----\n");
-    for (uint i = 0; i < _num_blocks; i++) {
-      _blocks[i]->dump();
+    for (uint i = 0; i < number_of_blocks(); i++) {
+      Block* block = get_block(i);
+      block->dump();
     }
   }
 #endif
@@ -1383,10 +1395,29 @@
   _node_latency = (GrowableArray<uint> *)0xdeadbeef;
 }
 
+bool PhaseCFG::do_global_code_motion() {
+
+  build_dominator_tree();
+  if (C->failing()) {
+    return false;
+  }
+
+  NOT_PRODUCT( C->verify_graph_edges(); )
+
+  estimate_block_frequency();
+
+  global_code_motion();
+
+  if (C->failing()) {
+    return false;
+  }
+
+  return true;
+}
 
-//------------------------------Estimate_Block_Frequency-----------------------
+//------------------------------estimate_block_frequency-----------------------
 // Estimate block frequencies based on IfNode probabilities.
-void PhaseCFG::Estimate_Block_Frequency() {
+void PhaseCFG::estimate_block_frequency() {
 
   // Force conditional branches leading to uncommon traps to be unlikely,
   // not because we get to the uncommon_trap with less relative frequency,
@@ -1394,18 +1425,20 @@
   // there once.
   if (C->do_freq_based_layout()) {
     Block_List worklist;
-    Block* root_blk = _blocks[0];
+    Block* root_blk = get_block(0);
     for (uint i = 1; i < root_blk->num_preds(); i++) {
-      Block *pb = _bbs[root_blk->pred(i)->_idx];
+      Block *pb = get_block_for_node(root_blk->pred(i));
       if (pb->has_uncommon_code()) {
         worklist.push(pb);
       }
     }
     while (worklist.size() > 0) {
       Block* uct = worklist.pop();
-      if (uct == _broot) continue;
+      if (uct == get_root_block()) {
+        continue;
+      }
       for (uint i = 1; i < uct->num_preds(); i++) {
-        Block *pb = _bbs[uct->pred(i)->_idx];
+        Block *pb = get_block_for_node(uct->pred(i));
         if (pb->_num_succs == 1) {
           worklist.push(pb);
         } else if (pb->num_fall_throughs() == 2) {
@@ -1427,14 +1460,14 @@
   _root_loop->scale_freq();
 
   // Save outermost loop frequency for LRG frequency threshold
-  _outer_loop_freq = _root_loop->outer_loop_freq();
+  _outer_loop_frequency = _root_loop->outer_loop_freq();
 
   // force paths ending at uncommon traps to be infrequent
   if (!C->do_freq_based_layout()) {
     Block_List worklist;
-    Block* root_blk = _blocks[0];
+    Block* root_blk = get_block(0);
     for (uint i = 1; i < root_blk->num_preds(); i++) {
-      Block *pb = _bbs[root_blk->pred(i)->_idx];
+      Block *pb = get_block_for_node(root_blk->pred(i));
       if (pb->has_uncommon_code()) {
         worklist.push(pb);
       }
@@ -1443,7 +1476,7 @@
       Block* uct = worklist.pop();
       uct->_freq = PROB_MIN;
       for (uint i = 1; i < uct->num_preds(); i++) {
-        Block *pb = _bbs[uct->pred(i)->_idx];
+        Block *pb = get_block_for_node(uct->pred(i));
         if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) {
           worklist.push(pb);
         }
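
The loop above pins whole paths that can only end in an uncommon trap down to PROB_MIN, walking backward through predecessors as long as each has a single successor. A toy version of the damping (kProbMin is a stand-in for PROB_MIN):

    #include <vector>

    struct FBlock {
      std::vector<FBlock*> preds;
      int    num_succs = 1;
      double freq      = 1.0;
    };

    const double kProbMin = 1e-6;  // stand-in for PROB_MIN

    // Starting from blocks that end in an uncommon trap, push the minimal
    // frequency backward through single-successor predecessors.
    void damp_uncommon_paths(std::vector<FBlock*> worklist) {
      while (!worklist.empty()) {
        FBlock* uct = worklist.back();
        worklist.pop_back();
        uct->freq = kProbMin;
        for (FBlock* pb : uct->preds) {
          if (pb->num_succs == 1 && pb->freq > kProbMin) {
            worklist.push_back(pb);
          }
        }
      }
    }
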
@@ -1452,8 +1485,8 @@
   }
 
 #ifdef ASSERT
-  for (uint i = 0; i < _num_blocks; i++ ) {
-    Block *b = _blocks[i];
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* b = get_block(i);
     assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency");
   }
 #endif
@@ -1477,16 +1510,16 @@
 CFGLoop* PhaseCFG::create_loop_tree() {
 
 #ifdef ASSERT
-  assert( _blocks[0] == _broot, "" );
-  for (uint i = 0; i < _num_blocks; i++ ) {
-    Block *b = _blocks[i];
+  assert(get_block(0) == get_root_block(), "first block should be root block");
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
     // Check that _loop fields are clear...we could clear them if not.
-    assert(b->_loop == NULL, "clear _loop expected");
+    assert(block->_loop == NULL, "clear _loop expected");
     // Sanity check that the RPO numbering is reflected in the _blocks array.
     // It doesn't have to be for the loop tree to be built, but if it is not,
     // then the blocks have been reordered since dom graph building...which
     // may question the RPO numbering
-    assert(b->_rpo == i, "unexpected reverse post order number");
+    assert(block->_rpo == i, "unexpected reverse post order number");
   }
 #endif
 
@@ -1496,14 +1529,14 @@
   Block_List worklist;
 
   // Assign blocks to loops
-  for(uint i = _num_blocks - 1; i > 0; i-- ) { // skip Root block
-    Block *b = _blocks[i];
+  for (uint i = number_of_blocks() - 1; i > 0; i--) { // skip Root block
+    Block* block = get_block(i);
 
-    if (b->head()->is_Loop()) {
-      Block* loop_head = b;
+    if (block->head()->is_Loop()) {
+      Block* loop_head = block;
       assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
       Node* tail_n = loop_head->pred(LoopNode::LoopBackControl);
-      Block* tail = _bbs[tail_n->_idx];
+      Block* tail = get_block_for_node(tail_n);
 
       // Defensively filter out Loop nodes for non-single-entry loops.
       // For all reasonable loops, the head occurs before the tail in RPO.
@@ -1518,13 +1551,13 @@
         loop_head->_loop = nloop;
         // Add to nloop so push_pred() will skip over inner loops
         nloop->add_member(loop_head);
-        nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, _bbs);
+        nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, this);
 
         while (worklist.size() > 0) {
           Block* member = worklist.pop();
           if (member != loop_head) {
             for (uint j = 1; j < member->num_preds(); j++) {
-              nloop->push_pred(member, j, worklist, _bbs);
+              nloop->push_pred(member, j, worklist, this);
             }
           }
         }
@@ -1534,23 +1567,23 @@
 
   // Create a member list for each loop consisting
   // of both blocks and (immediate child) loops.
-  for (uint i = 0; i < _num_blocks; i++) {
-    Block *b = _blocks[i];
-    CFGLoop* lp = b->_loop;
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    CFGLoop* lp = block->_loop;
     if (lp == NULL) {
       // Not assigned to a loop. Add it to the method's pseudo loop.
-      b->_loop = root_loop;
+      block->_loop = root_loop;
       lp = root_loop;
     }
-    if (lp == root_loop || b != lp->head()) { // loop heads are already members
-      lp->add_member(b);
+    if (lp == root_loop || block != lp->head()) { // loop heads are already members
+      lp->add_member(block);
     }
     if (lp != root_loop) {
       if (lp->parent() == NULL) {
         // Not a nested loop. Make it a child of the method's pseudo loop.
         root_loop->add_nested_loop(lp);
       }
-      if (b == lp->head()) {
+      if (block == lp->head()) {
         // Add nested loop to member list of parent loop.
         lp->parent()->add_member(lp);
       }
@@ -1561,9 +1594,9 @@
 }
 
 //------------------------------push_pred--------------------------------------
-void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, Block_Array& node_to_blk) {
+void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, PhaseCFG* cfg) {
   Node* pred_n = blk->pred(i);
-  Block* pred = node_to_blk[pred_n->_idx];
+  Block* pred = cfg->get_block_for_node(pred_n);
   CFGLoop *pred_loop = pred->_loop;
   if (pred_loop == NULL) {
     // Filter out blocks for non-single-entry loops.
@@ -1584,7 +1617,7 @@
       Block* pred_head = pred_loop->head();
       assert(pred_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
       assert(pred_head != head(), "loop head in only one loop");
-      push_pred(pred_head, LoopNode::EntryControl, worklist, node_to_blk);
+      push_pred(pred_head, LoopNode::EntryControl, worklist, cfg);
     } else {
       assert(pred_loop->_parent == this && _parent == NULL, "just checking");
     }
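
For orientation at the tail of the diff: push_pred() grows a loop's member set by walking predecessors backward from the loop tail until the head is reached; when it hits a block already owned by an inner loop it hops to that loop's entry control instead. A compact model of the backward flood (toy types; single-entry loops are assumed and the inner-loop hop is omitted):

    #include <vector>

    struct LBlock {
      std::vector<LBlock*> preds;
      int loop_id = -1;  // -1: not yet assigned to any loop
    };

    // Collect the blocks of loop 'id' by flooding backward from the tail;
    // the walk stops at the head because it is pre-assigned to the loop.
    void collect_loop(LBlock* head, LBlock* tail, int id) {
      std::vector<LBlock*> worklist;
      head->loop_id = id;
      tail->loop_id = id;
      worklist.push_back(tail);
      while (!worklist.empty()) {
        LBlock* member = worklist.back();
        worklist.pop_back();
        for (LBlock* pred : member->preds) {
          if (pred->loop_id != id) {  // head is already 'id', so we stop there
            pred->loop_id = id;
            worklist.push_back(pred);
          }
        }
      }
    }
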