hotspot/src/share/vm/opto/gcm.cpp
changeset 19330 49d6711171e6
parent 19279 4be3c2e6663c
child 19717 7819ffdaf0ff
child 22828 17ecb098bc1e
--- a/hotspot/src/share/vm/opto/gcm.cpp	Thu Aug 15 11:59:19 2013 -0700
+++ b/hotspot/src/share/vm/opto/gcm.cpp	Fri Aug 16 10:23:55 2013 +0200
@@ -121,27 +121,30 @@
 
 //------------------------------schedule_pinned_nodes--------------------------
 // Set the basic block for Nodes pinned into blocks
-void PhaseCFG::schedule_pinned_nodes( VectorSet &visited ) {
+void PhaseCFG::schedule_pinned_nodes(VectorSet &visited) {
   // Allocate node stack of size C->unique()+8 to avoid frequent realloc
-  GrowableArray <Node *> spstack(C->unique()+8);
+  GrowableArray <Node *> spstack(C->unique() + 8);
   spstack.push(_root);
-  while ( spstack.is_nonempty() ) {
-    Node *n = spstack.pop();
-    if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited
-      if( n->pinned() && !has_block(n)) {  // Pinned?  Nail it down!
-        assert( n->in(0), "pinned Node must have Control" );
+  while (spstack.is_nonempty()) {
+    Node* node = spstack.pop();
+    if (!visited.test_set(node->_idx)) { // Test node and flag it as visited
+      if (node->pinned() && !has_block(node)) {  // Pinned?  Nail it down!
+        assert(node->in(0), "pinned Node must have Control");
         // Before setting block replace block_proj control edge
-        replace_block_proj_ctrl(n);
-        Node *input = n->in(0);
+        replace_block_proj_ctrl(node);
+        Node* input = node->in(0);
         while (!input->is_block_start()) {
           input = input->in(0);
         }
-        Block *b = get_block_for_node(input); // Basic block of controlling input
-        schedule_node_into_block(n, b);
+        Block* block = get_block_for_node(input); // Basic block of controlling input
+        schedule_node_into_block(node, block);
       }
-      for( int i = n->req() - 1; i >= 0; --i ) {  // For all inputs
-        if( n->in(i) != NULL )
-          spstack.push(n->in(i));
+
+      // process all inputs that are non NULL
+      for (int i = node->req() - 1; i >= 0; --i) {
+        if (node->in(i) != NULL) {
+          spstack.push(node->in(i));
+        }
       }
     }
   }
@@ -205,32 +208,29 @@
 // which all their inputs occur.
 bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) {
   // Allocate stack with enough space to avoid frequent realloc
-  Node_Stack nstack(roots.Size() + 8); // (unique >> 1) + 24 from Java2D stats
-  // roots.push(_root); _root will be processed among C->top() inputs
+  Node_Stack nstack(roots.Size() + 8);
+  // _root will be processed among C->top() inputs
   roots.push(C->top());
   visited.set(C->top()->_idx);
 
   while (roots.size() != 0) {
     // Use local variables nstack_top_n & nstack_top_i to cache values
     // on stack's top.
-    Node *nstack_top_n = roots.pop();
-    uint  nstack_top_i = 0;
-//while_nstack_nonempty:
+    Node* parent_node = roots.pop();
+    uint  input_index = 0;
+
     while (true) {
-      // Get parent node and next input's index from stack's top.
-      Node *n = nstack_top_n;
-      uint  i = nstack_top_i;
-
-      if (i == 0) {
+      if (input_index == 0) {
         // Fixup some control.  Constants without control get attached
         // to root and nodes that use is_block_proj() nodes should be attached
         // to the region that starts their block.
-        const Node *in0 = n->in(0);
-        if (in0 != NULL) {              // Control-dependent?
-          replace_block_proj_ctrl(n);
-        } else {               // n->in(0) == NULL
-          if (n->req() == 1) { // This guy is a constant with NO inputs?
-            n->set_req(0, _root);
+        const Node* control_input = parent_node->in(0);
+        if (control_input != NULL) {
+          replace_block_proj_ctrl(parent_node);
+        } else {
+          // Is a constant with NO inputs?
+          if (parent_node->req() == 1) {
+            parent_node->set_req(0, _root);
           }
         }
       }
@@ -239,37 +239,47 @@
       // input is already in a block we quit following inputs (to avoid
       // cycles). Instead we put that Node on a worklist to be handled
       // later (since IT'S inputs may not have a block yet).
-      bool done = true;              // Assume all n's inputs will be processed
-      while (i < n->len()) {         // For all inputs
-        Node *in = n->in(i);         // Get input
-        ++i;
-        if (in == NULL) continue;    // Ignore NULL, missing inputs
+
+      // Assume all n's inputs will be processed
+      bool done = true;
+
+      while (input_index < parent_node->len()) {
+        Node* in = parent_node->in(input_index++);
+        if (in == NULL) {
+          continue;
+        }
+
         int is_visited = visited.test_set(in->_idx);
-        if (!has_block(in)) { // Missing block selection?
+        if (!has_block(in)) {
           if (is_visited) {
-            // assert( !visited.test(in->_idx), "did not schedule early" );
             return false;
           }
-          nstack.push(n, i);         // Save parent node and next input's index.
-          nstack_top_n = in;         // Process current input now.
-          nstack_top_i = 0;
-          done = false;              // Not all n's inputs processed.
-          break; // continue while_nstack_nonempty;
-        } else if (!is_visited) {    // Input not yet visited?
-          roots.push(in);            // Visit this guy later, using worklist
+          // Save parent node and next input's index.
+          nstack.push(parent_node, input_index);
+          // Process current input now.
+          parent_node = in;
+          input_index = 0;
+          // Not all n's inputs processed.
+          done = false;
+          break;
+        } else if (!is_visited) {
+          // Visit this guy later, using worklist
+          roots.push(in);
         }
       }
+
       if (done) {
         // All of n's inputs have been processed, complete post-processing.
 
         // Some instructions are pinned into a block.  These include Region,
         // Phi, Start, Return, and other control-dependent instructions and
         // any projections which depend on them.
-        if (!n->pinned()) {
+        if (!parent_node->pinned()) {
           // Set earliest legal block.
-          map_node_to_block(n, find_deepest_input(n, this));
+          Block* earliest_block = find_deepest_input(parent_node, this);
+          map_node_to_block(parent_node, earliest_block);
         } else {
-          assert(get_block_for_node(n) == get_block_for_node(n->in(0)), "Pinned Node should be at the same block as its control edge");
+          assert(get_block_for_node(parent_node) == get_block_for_node(parent_node->in(0)), "Pinned Node should be at the same block as its control edge");
         }
 
         if (nstack.is_empty()) {
@@ -278,12 +288,12 @@
           break;
         }
         // Get saved parent node and next input's index.
-        nstack_top_n = nstack.node();
-        nstack_top_i = nstack.index();
+        parent_node = nstack.node();
+        input_index = nstack.index();
         nstack.pop();
-      } //    if (done)
-    }   // while (true)
-  }     // while (roots.size() != 0)
+      }
+    }
+  }
   return true;
 }
 
@@ -847,7 +857,7 @@
 
 //------------------------------ComputeLatenciesBackwards----------------------
 // Compute the latency of all the instructions.
-void PhaseCFG::ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack) {
+void PhaseCFG::compute_latencies_backwards(VectorSet &visited, Node_List &stack) {
 #ifndef PRODUCT
   if (trace_opto_pipelining())
     tty->print("\n#---- ComputeLatenciesBackwards ----\n");
@@ -870,31 +880,34 @@
   // Set the latency for this instruction
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
-    tty->print("# latency_to_inputs: node_latency[%d] = %d for node",
-               n->_idx, _node_latency->at_grow(n->_idx));
+    tty->print("# latency_to_inputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
     dump();
   }
 #endif
 
-  if (n->is_Proj())
+  if (n->is_Proj()) {
     n = n->in(0);
+  }
 
-  if (n->is_Root())
+  if (n->is_Root()) {
     return;
+  }
 
   uint nlen = n->len();
-  uint use_latency = _node_latency->at_grow(n->_idx);
+  uint use_latency = get_latency_for_node(n);
   uint use_pre_order = get_block_for_node(n)->_pre_order;
 
-  for ( uint j=0; j<nlen; j++ ) {
+  for (uint j = 0; j < nlen; j++) {
     Node *def = n->in(j);
 
-    if (!def || def == n)
+    if (!def || def == n) {
       continue;
+    }
 
     // Walk backwards thru projections
-    if (def->is_Proj())
+    if (def->is_Proj()) {
       def = def->in(0);
+    }
 
 #ifndef PRODUCT
     if (trace_opto_pipelining()) {
@@ -907,22 +920,20 @@
     Block *def_block = get_block_for_node(def);
     uint def_pre_order = def_block ? def_block->_pre_order : 0;
 
-    if ( (use_pre_order <  def_pre_order) ||
-         (use_pre_order == def_pre_order && n->is_Phi()) )
+    if ((use_pre_order <  def_pre_order) || (use_pre_order == def_pre_order && n->is_Phi())) {
       continue;
+    }
 
     uint delta_latency = n->latency(j);
     uint current_latency = delta_latency + use_latency;
 
-    if (_node_latency->at_grow(def->_idx) < current_latency) {
-      _node_latency->at_put_grow(def->_idx, current_latency);
+    if (get_latency_for_node(def) < current_latency) {
+      set_latency_for_node(def, current_latency);
     }
 
 #ifndef PRODUCT
     if (trace_opto_pipelining()) {
-      tty->print_cr("#      %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d",
-                    use_latency, j, delta_latency, current_latency, def->_idx,
-                    _node_latency->at_grow(def->_idx));
+      tty->print_cr("#      %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", use_latency, j, delta_latency, current_latency, def->_idx, get_latency_for_node(def));
     }
 #endif
   }
@@ -957,7 +968,7 @@
       return 0;
 
     uint nlen = use->len();
-    uint nl = _node_latency->at_grow(use->_idx);
+    uint nl = get_latency_for_node(use);
 
     for ( uint j=0; j<nlen; j++ ) {
       if (use->in(j) == n) {
@@ -992,8 +1003,7 @@
   // Set the latency for this instruction
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
-    tty->print("# latency_from_outputs: node_latency[%d] = %d for node",
-               n->_idx, _node_latency->at_grow(n->_idx));
+    tty->print("# latency_from_outputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
     dump();
   }
 #endif
@@ -1006,7 +1016,7 @@
     if (latency < l) latency = l;
   }
 
-  _node_latency->at_put_grow(n->_idx, latency);
+  set_latency_for_node(n, latency);
 }
 
 //------------------------------hoist_to_cheaper_block-------------------------
@@ -1016,9 +1026,9 @@
   const double delta = 1+PROB_UNLIKELY_MAG(4);
   Block* least       = LCA;
   double least_freq  = least->_freq;
-  uint target        = _node_latency->at_grow(self->_idx);
-  uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx);
-  uint end_latency   = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx);
+  uint target        = get_latency_for_node(self);
+  uint start_latency = get_latency_for_node(LCA->_nodes[0]);
+  uint end_latency   = get_latency_for_node(LCA->_nodes[LCA->end_idx()]);
   bool in_latency    = (target <= start_latency);
   const Block* root_block = get_block_for_node(_root);
 
@@ -1035,8 +1045,7 @@
 
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
-    tty->print("# Find cheaper block for latency %d: ",
-      _node_latency->at_grow(self->_idx));
+    tty->print("# Find cheaper block for latency %d: ", get_latency_for_node(self));
     self->dump();
     tty->print_cr("#   B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
       LCA->_pre_order,
@@ -1065,9 +1074,9 @@
     if (mach && LCA == root_block)
       break;
 
-    uint start_lat = _node_latency->at_grow(LCA->_nodes[0]->_idx);
+    uint start_lat = get_latency_for_node(LCA->_nodes[0]);
     uint end_idx   = LCA->end_idx();
-    uint end_lat   = _node_latency->at_grow(LCA->_nodes[end_idx]->_idx);
+    uint end_lat   = get_latency_for_node(LCA->_nodes[end_idx]);
     double LCA_freq = LCA->_freq;
 #ifndef PRODUCT
     if (trace_opto_pipelining()) {
@@ -1109,7 +1118,7 @@
       tty->print_cr("#  Change latency for [%4d] from %d to %d", self->_idx, target, end_latency);
     }
 #endif
-    _node_latency->at_put_grow(self->_idx, end_latency);
+    set_latency_for_node(self, end_latency);
     partial_latency_of_defs(self);
   }
 
@@ -1255,7 +1264,7 @@
 } // end ScheduleLate
 
 //------------------------------GlobalCodeMotion-------------------------------
-void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_list ) {
+void PhaseCFG::global_code_motion() {
   ResourceMark rm;
 
 #ifndef PRODUCT
@@ -1265,21 +1274,22 @@
 #endif
 
   // Initialize the node to block mapping for things on the proj_list
-  for (uint i = 0; i < proj_list.size(); i++) {
-    unmap_node_from_block(proj_list[i]);
+  for (uint i = 0; i < _matcher.number_of_projections(); i++) {
+    unmap_node_from_block(_matcher.get_projection(i));
   }
 
   // Set the basic block for Nodes pinned into blocks
-  Arena *a = Thread::current()->resource_area();
-  VectorSet visited(a);
-  schedule_pinned_nodes( visited );
+  Arena* arena = Thread::current()->resource_area();
+  VectorSet visited(arena);
+  schedule_pinned_nodes(visited);
 
   // Find the earliest Block any instruction can be placed in.  Some
   // instructions are pinned into Blocks.  Unpinned instructions can
   // appear in last block in which all their inputs occur.
   visited.Clear();
-  Node_List stack(a);
-  stack.map( (unique >> 1) + 16, NULL); // Pre-grow the list
+  Node_List stack(arena);
+  // Pre-grow the list
+  stack.map((C->unique() >> 1) + 16, NULL);
   if (!schedule_early(visited, stack)) {
     // Bailout without retry
     C->record_method_not_compilable("early schedule failed");
@@ -1287,29 +1297,25 @@
   }
 
   // Build Def-Use edges.
-  proj_list.push(_root);        // Add real root as another root
-  proj_list.pop();
-
   // Compute the latency information (via backwards walk) for all the
   // instructions in the graph
   _node_latency = new GrowableArray<uint>(); // resource_area allocation
 
-  if( C->do_scheduling() )
-    ComputeLatenciesBackwards(visited, stack);
+  if (C->do_scheduling()) {
+    compute_latencies_backwards(visited, stack);
+  }
 
   // Now schedule all codes as LATE as possible.  This is the LCA in the
   // dominator tree of all USES of a value.  Pick the block with the least
   // loop nesting depth that is lowest in the dominator tree.
   // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() )
   schedule_late(visited, stack);
-  if( C->failing() ) {
+  if (C->failing()) {
     // schedule_late fails only when graph is incorrect.
     assert(!VerifyGraphEdges, "verification should have failed");
     return;
   }
 
-  unique = C->unique();
-
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
     tty->print("\n---- Detect implicit null checks ----\n");
@@ -1332,10 +1338,11 @@
     // By reversing the loop direction we get a very minor gain on mpegaudio.
     // Feel free to revert to a forward loop for clarity.
     // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) {
-    for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) {
-      Node *proj = matcher._null_check_tests[i  ];
-      Node *val  = matcher._null_check_tests[i+1];
-      get_block_for_node(proj)->implicit_null_check(this, proj, val, allowed_reasons);
+    for (int i = _matcher._null_check_tests.size() - 2; i >= 0; i -= 2) {
+      Node* proj = _matcher._null_check_tests[i];
+      Node* val  = _matcher._null_check_tests[i + 1];
+      Block* block = get_block_for_node(proj);
+      block->implicit_null_check(this, proj, val, allowed_reasons);
       // The implicit_null_check will only perform the transformation
       // if the null branch is truly uncommon, *and* it leads to an
       // uncommon trap.  Combined with the too_many_traps guards
@@ -1352,11 +1359,11 @@
 
   // Schedule locally.  Right now a simple topological sort.
   // Later, do a real latency aware scheduler.
-  uint max_idx = C->unique();
-  GrowableArray<int> ready_cnt(max_idx, max_idx, -1);
+  GrowableArray<int> ready_cnt(C->unique(), C->unique(), -1);
   visited.Clear();
-  for (uint i = 0; i < _num_blocks; i++) {
-    if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) {
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    if (!block->schedule_local(this, _matcher, ready_cnt, visited)) {
       if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
         C->record_method_not_compilable("local schedule failed");
       }
@@ -1366,15 +1373,17 @@
 
   // If we inserted any instructions between a Call and his CatchNode,
   // clone the instructions on all paths below the Catch.
-  for (uint i = 0; i < _num_blocks; i++) {
-    _blocks[i]->call_catch_cleanup(this, C);
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    block->call_catch_cleanup(this, C);
   }
 
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
     tty->print("\n---- After GlobalCodeMotion ----\n");
-    for (uint i = 0; i < _num_blocks; i++) {
-      _blocks[i]->dump();
+    for (uint i = 0; i < number_of_blocks(); i++) {
+      Block* block = get_block(i);
+      block->dump();
     }
   }
 #endif
@@ -1382,10 +1391,29 @@
   _node_latency = (GrowableArray<uint> *)0xdeadbeef;
 }
 
+bool PhaseCFG::do_global_code_motion() {
+
+  build_dominator_tree();
+  if (C->failing()) {
+    return false;
+  }
+
+  NOT_PRODUCT( C->verify_graph_edges(); )
+
+  estimate_block_frequency();
+
+  global_code_motion();
+
+  if (C->failing()) {
+    return false;
+  }
+
+  return true;
+}
 
 //------------------------------Estimate_Block_Frequency-----------------------
 // Estimate block frequencies based on IfNode probabilities.
-void PhaseCFG::Estimate_Block_Frequency() {
+void PhaseCFG::estimate_block_frequency() {
 
   // Force conditional branches leading to uncommon traps to be unlikely,
   // not because we get to the uncommon_trap with less relative frequency,
@@ -1393,7 +1421,7 @@
   // there once.
   if (C->do_freq_based_layout()) {
     Block_List worklist;
-    Block* root_blk = _blocks[0];
+    Block* root_blk = get_block(0);
     for (uint i = 1; i < root_blk->num_preds(); i++) {
       Block *pb = get_block_for_node(root_blk->pred(i));
       if (pb->has_uncommon_code()) {
@@ -1402,7 +1430,9 @@
     }
     while (worklist.size() > 0) {
       Block* uct = worklist.pop();
-      if (uct == _broot) continue;
+      if (uct == get_root_block()) {
+        continue;
+      }
       for (uint i = 1; i < uct->num_preds(); i++) {
         Block *pb = get_block_for_node(uct->pred(i));
         if (pb->_num_succs == 1) {
@@ -1426,12 +1456,12 @@
   _root_loop->scale_freq();
 
   // Save outmost loop frequency for LRG frequency threshold
-  _outer_loop_freq = _root_loop->outer_loop_freq();
+  _outer_loop_frequency = _root_loop->outer_loop_freq();
 
   // force paths ending at uncommon traps to be infrequent
   if (!C->do_freq_based_layout()) {
     Block_List worklist;
-    Block* root_blk = _blocks[0];
+    Block* root_blk = get_block(0);
     for (uint i = 1; i < root_blk->num_preds(); i++) {
       Block *pb = get_block_for_node(root_blk->pred(i));
       if (pb->has_uncommon_code()) {
@@ -1451,8 +1481,8 @@
   }
 
 #ifdef ASSERT
-  for (uint i = 0; i < _num_blocks; i++ ) {
-    Block *b = _blocks[i];
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* b = get_block(i);
     assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency");
   }
 #endif
@@ -1476,16 +1506,16 @@
 CFGLoop* PhaseCFG::create_loop_tree() {
 
 #ifdef ASSERT
-  assert( _blocks[0] == _broot, "" );
-  for (uint i = 0; i < _num_blocks; i++ ) {
-    Block *b = _blocks[i];
+  assert(get_block(0) == get_root_block(), "first block should be root block");
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
     // Check that _loop field are clear...we could clear them if not.
-    assert(b->_loop == NULL, "clear _loop expected");
+    assert(block->_loop == NULL, "clear _loop expected");
     // Sanity check that the RPO numbering is reflected in the _blocks array.
     // It doesn't have to be for the loop tree to be built, but if it is not,
     // then the blocks have been reordered since dom graph building...which
     // may question the RPO numbering
-    assert(b->_rpo == i, "unexpected reverse post order number");
+    assert(block->_rpo == i, "unexpected reverse post order number");
   }
 #endif
 
@@ -1495,11 +1525,11 @@
   Block_List worklist;
 
   // Assign blocks to loops
-  for(uint i = _num_blocks - 1; i > 0; i-- ) { // skip Root block
-    Block *b = _blocks[i];
+  for(uint i = number_of_blocks() - 1; i > 0; i-- ) { // skip Root block
+    Block* block = get_block(i);
 
-    if (b->head()->is_Loop()) {
-      Block* loop_head = b;
+    if (block->head()->is_Loop()) {
+      Block* loop_head = block;
       assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
       Node* tail_n = loop_head->pred(LoopNode::LoopBackControl);
       Block* tail = get_block_for_node(tail_n);
@@ -1533,23 +1563,23 @@
 
   // Create a member list for each loop consisting
   // of both blocks and (immediate child) loops.
-  for (uint i = 0; i < _num_blocks; i++) {
-    Block *b = _blocks[i];
-    CFGLoop* lp = b->_loop;
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    CFGLoop* lp = block->_loop;
     if (lp == NULL) {
       // Not assigned to a loop. Add it to the method's pseudo loop.
-      b->_loop = root_loop;
+      block->_loop = root_loop;
       lp = root_loop;
     }
-    if (lp == root_loop || b != lp->head()) { // loop heads are already members
-      lp->add_member(b);
+    if (lp == root_loop || block != lp->head()) { // loop heads are already members
+      lp->add_member(block);
     }
     if (lp != root_loop) {
       if (lp->parent() == NULL) {
         // Not a nested loop. Make it a child of the method's pseudo loop.
         root_loop->add_nested_loop(lp);
       }
-      if (b == lp->head()) {
+      if (block == lp->head()) {
         // Add nested loop to member list of parent loop.
         lp->parent()->add_member(lp);
       }