8210829: Modularize allocations in C2
author: rkennke
Wed, 19 Sep 2018 21:31:33 +0200
changeset 51806 1ecc914fb707
parent 51805 eb2adb0a9b09
child 51807 3221f5e14866
8210829: Modularize allocations in C2 Reviewed-by: kvn, roland
src/hotspot/share/gc/shared/c2/barrierSetC2.cpp
src/hotspot/share/gc/shared/c2/barrierSetC2.hpp
src/hotspot/share/opto/macro.cpp
src/hotspot/share/opto/macro.hpp
--- a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp	Wed Sep 19 10:51:06 2018 -0700
+++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp	Wed Sep 19 21:31:33 2018 +0200
@@ -27,6 +27,7 @@
 #include "opto/arraycopynode.hpp"
 #include "opto/graphKit.hpp"
 #include "opto/idealKit.hpp"
+#include "opto/macro.hpp"
 #include "opto/narrowptrnode.hpp"
 #include "utilities/macros.hpp"
 
@@ -601,3 +602,130 @@
     kit->set_all_memory(n);
   }
 }
+
+Node* BarrierSetC2::obj_allocate(PhaseMacroExpand* macro, Node* ctrl, Node* mem, Node* toobig_false, Node* size_in_bytes,
+                                 Node*& i_o, Node*& needgc_ctrl,
+                                 Node*& fast_oop_ctrl, Node*& fast_oop_rawmem,
+                                 intx prefetch_lines) const {
+
+  Node* eden_top_adr;
+  Node* eden_end_adr;
+
+  macro->set_eden_pointers(eden_top_adr, eden_end_adr);
+
+  // Load Eden::end.  Loop invariant and hoisted.
+  //
+  // Note: We set the control input on "eden_end" and "old_eden_top" when using
+  //       a TLAB to work around a bug where these values were being moved across
+  //       a safepoint.  These are not oops, so they cannot be include in the oop
+  //       map, but they can be changed by a GC.   The proper way to fix this would
+  //       be to set the raw memory state when generating a  SafepointNode.  However
+  //       this will require extensive changes to the loop optimization in order to
+  //       prevent a degradation of the optimization.
+  //       See comment in memnode.hpp, around line 227 in class LoadPNode.
+  Node *eden_end = macro->make_load(ctrl, mem, eden_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS);
+
+  // We need a Region for the loop-back contended case.
+  enum { fall_in_path = 1, contended_loopback_path = 2 };
+  Node *contended_region;
+  Node *contended_phi_rawmem;
+  if (UseTLAB) {
+    contended_region = toobig_false;
+    contended_phi_rawmem = mem;
+  } else {
+    contended_region = new RegionNode(3);
+    contended_phi_rawmem = new PhiNode(contended_region, Type::MEMORY, TypeRawPtr::BOTTOM);
+    // Now handle the passing-too-big test.  We fall into the contended
+    // loop-back merge point.
+    contended_region    ->init_req(fall_in_path, toobig_false);
+    contended_phi_rawmem->init_req(fall_in_path, mem);
+    macro->transform_later(contended_region);
+    macro->transform_later(contended_phi_rawmem);
+  }
+
+  // Load(-locked) the heap top.
+  // See note above concerning the control input when using a TLAB
+  Node *old_eden_top = UseTLAB
+    ? new LoadPNode      (ctrl, contended_phi_rawmem, eden_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM, MemNode::unordered)
+    : new LoadPLockedNode(contended_region, contended_phi_rawmem, eden_top_adr, MemNode::acquire);
+
+  macro->transform_later(old_eden_top);
+  // Add to heap top to get a new heap top
+  Node *new_eden_top = new AddPNode(macro->top(), old_eden_top, size_in_bytes);
+  macro->transform_later(new_eden_top);
+  // Check for needing a GC; compare against heap end
+  Node *needgc_cmp = new CmpPNode(new_eden_top, eden_end);
+  macro->transform_later(needgc_cmp);
+  Node *needgc_bol = new BoolNode(needgc_cmp, BoolTest::ge);
+  macro->transform_later(needgc_bol);
+  IfNode *needgc_iff = new IfNode(contended_region, needgc_bol, PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN);
+  macro->transform_later(needgc_iff);
+
+  // Plug the failing-heap-space-need-gc test into the slow-path region
+  Node *needgc_true = new IfTrueNode(needgc_iff);
+  macro->transform_later(needgc_true);
+  needgc_ctrl = needgc_true;
+
+  // No need for a GC.  Setup for the Store-Conditional
+  Node *needgc_false = new IfFalseNode(needgc_iff);
+  macro->transform_later(needgc_false);
+
+  i_o = macro->prefetch_allocation(i_o, needgc_false, contended_phi_rawmem,
+                                   old_eden_top, new_eden_top, prefetch_lines);
+
+  Node* fast_oop = old_eden_top;
+
+  // Store (-conditional) the modified eden top back down.
+  // StorePConditional produces flags for a test PLUS a modified raw
+  // memory state.
+  if (UseTLAB) {
+    Node* store_eden_top =
+      new StorePNode(needgc_false, contended_phi_rawmem, eden_top_adr,
+                     TypeRawPtr::BOTTOM, new_eden_top, MemNode::unordered);
+    macro->transform_later(store_eden_top);
+    fast_oop_ctrl = needgc_false; // No contention, so this is the fast path
+    fast_oop_rawmem = store_eden_top;
+  } else {
+    Node* store_eden_top =
+      new StorePConditionalNode(needgc_false, contended_phi_rawmem, eden_top_adr,
+                                new_eden_top, fast_oop/*old_eden_top*/);
+    macro->transform_later(store_eden_top);
+    Node *contention_check = new BoolNode(store_eden_top, BoolTest::ne);
+    macro->transform_later(contention_check);
+    store_eden_top = new SCMemProjNode(store_eden_top);
+    macro->transform_later(store_eden_top);
+
+    // If not using TLABs, check to see if there was contention.
+    IfNode *contention_iff = new IfNode (needgc_false, contention_check, PROB_MIN, COUNT_UNKNOWN);
+    macro->transform_later(contention_iff);
+    Node *contention_true = new IfTrueNode(contention_iff);
+    macro->transform_later(contention_true);
+    // If contention, loopback and try again.
+    contended_region->init_req(contended_loopback_path, contention_true);
+    contended_phi_rawmem->init_req(contended_loopback_path, store_eden_top);
+
+    // Fast-path succeeded with no contention!
+    Node *contention_false = new IfFalseNode(contention_iff);
+    macro->transform_later(contention_false);
+    fast_oop_ctrl = contention_false;
+
+    // Bump total allocated bytes for this thread
+    Node* thread = new ThreadLocalNode();
+    macro->transform_later(thread);
+    Node* alloc_bytes_adr = macro->basic_plus_adr(macro->top()/*not oop*/, thread,
+                                                  in_bytes(JavaThread::allocated_bytes_offset()));
+    Node* alloc_bytes = macro->make_load(fast_oop_ctrl, store_eden_top, alloc_bytes_adr,
+                                         0, TypeLong::LONG, T_LONG);
+#ifdef _LP64
+    Node* alloc_size = size_in_bytes;
+#else
+    Node* alloc_size = new ConvI2LNode(size_in_bytes);
+    macro->transform_later(alloc_size);
+#endif
+    Node* new_alloc_bytes = new AddLNode(alloc_bytes, alloc_size);
+    macro->transform_later(new_alloc_bytes);
+    fast_oop_rawmem = macro->make_store(fast_oop_ctrl, store_eden_top, alloc_bytes_adr,
+                                        0, new_alloc_bytes, T_LONG);
+  }
+  return fast_oop;
+}
--- a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp	Wed Sep 19 10:51:06 2018 -0700
+++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp	Wed Sep 19 21:31:33 2018 +0200
@@ -192,6 +192,11 @@
 
   virtual Node* resolve(GraphKit* kit, Node* n, DecoratorSet decorators) const { return n; }
 
+  virtual Node* obj_allocate(PhaseMacroExpand* macro, Node* ctrl, Node* mem, Node* toobig_false, Node* size_in_bytes,
+                             Node*& i_o, Node*& needgc_ctrl,
+                             Node*& fast_oop_ctrl, Node*& fast_oop_rawmem,
+                             intx prefetch_lines) const;
+
   // These are general helper methods used by C2
   virtual bool array_copy_requires_gc_barriers(BasicType type) const { return false; }
 
--- a/src/hotspot/share/opto/macro.cpp	Wed Sep 19 10:51:06 2018 -0700
+++ b/src/hotspot/share/opto/macro.cpp	Wed Sep 19 21:31:33 2018 +0200
@@ -1307,143 +1307,35 @@
       mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
     }
 
-    Node* eden_top_adr;
-    Node* eden_end_adr;
-
-    set_eden_pointers(eden_top_adr, eden_end_adr);
-
-    // Load Eden::end.  Loop invariant and hoisted.
-    //
-    // Note: We set the control input on "eden_end" and "old_eden_top" when using
-    //       a TLAB to work around a bug where these values were being moved across
-    //       a safepoint.  These are not oops, so they cannot be include in the oop
-    //       map, but they can be changed by a GC.   The proper way to fix this would
-    //       be to set the raw memory state when generating a  SafepointNode.  However
-    //       this will require extensive changes to the loop optimization in order to
-    //       prevent a degradation of the optimization.
-    //       See comment in memnode.hpp, around line 227 in class LoadPNode.
-    Node *eden_end = make_load(ctrl, mem, eden_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS);
-
     // allocate the Region and Phi nodes for the result
     result_region = new RegionNode(3);
     result_phi_rawmem = new PhiNode(result_region, Type::MEMORY, TypeRawPtr::BOTTOM);
     result_phi_rawoop = new PhiNode(result_region, TypeRawPtr::BOTTOM);
     result_phi_i_o    = new PhiNode(result_region, Type::ABIO); // I/O is used for Prefetch
 
-    // We need a Region for the loop-back contended case.
-    enum { fall_in_path = 1, contended_loopback_path = 2 };
-    Node *contended_region;
-    Node *contended_phi_rawmem;
-    if (UseTLAB) {
-      contended_region = toobig_false;
-      contended_phi_rawmem = mem;
-    } else {
-      contended_region = new RegionNode(3);
-      contended_phi_rawmem = new PhiNode(contended_region, Type::MEMORY, TypeRawPtr::BOTTOM);
-      // Now handle the passing-too-big test.  We fall into the contended
-      // loop-back merge point.
-      contended_region    ->init_req(fall_in_path, toobig_false);
-      contended_phi_rawmem->init_req(fall_in_path, mem);
-      transform_later(contended_region);
-      transform_later(contended_phi_rawmem);
-    }
-
-    // Load(-locked) the heap top.
-    // See note above concerning the control input when using a TLAB
-    Node *old_eden_top = UseTLAB
-      ? new LoadPNode      (ctrl, contended_phi_rawmem, eden_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM, MemNode::unordered)
-      : new LoadPLockedNode(contended_region, contended_phi_rawmem, eden_top_adr, MemNode::acquire);
-
-    transform_later(old_eden_top);
-    // Add to heap top to get a new heap top
-    Node *new_eden_top = new AddPNode(top(), old_eden_top, size_in_bytes);
-    transform_later(new_eden_top);
-    // Check for needing a GC; compare against heap end
-    Node *needgc_cmp = new CmpPNode(new_eden_top, eden_end);
-    transform_later(needgc_cmp);
-    Node *needgc_bol = new BoolNode(needgc_cmp, BoolTest::ge);
-    transform_later(needgc_bol);
-    IfNode *needgc_iff = new IfNode(contended_region, needgc_bol, PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN);
-    transform_later(needgc_iff);
-
-    // Plug the failing-heap-space-need-gc test into the slow-path region
-    Node *needgc_true = new IfTrueNode(needgc_iff);
-    transform_later(needgc_true);
-    if (initial_slow_test) {
-      slow_region->init_req(need_gc_path, needgc_true);
-      // This completes all paths into the slow merge point
-      transform_later(slow_region);
-    } else {                      // No initial slow path needed!
-      // Just fall from the need-GC path straight into the VM call.
-      slow_region = needgc_true;
-    }
-    // No need for a GC.  Setup for the Store-Conditional
-    Node *needgc_false = new IfFalseNode(needgc_iff);
-    transform_later(needgc_false);
-
     // Grab regular I/O before optional prefetch may change it.
     // Slow-path does no I/O so just set it to the original I/O.
     result_phi_i_o->init_req(slow_result_path, i_o);
 
-    i_o = prefetch_allocation(i_o, needgc_false, contended_phi_rawmem,
-                              old_eden_top, new_eden_top, length);
-
+    Node* needgc_ctrl = NULL;
     // Name successful fast-path variables
-    Node* fast_oop = old_eden_top;
     Node* fast_oop_ctrl;
     Node* fast_oop_rawmem;
 
-    // Store (-conditional) the modified eden top back down.
-    // StorePConditional produces flags for a test PLUS a modified raw
-    // memory state.
-    if (UseTLAB) {
-      Node* store_eden_top =
-        new StorePNode(needgc_false, contended_phi_rawmem, eden_top_adr,
-                              TypeRawPtr::BOTTOM, new_eden_top, MemNode::unordered);
-      transform_later(store_eden_top);
-      fast_oop_ctrl = needgc_false; // No contention, so this is the fast path
-      fast_oop_rawmem = store_eden_top;
-    } else {
-      Node* store_eden_top =
-        new StorePConditionalNode(needgc_false, contended_phi_rawmem, eden_top_adr,
-                                         new_eden_top, fast_oop/*old_eden_top*/);
-      transform_later(store_eden_top);
-      Node *contention_check = new BoolNode(store_eden_top, BoolTest::ne);
-      transform_later(contention_check);
-      store_eden_top = new SCMemProjNode(store_eden_top);
-      transform_later(store_eden_top);
+    intx prefetch_lines = length != NULL ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
+
+    BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
+    Node* fast_oop = bs->obj_allocate(this, ctrl, mem, toobig_false, size_in_bytes, i_o, needgc_ctrl,
+                                      fast_oop_ctrl, fast_oop_rawmem,
+                                      prefetch_lines);
 
-      // If not using TLABs, check to see if there was contention.
-      IfNode *contention_iff = new IfNode (needgc_false, contention_check, PROB_MIN, COUNT_UNKNOWN);
-      transform_later(contention_iff);
-      Node *contention_true = new IfTrueNode(contention_iff);
-      transform_later(contention_true);
-      // If contention, loopback and try again.
-      contended_region->init_req(contended_loopback_path, contention_true);
-      contended_phi_rawmem->init_req(contended_loopback_path, store_eden_top);
-
-      // Fast-path succeeded with no contention!
-      Node *contention_false = new IfFalseNode(contention_iff);
-      transform_later(contention_false);
-      fast_oop_ctrl = contention_false;
-
-      // Bump total allocated bytes for this thread
-      Node* thread = new ThreadLocalNode();
-      transform_later(thread);
-      Node* alloc_bytes_adr = basic_plus_adr(top()/*not oop*/, thread,
-                                             in_bytes(JavaThread::allocated_bytes_offset()));
-      Node* alloc_bytes = make_load(fast_oop_ctrl, store_eden_top, alloc_bytes_adr,
-                                    0, TypeLong::LONG, T_LONG);
-#ifdef _LP64
-      Node* alloc_size = size_in_bytes;
-#else
-      Node* alloc_size = new ConvI2LNode(size_in_bytes);
-      transform_later(alloc_size);
-#endif
-      Node* new_alloc_bytes = new AddLNode(alloc_bytes, alloc_size);
-      transform_later(new_alloc_bytes);
-      fast_oop_rawmem = make_store(fast_oop_ctrl, store_eden_top, alloc_bytes_adr,
-                                   0, new_alloc_bytes, T_LONG);
+    if (initial_slow_test) {
+      slow_region->init_req(need_gc_path, needgc_ctrl);
+      // This completes all paths into the slow merge point
+      transform_later(slow_region);
+    } else {                      // No initial slow path needed!
+      // Just fall from the need-GC path straight into the VM call.
+      slow_region = needgc_ctrl;
     }
 
     InitializeNode* init = alloc->initialization();
@@ -1774,7 +1666,7 @@
 Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
                                         Node*& contended_phi_rawmem,
                                         Node* old_eden_top, Node* new_eden_top,
-                                        Node* length) {
+                                        intx lines) {
    enum { fall_in_path = 1, pf_path = 2 };
    if( UseTLAB && AllocatePrefetchStyle == 2 ) {
       // Generate prefetch allocation with watermark check.
@@ -1832,7 +1724,6 @@
 
       Node *prefetch_adr;
       Node *prefetch;
-      uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
       uint step_size = AllocatePrefetchStepSize;
       uint distance = 0;
 
@@ -1865,7 +1756,6 @@
       // This code is used to generate 1 prefetch instruction per cache line.
 
       // Generate several prefetch instructions.
-      uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
       uint step_size = AllocatePrefetchStepSize;
       uint distance = AllocatePrefetchDistance;
 
@@ -1904,7 +1794,6 @@
       Node *prefetch_adr;
       Node *prefetch;
       // Generate several prefetch instructions.
-      uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
       uint step_size = AllocatePrefetchStepSize;
       uint distance = AllocatePrefetchDistance;
       for ( uint i = 0; i < lines; i++ ) {
--- a/src/hotspot/share/opto/macro.hpp	Wed Sep 19 10:51:06 2018 -0700
+++ b/src/hotspot/share/opto/macro.hpp	Wed Sep 19 21:31:33 2018 +0200
@@ -193,11 +193,6 @@
                           Node* klass_node, Node* length,
                           Node* size_in_bytes);
 
-  Node* prefetch_allocation(Node* i_o,
-                            Node*& needgc_false, Node*& contended_phi_rawmem,
-                            Node* old_eden_top, Node* new_eden_top,
-                            Node* length);
-
   Node* make_arraycopy_load(ArrayCopyNode* ac, intptr_t offset, Node* ctl, Node* mem, BasicType ft, const Type *ftype, AllocateNode *alloc);
 
 public:
@@ -215,6 +210,11 @@
   Node* longcon(jlong con)      const { return _igvn.longcon(con); }
   Node* makecon(const Type *t)  const { return _igvn.makecon(t); }
   Node* top()                   const { return C->top(); }
+
+  Node* prefetch_allocation(Node* i_o,
+                            Node*& needgc_false, Node*& contended_phi_rawmem,
+                            Node* old_eden_top, Node* new_eden_top,
+                            intx lines);
 };
 
 #endif // SHARE_VM_OPTO_MACRO_HPP