7081933: Use zeroing elimination optimization for large array
author kvn
Mon, 26 Sep 2011 10:24:05 -0700
changeset 10566 630c177ec580
parent 10565 dc90c239f4ec
child 10567 149651837c4a
7081933: Use zeroing elimination optimization for large array
Summary: Don't zero new typeArray during runtime call if the allocation is followed by arraycopy into it.
Reviewed-by: twisti
hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp
hotspot/src/share/vm/gc_interface/collectedHeap.hpp
hotspot/src/share/vm/gc_interface/collectedHeap.inline.hpp
hotspot/src/share/vm/memory/oopFactory.cpp
hotspot/src/share/vm/memory/oopFactory.hpp
hotspot/src/share/vm/oops/typeArrayKlass.cpp
hotspot/src/share/vm/oops/typeArrayKlass.hpp
hotspot/src/share/vm/opto/library_call.cpp
hotspot/src/share/vm/opto/macro.cpp
hotspot/src/share/vm/opto/memnode.cpp
hotspot/src/share/vm/opto/memnode.hpp
hotspot/src/share/vm/opto/runtime.cpp
hotspot/src/share/vm/opto/runtime.hpp
--- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Sun Sep 25 16:03:29 2011 -0700
+++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Mon Sep 26 10:24:05 2011 -0700
@@ -2359,10 +2359,10 @@
     for (int off = 0; off < 64; off += 16) {
       if (use_prefetch && (off & 31) == 0) {
         if (ArraycopySrcPrefetchDistance > 0) {
-          __ prefetch(from, ArraycopySrcPrefetchDistance, Assembler::severalReads);
+          __ prefetch(from, ArraycopySrcPrefetchDistance+off, Assembler::severalReads);
         }
         if (ArraycopyDstPrefetchDistance > 0) {
-          __ prefetch(to, ArraycopyDstPrefetchDistance, Assembler::severalWritesAndPossiblyReads);
+          __ prefetch(to, ArraycopyDstPrefetchDistance+off, Assembler::severalWritesAndPossiblyReads);
         }
       }
       __ ldx(from,  off+0, O4);
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.hpp	Sun Sep 25 16:03:29 2011 -0700
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.hpp	Mon Sep 26 10:24:05 2011 -0700
@@ -322,6 +322,7 @@
   // General obj/array allocation facilities.
   inline static oop obj_allocate(KlassHandle klass, int size, TRAPS);
   inline static oop array_allocate(KlassHandle klass, int size, int length, TRAPS);
+  inline static oop array_allocate_nozero(KlassHandle klass, int size, int length, TRAPS);
 
   // Special obj/array allocation facilities.
   // Some heaps may want to manage "permanent" data uniquely. These default
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.inline.hpp	Sun Sep 25 16:03:29 2011 -0700
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.inline.hpp	Mon Sep 26 10:24:05 2011 -0700
@@ -274,6 +274,23 @@
   return (oop)obj;
 }
 
+oop CollectedHeap::array_allocate_nozero(KlassHandle klass,  // array allocation that goes through the "noinit" allocator instead of a zeroing one
+                                         int size,    // NOTE(review): presumably in heap words -- it is used as a HeapWord count below; confirm
+                                         int length,  // forwarded unchanged to post_allocation_setup_array
+                                         TRAPS) {
+  debug_only(check_for_valid_allocation_state());
+  assert(!Universe::heap()->is_gc_active(), "Allocation during gc not allowed");
+  assert(size >= 0, "int won't convert to size_t");
+  HeapWord* obj = common_mem_allocate_noinit(size, CHECK_NULL);  // raw allocation; NOTE(review): presumably leaves the memory un-zeroed -- confirm
+  ((oop)obj)->set_klass_gap(0);  // the klass gap is the only field cleared explicitly here
+  post_allocation_setup_array(klass, obj, size, length);  // remaining array setup is delegated
+#ifndef PRODUCT
+  const size_t hs = oopDesc::header_size()+1;  // NOTE(review): the extra word presumably skips the array length field -- confirm
+  Universe::heap()->check_for_non_bad_heap_word_value(obj+hs, size-hs);  // non-product check over the words starting at obj+hs
+#endif
+  return (oop)obj;
+}
+
 oop CollectedHeap::permanent_obj_allocate(KlassHandle klass, int size, TRAPS) {
   oop obj = permanent_obj_allocate_no_klass_install(klass, size, CHECK_NULL);
   post_allocation_install_obj_klass(klass, obj, size);
--- a/hotspot/src/share/vm/memory/oopFactory.cpp	Sun Sep 25 16:03:29 2011 -0700
+++ b/hotspot/src/share/vm/memory/oopFactory.cpp	Mon Sep 26 10:24:05 2011 -0700
@@ -77,7 +77,14 @@
 typeArrayOop oopFactory::new_typeArray(BasicType type, int length, TRAPS) {
   klassOop type_asKlassOop = Universe::typeArrayKlassObj(type);
   typeArrayKlass* type_asArrayKlass = typeArrayKlass::cast(type_asKlassOop);
-  typeArrayOop result = type_asArrayKlass->allocate(length, THREAD);
+  typeArrayOop result = type_asArrayKlass->allocate_common(length, true, THREAD);  // do_zero == true: selects CollectedHeap::array_allocate
+  return result;
+}
+
+typeArrayOop oopFactory::new_typeArray_nozero(BasicType type, int length, TRAPS) {  // like new_typeArray, but passes do_zero == false
+  klassOop type_asKlassOop = Universe::typeArrayKlassObj(type);  // klass for the requested primitive element type
+  typeArrayKlass* type_asArrayKlass = typeArrayKlass::cast(type_asKlassOop);
+  typeArrayOop result = type_asArrayKlass->allocate_common(length, false, THREAD);  // do_zero == false: selects CollectedHeap::array_allocate_nozero
   return result;
 }
 
--- a/hotspot/src/share/vm/memory/oopFactory.hpp	Sun Sep 25 16:03:29 2011 -0700
+++ b/hotspot/src/share/vm/memory/oopFactory.hpp	Mon Sep 26 10:24:05 2011 -0700
@@ -63,6 +63,7 @@
   static typeArrayOop    new_permanent_intArray  (int length, TRAPS);  // used for class file structures
 
   static typeArrayOop    new_typeArray(BasicType type, int length, TRAPS);
+  static typeArrayOop    new_typeArray_nozero(BasicType type, int length, TRAPS);
 
   // Constant pools
   static constantPoolOop      new_constantPool     (int length,
--- a/hotspot/src/share/vm/oops/typeArrayKlass.cpp	Sun Sep 25 16:03:29 2011 -0700
+++ b/hotspot/src/share/vm/oops/typeArrayKlass.cpp	Mon Sep 26 10:24:05 2011 -0700
@@ -76,7 +76,7 @@
   return k();
 }
 
-typeArrayOop typeArrayKlass::allocate(int length, TRAPS) {
+typeArrayOop typeArrayKlass::allocate_common(int length, bool do_zero, TRAPS) {
   assert(log2_element_size() >= 0, "bad scale");
   if (length >= 0) {
     if (length <= max_length()) {
@@ -84,7 +84,11 @@
       KlassHandle h_k(THREAD, as_klassOop());
       typeArrayOop t;
       CollectedHeap* ch = Universe::heap();
-      t = (typeArrayOop)CollectedHeap::array_allocate(h_k, (int)size, length, CHECK_NULL);
+      if (do_zero) {
+        t = (typeArrayOop)CollectedHeap::array_allocate(h_k, (int)size, length, CHECK_NULL);
+      } else {
+        t = (typeArrayOop)CollectedHeap::array_allocate_nozero(h_k, (int)size, length, CHECK_NULL);
+      }
       assert(t->is_parsable(), "Don't publish unless parsable");
       return t;
     } else {
--- a/hotspot/src/share/vm/oops/typeArrayKlass.hpp	Sun Sep 25 16:03:29 2011 -0700
+++ b/hotspot/src/share/vm/oops/typeArrayKlass.hpp	Mon Sep 26 10:24:05 2011 -0700
@@ -56,7 +56,8 @@
   bool compute_is_subtype_of(klassOop k);
 
   // Allocation
-  typeArrayOop allocate(int length, TRAPS);
+  typeArrayOop allocate_common(int length, bool do_zero, TRAPS);
+  typeArrayOop allocate(int length, TRAPS) { return allocate_common(length, true, THREAD); }
   typeArrayOop allocate_permanent(int length, TRAPS);  // used for class file structures
   oop multi_allocate(int rank, jint* sizes, TRAPS);
 
--- a/hotspot/src/share/vm/opto/library_call.cpp	Sun Sep 25 16:03:29 2011 -0700
+++ b/hotspot/src/share/vm/opto/library_call.cpp	Mon Sep 26 10:24:05 2011 -0700
@@ -4658,6 +4658,7 @@
     // "You break it, you buy it."
     InitializeNode* init = alloc->initialization();
     assert(init->is_complete(), "we just did this");
+    init->set_complete_with_arraycopy();
     assert(dest->is_CheckCastPP(), "sanity");
     assert(dest->in(0)->in(0) == init, "dest pinned");
     adr_type = TypeRawPtr::BOTTOM;  // all initializations are into raw memory
--- a/hotspot/src/share/vm/opto/macro.cpp	Sun Sep 25 16:03:29 2011 -0700
+++ b/hotspot/src/share/vm/opto/macro.cpp	Mon Sep 26 10:24:05 2011 -0700
@@ -1685,9 +1685,21 @@
 
 void PhaseMacroExpand::expand_allocate_array(AllocateArrayNode *alloc) {
   Node* length = alloc->in(AllocateNode::ALength);
+  InitializeNode* init = alloc->initialization();  // may be NULL; checked before use below
+  Node* klass_node = alloc->in(AllocateNode::KlassNode);
+  ciKlass* k = _igvn.type(klass_node)->is_klassptr()->klass();  // klass taken from the klass input's type
+  address slow_call_address;  // Runtime stub passed to expand_allocate_common as the slow call
+  if (init != NULL && init->is_complete_with_arraycopy() &&  // flag is set by InitializeNode::set_complete_with_arraycopy()
+      k->is_type_array_klass()) {
+    // Don't zero type array during slow allocation in VM since
+    // it will be initialized later by arraycopy in compiled code.
+    slow_call_address = OptoRuntime::new_array_nozero_Java();
+  } else {
+    slow_call_address = OptoRuntime::new_array_Java();  // default: the regular runtime array allocation stub
+  }
   expand_allocate_common(alloc, length,
                          OptoRuntime::new_array_Type(),
-                         OptoRuntime::new_array_Java());
+                         slow_call_address);  // both stubs are generated with new_array_Type as their signature
 }
 
 //-----------------------mark_eliminated_locking_nodes-----------------------
--- a/hotspot/src/share/vm/opto/memnode.cpp	Sun Sep 25 16:03:29 2011 -0700
+++ b/hotspot/src/share/vm/opto/memnode.cpp	Mon Sep 26 10:24:05 2011 -0700
@@ -2847,7 +2847,7 @@
 
 //---------------------------InitializeNode------------------------------------
 InitializeNode::InitializeNode(Compile* C, int adr_type, Node* rawoop)
-  : _is_complete(false),
+  : _is_complete(Incomplete),
     MemBarNode(C, adr_type, rawoop)
 {
   init_class_id(Class_Initialize);
@@ -2885,7 +2885,7 @@
 
 void InitializeNode::set_complete(PhaseGVN* phase) {
   assert(!is_complete(), "caller responsibility");
-  _is_complete = true;
+  _is_complete = Complete;
 
   // After this node is complete, it contains a bunch of
   // raw-memory initializations.  There is no need for
--- a/hotspot/src/share/vm/opto/memnode.hpp	Sun Sep 25 16:03:29 2011 -0700
+++ b/hotspot/src/share/vm/opto/memnode.hpp	Mon Sep 26 10:24:05 2011 -0700
@@ -942,7 +942,12 @@
 class InitializeNode: public MemBarNode {
   friend class AllocateNode;
 
-  bool _is_complete;
+  enum {
+    Incomplete    = 0,
+    Complete      = 1,
+    WithArraycopy = 2
+  };
+  int _is_complete;
 
 public:
   enum {
@@ -976,10 +981,12 @@
   // An InitializeNode must completed before macro expansion is done.
   // Completion requires that the AllocateNode must be followed by
   // initialization of the new memory to zero, then to any initializers.
-  bool is_complete() { return _is_complete; }
+  bool is_complete() { return _is_complete != Incomplete; }
+  bool is_complete_with_arraycopy() { return (_is_complete & WithArraycopy) != 0; }
 
   // Mark complete.  (Must not yet be complete.)
   void set_complete(PhaseGVN* phase);
+  void set_complete_with_arraycopy() { _is_complete = Complete | WithArraycopy; }
 
 #ifdef ASSERT
   // ensure all non-degenerate stores are ordered and non-overlapping
--- a/hotspot/src/share/vm/opto/runtime.cpp	Sun Sep 25 16:03:29 2011 -0700
+++ b/hotspot/src/share/vm/opto/runtime.cpp	Mon Sep 26 10:24:05 2011 -0700
@@ -102,6 +102,7 @@
 // Compiled code entry points
 address OptoRuntime::_new_instance_Java                           = NULL;
 address OptoRuntime::_new_array_Java                              = NULL;
+address OptoRuntime::_new_array_nozero_Java                       = NULL;
 address OptoRuntime::_multianewarray2_Java                        = NULL;
 address OptoRuntime::_multianewarray3_Java                        = NULL;
 address OptoRuntime::_multianewarray4_Java                        = NULL;
@@ -151,6 +152,7 @@
   // -------------------------------------------------------------------------------------------------------------------------------
   gen(env, _new_instance_Java              , new_instance_Type            , new_instance_C                  ,    0 , true , false, false);
   gen(env, _new_array_Java                 , new_array_Type               , new_array_C                     ,    0 , true , false, false);
+  gen(env, _new_array_nozero_Java          , new_array_Type               , new_array_nozero_C              ,    0 , true , false, false);
   gen(env, _multianewarray2_Java           , multianewarray2_Type         , multianewarray2_C               ,    0 , true , false, false);
   gen(env, _multianewarray3_Java           , multianewarray3_Type         , multianewarray3_C               ,    0 , true , false, false);
   gen(env, _multianewarray4_Java           , multianewarray4_Type         , multianewarray4_C               ,    0 , true , false, false);
@@ -308,6 +310,36 @@
   }
 JRT_END
 
+// Type array allocation without zeroing; macro expansion selects it when the array is initialized by arraycopy in compiled code.
+JRT_BLOCK_ENTRY(void, OptoRuntime::new_array_nozero_C(klassOopDesc* array_type, int len, JavaThread *thread))
+  JRT_BLOCK;
+#ifndef PRODUCT
+  SharedRuntime::_new_array_ctr++;            // non-product counter of runtime array allocations
+#endif
+  assert(check_compiled_frame(thread), "incorrect caller");
+
+  // Allocate a type array through the non-zeroing factory entry point.
+  oop result;
+
+  assert(Klass::cast(array_type)->oop_is_typeArray(), "should be called only for type array");
+  // The oopFactory likes to work with the element type.
+  BasicType elem_type = typeArrayKlass::cast(array_type)->element_type();
+  result = oopFactory::new_typeArray_nozero(elem_type, len, THREAD);
+
+  // Pass oops back through thread local storage.  Our apparent type to Java
+  // is that we return an oop, but we can block on exit from this routine and
+  // a GC can trash the oop in C's return register.  The generated stub will
+  // fetch the oop from TLS after any possible GC.
+  deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);  // NOTE(review): presumably deoptimizes the caller only when an exception is pending -- confirm
+  thread->set_vm_result(result);  // NOTE(review): if the caller frame is deoptimized, is exposing a non-zeroed array safe? -- confirm
+  JRT_BLOCK_END;
+
+  if (GraphKit::use_ReduceInitialCardMarks()) {
+    // inform GC that we won't do card marks for initializing writes.
+    new_store_pre_barrier(thread);
+  }
+JRT_END
+
 // Note: multianewarray for one dimension is handled inline by GraphKit::new_array.
 
 // multianewarray for 2 dimensions
--- a/hotspot/src/share/vm/opto/runtime.hpp	Sun Sep 25 16:03:29 2011 -0700
+++ b/hotspot/src/share/vm/opto/runtime.hpp	Mon Sep 26 10:24:05 2011 -0700
@@ -114,6 +114,7 @@
   // References to generated stubs
   static address _new_instance_Java;
   static address _new_array_Java;
+  static address _new_array_nozero_Java;
   static address _multianewarray2_Java;
   static address _multianewarray3_Java;
   static address _multianewarray4_Java;
@@ -143,6 +144,7 @@
 
   // Allocate storage for a objArray or typeArray
   static void new_array_C(klassOopDesc* array_klass, int len, JavaThread *thread);
+  static void new_array_nozero_C(klassOopDesc* array_klass, int len, JavaThread *thread);
 
   // Post-slow-path-allocation, pre-initializing-stores step for
   // implementing ReduceInitialCardMarks
@@ -208,6 +210,7 @@
   // access to runtime stubs entry points for java code
   static address new_instance_Java()                     { return _new_instance_Java; }
   static address new_array_Java()                        { return _new_array_Java; }
+  static address new_array_nozero_Java()                 { return _new_array_nozero_Java; }
   static address multianewarray2_Java()                  { return _multianewarray2_Java; }
   static address multianewarray3_Java()                  { return _multianewarray3_Java; }
   static address multianewarray4_Java()                  { return _multianewarray4_Java; }