6896647: card marks can be deferred too long
authorysr
Wed, 13 Jan 2010 15:26:39 -0800
changeset 4637 af4d405aacc1
parent 4636 90e004691873
child 4639 6e62fb1848d3
6896647: card marks can be deferred too long Summary: Deferred card marks are now flushed during the gc prologue. Parallel[Scavege,OldGC] and SerialGC no longer defer card marks generated by COMPILER2 as a result of ReduceInitialCardMarks. For these cases, introduced a diagnostic option to defer the card marks, only for the purposes of testing and diagnostics. CMS and G1 continue to defer card marks. Potential performance concern related to single-threaded flushing of deferred card marks in the gc prologue will be addressed in the future. Reviewed-by: never, johnc
hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp
hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp
hotspot/src/share/vm/gc_interface/collectedHeap.cpp
hotspot/src/share/vm/gc_interface/collectedHeap.hpp
hotspot/src/share/vm/memory/genCollectedHeap.cpp
hotspot/src/share/vm/memory/genCollectedHeap.hpp
hotspot/src/share/vm/opto/graphKit.cpp
hotspot/src/share/vm/opto/runtime.cpp
hotspot/src/share/vm/opto/runtime.hpp
hotspot/src/share/vm/runtime/globals.hpp
hotspot/src/share/vm/runtime/thread.cpp
hotspot/src/share/vm/runtime/vmStructs.cpp
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Jan 13 15:26:39 2010 -0800
@@ -1441,6 +1441,7 @@
 }
 
 jint G1CollectedHeap::initialize() {
+  CollectedHeap::pre_initialize();
   os::enable_vtime();
 
   // Necessary to satisfy locking discipline assertions.
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Jan 13 15:26:39 2010 -0800
@@ -1007,6 +1007,10 @@
     return true;
   }
 
+  virtual bool card_mark_must_follow_store() const {
+    return true;
+  }
+
   bool is_in_young(oop obj) {
     HeapRegion* hr = heap_region_containing(obj);
     return hr != NULL && hr->is_young();
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Wed Jan 13 15:26:39 2010 -0800
@@ -51,6 +51,8 @@
 }
 
 jint ParallelScavengeHeap::initialize() {
+  CollectedHeap::pre_initialize();
+
   // Cannot be initialized until after the flags are parsed
   GenerationSizer flag_parser;
 
@@ -717,10 +719,6 @@
   return young_gen()->allocate(size, true);
 }
 
-void ParallelScavengeHeap::fill_all_tlabs(bool retire) {
-  CollectedHeap::fill_all_tlabs(retire);
-}
-
 void ParallelScavengeHeap::accumulate_statistics_all_tlabs() {
   CollectedHeap::accumulate_statistics_all_tlabs();
 }
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Wed Jan 13 15:26:39 2010 -0800
@@ -54,7 +54,6 @@
  protected:
   static inline size_t total_invocations();
   HeapWord* allocate_new_tlab(size_t size);
-  void fill_all_tlabs(bool retire);
 
  public:
   ParallelScavengeHeap() : CollectedHeap() {
@@ -191,6 +190,10 @@
     return true;
   }
 
+  virtual bool card_mark_must_follow_store() const {
+    return false;
+  }
+
   // Return true if we don't we need a store barrier for
   // initializing stores to an object at this address.
   virtual bool can_elide_initializing_store_barrier(oop new_obj);
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.cpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.cpp	Wed Jan 13 15:26:39 2010 -0800
@@ -59,8 +59,18 @@
                 PerfDataManager::create_string_variable(SUN_GC, "lastCause",
                              80, GCCause::to_string(_gc_lastcause), CHECK);
   }
+  _defer_initial_card_mark = false; // strengthened by subclass in pre_initialize() below.
 }
 
+void CollectedHeap::pre_initialize() {
+  // Used for ReduceInitialCardMarks (when COMPILER2 is used);
+  // otherwise remains unused.
+#ifdef COMPLER2
+  _defer_initial_card_mark = ReduceInitialCardMarks && (DeferInitialCardMark || card_mark_must_follow_store());
+#else
+  assert(_defer_initial_card_mark == false, "Who would set it?");
+#endif
+}
 
 #ifndef PRODUCT
 void CollectedHeap::check_for_bad_heap_word_value(HeapWord* addr, size_t size) {
@@ -140,12 +150,13 @@
 void CollectedHeap::flush_deferred_store_barrier(JavaThread* thread) {
   MemRegion deferred = thread->deferred_card_mark();
   if (!deferred.is_empty()) {
+    assert(_defer_initial_card_mark, "Otherwise should be empty");
     {
       // Verify that the storage points to a parsable object in heap
       DEBUG_ONLY(oop old_obj = oop(deferred.start());)
       assert(is_in(old_obj), "Not in allocated heap");
       assert(!can_elide_initializing_store_barrier(old_obj),
-             "Else should have been filtered in defer_store_barrier()");
+             "Else should have been filtered in new_store_pre_barrier()");
       assert(!is_in_permanent(old_obj), "Sanity: not expected");
       assert(old_obj->is_oop(true), "Not an oop");
       assert(old_obj->is_parsable(), "Will not be concurrently parsable");
@@ -174,9 +185,7 @@
 //     so long as the card-mark is completed before the next
 //     scavenge. For all these cases, we can do a card mark
 //     at the point at which we do a slow path allocation
-//     in the old gen. For uniformity, however, we end
-//     up using the same scheme (see below) for all three
-//     cases (deferring the card-mark appropriately).
+//     in the old gen, i.e. in this call.
 // (b) GenCollectedHeap(ConcurrentMarkSweepGeneration) requires
 //     in addition that the card-mark for an old gen allocated
 //     object strictly follow any associated initializing stores.
@@ -199,12 +208,13 @@
 //     but, like in CMS, because of the presence of concurrent refinement
 //     (much like CMS' precleaning), must strictly follow the oop-store.
 //     Thus, using the same protocol for maintaining the intended
-//     invariants turns out, serendepitously, to be the same for all
-//     three collectors/heap types above.
+//     invariants turns out, serendepitously, to be the same for both
+//     G1 and CMS.
 //
-// For each future collector, this should be reexamined with
-// that specific collector in mind.
-oop CollectedHeap::defer_store_barrier(JavaThread* thread, oop new_obj) {
+// For any future collector, this code should be reexamined with
+// that specific collector in mind, and the documentation above suitably
+// extended and updated.
+oop CollectedHeap::new_store_pre_barrier(JavaThread* thread, oop new_obj) {
   // If a previous card-mark was deferred, flush it now.
   flush_deferred_store_barrier(thread);
   if (can_elide_initializing_store_barrier(new_obj)) {
@@ -212,10 +222,17 @@
     // following the flush above.
     assert(thread->deferred_card_mark().is_empty(), "Error");
   } else {
-    // Remember info for the newly deferred store barrier
-    MemRegion deferred = MemRegion((HeapWord*)new_obj, new_obj->size());
-    assert(!deferred.is_empty(), "Error");
-    thread->set_deferred_card_mark(deferred);
+    MemRegion mr((HeapWord*)new_obj, new_obj->size());
+    assert(!mr.is_empty(), "Error");
+    if (_defer_initial_card_mark) {
+      // Defer the card mark
+      thread->set_deferred_card_mark(mr);
+    } else {
+      // Do the card mark
+      BarrierSet* bs = barrier_set();
+      assert(bs->has_write_region_opt(), "No write_region() on BarrierSet");
+      bs->write_region(mr);
+    }
   }
   return new_obj;
 }
@@ -313,22 +330,6 @@
   return NULL;
 }
 
-void CollectedHeap::fill_all_tlabs(bool retire) {
-  assert(UseTLAB, "should not reach here");
-  // See note in ensure_parsability() below.
-  assert(SafepointSynchronize::is_at_safepoint() ||
-         !is_init_completed(),
-         "should only fill tlabs at safepoint");
-  // The main thread starts allocating via a TLAB even before it
-  // has added itself to the threads list at vm boot-up.
-  assert(Threads::first() != NULL,
-         "Attempt to fill tlabs before main thread has been added"
-         " to threads list is doomed to failure!");
-  for(JavaThread *thread = Threads::first(); thread; thread = thread->next()) {
-     thread->tlab().make_parsable(retire);
-  }
-}
-
 void CollectedHeap::ensure_parsability(bool retire_tlabs) {
   // The second disjunct in the assertion below makes a concession
   // for the start-up verification done while the VM is being
@@ -343,8 +344,24 @@
          "Should only be called at a safepoint or at start-up"
          " otherwise concurrent mutator activity may make heap "
          " unparsable again");
-  if (UseTLAB) {
-    fill_all_tlabs(retire_tlabs);
+  const bool use_tlab = UseTLAB;
+  const bool deferred = _defer_initial_card_mark;
+  // The main thread starts allocating via a TLAB even before it
+  // has added itself to the threads list at vm boot-up.
+  assert(!use_tlab || Threads::first() != NULL,
+         "Attempt to fill tlabs before main thread has been added"
+         " to threads list is doomed to failure!");
+  for (JavaThread *thread = Threads::first(); thread; thread = thread->next()) {
+     if (use_tlab) thread->tlab().make_parsable(retire_tlabs);
+#ifdef COMPILER2
+     // The deferred store barriers must all have been flushed to the
+     // card-table (or other remembered set structure) before GC starts
+     // processing the card-table (or other remembered set).
+     if (deferred) flush_deferred_store_barrier(thread);
+#else
+     assert(!deferred, "Should be false");
+     assert(thread->deferred_card_mark().is_empty(), "Should be empty");
+#endif
   }
 }
 
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.hpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.hpp	Wed Jan 13 15:26:39 2010 -0800
@@ -51,6 +51,9 @@
   // Used for filler objects (static, but initialized in ctor).
   static size_t _filler_array_max_size;
 
+  // Used in support of ReduceInitialCardMarks; only consulted if COMPILER2 is being used
+  bool _defer_initial_card_mark;
+
  protected:
   MemRegion _reserved;
   BarrierSet* _barrier_set;
@@ -70,13 +73,16 @@
   // Constructor
   CollectedHeap();
 
+  // Do common initializations that must follow instance construction,
+  // for example, those needing virtual calls.
+  // This code could perhaps be moved into initialize() but would
+  // be slightly more awkward because we want the latter to be a
+  // pure virtual.
+  void pre_initialize();
+
   // Create a new tlab
   virtual HeapWord* allocate_new_tlab(size_t size);
 
-  // Fix up tlabs to make the heap well-formed again,
-  // optionally retiring the tlabs.
-  virtual void fill_all_tlabs(bool retire);
-
   // Accumulate statistics on all tlabs.
   virtual void accumulate_statistics_all_tlabs();
 
@@ -431,14 +437,25 @@
   // promises to call this function on such a slow-path-allocated
   // object before performing initializations that have elided
   // store barriers. Returns new_obj, or maybe a safer copy thereof.
-  virtual oop defer_store_barrier(JavaThread* thread, oop new_obj);
+  virtual oop new_store_pre_barrier(JavaThread* thread, oop new_obj);
 
   // Answers whether an initializing store to a new object currently
-  // allocated at the given address doesn't need a (deferred) store
+  // allocated at the given address doesn't need a store
   // barrier. Returns "true" if it doesn't need an initializing
   // store barrier; answers "false" if it does.
   virtual bool can_elide_initializing_store_barrier(oop new_obj) = 0;
 
+  // If a compiler is eliding store barriers for TLAB-allocated objects,
+  // we will be informed of a slow-path allocation by a call
+  // to new_store_pre_barrier() above. Such a call precedes the
+  // initialization of the object itself, and no post-store-barriers will
+  // be issued. Some heap types require that the barrier strictly follows
+  // the initializing stores. (This is currently implemented by deferring the
+  // barrier until the next slow-path allocation or gc-related safepoint.)
+  // This interface answers whether a particular heap type needs the card
+  // mark to be thus strictly sequenced after the stores.
+  virtual bool card_mark_must_follow_store() const = 0;
+
   // If the CollectedHeap was asked to defer a store barrier above,
   // this informs it to flush such a deferred store barrier to the
   // remembered set.
--- a/hotspot/src/share/vm/memory/genCollectedHeap.cpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/hotspot/src/share/vm/memory/genCollectedHeap.cpp	Wed Jan 13 15:26:39 2010 -0800
@@ -51,6 +51,8 @@
 }
 
 jint GenCollectedHeap::initialize() {
+  CollectedHeap::pre_initialize();
+
   int i;
   _n_gens = gen_policy()->number_of_generations();
 
@@ -129,6 +131,7 @@
 
   _rem_set = collector_policy()->create_rem_set(_reserved, n_covered_regions);
   set_barrier_set(rem_set()->bs());
+
   _gch = this;
 
   for (i = 0; i < _n_gens; i++) {
--- a/hotspot/src/share/vm/memory/genCollectedHeap.hpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/hotspot/src/share/vm/memory/genCollectedHeap.hpp	Wed Jan 13 15:26:39 2010 -0800
@@ -260,6 +260,10 @@
     return true;
   }
 
+  virtual bool card_mark_must_follow_store() const {
+    return UseConcMarkSweepGC;
+  }
+
   // We don't need barriers for stores to objects in the
   // young gen and, a fortiori, for initializing stores to
   // objects therein. This applies to {DefNew,ParNew}+{Tenured,CMS}
--- a/hotspot/src/share/vm/opto/graphKit.cpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/hotspot/src/share/vm/opto/graphKit.cpp	Wed Jan 13 15:26:39 2010 -0800
@@ -3259,9 +3259,10 @@
   if (use_ReduceInitialCardMarks()
       && obj == just_allocated_object(control())) {
     // We can skip marks on a freshly-allocated object in Eden.
-    // Keep this code in sync with maybe_defer_card_mark() in runtime.cpp.
-    // That routine informs GC to take appropriate compensating steps
-    // so as to make this card-mark elision safe.
+    // Keep this code in sync with new_store_pre_barrier() in runtime.cpp.
+    // That routine informs GC to take appropriate compensating steps,
+    // upon a slow-path allocation, so as to make this card-mark
+    // elision safe.
     return;
   }
 
--- a/hotspot/src/share/vm/opto/runtime.cpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/hotspot/src/share/vm/opto/runtime.cpp	Wed Jan 13 15:26:39 2010 -0800
@@ -143,7 +143,7 @@
 // We failed the fast-path allocation.  Now we need to do a scavenge or GC
 // and try allocation again.
 
-void OptoRuntime::maybe_defer_card_mark(JavaThread* thread) {
+void OptoRuntime::new_store_pre_barrier(JavaThread* thread) {
   // After any safepoint, just before going back to compiled code,
   // we inform the GC that we will be doing initializing writes to
   // this object in the future without emitting card-marks, so
@@ -156,7 +156,7 @@
   assert(Universe::heap()->can_elide_tlab_store_barriers(),
          "compiler must check this first");
   // GC may decide to give back a safer copy of new_obj.
-  new_obj = Universe::heap()->defer_store_barrier(thread, new_obj);
+  new_obj = Universe::heap()->new_store_pre_barrier(thread, new_obj);
   thread->set_vm_result(new_obj);
 }
 
@@ -200,7 +200,7 @@
 
   if (GraphKit::use_ReduceInitialCardMarks()) {
     // inform GC that we won't do card marks for initializing writes.
-    maybe_defer_card_mark(thread);
+    new_store_pre_barrier(thread);
   }
 JRT_END
 
@@ -239,7 +239,7 @@
 
   if (GraphKit::use_ReduceInitialCardMarks()) {
     // inform GC that we won't do card marks for initializing writes.
-    maybe_defer_card_mark(thread);
+    new_store_pre_barrier(thread);
   }
 JRT_END
 
--- a/hotspot/src/share/vm/opto/runtime.hpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/hotspot/src/share/vm/opto/runtime.hpp	Wed Jan 13 15:26:39 2010 -0800
@@ -133,8 +133,9 @@
   // Allocate storage for a objArray or typeArray
   static void new_array_C(klassOopDesc* array_klass, int len, JavaThread *thread);
 
-  // Post-slow-path-allocation step for implementing ReduceInitialCardMarks:
-  static void maybe_defer_card_mark(JavaThread* thread);
+  // Post-slow-path-allocation, pre-initializing-stores step for
+  // implementing ReduceInitialCardMarks
+  static void new_store_pre_barrier(JavaThread* thread);
 
   // Allocate storage for a multi-dimensional arrays
   // Note: needs to be fixed for arbitrary number of dimensions
--- a/hotspot/src/share/vm/runtime/globals.hpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Wed Jan 13 15:26:39 2010 -0800
@@ -2015,6 +2015,10 @@
   diagnostic(bool, GCParallelVerificationEnabled, true,                     \
           "Enable parallel memory system verification")                     \
                                                                             \
+  diagnostic(bool, DeferInitialCardMark, false,                             \
+          "When +ReduceInitialCardMarks, explicitly defer any that "        \
+           "may arise from new_pre_store_barrier")                          \
+                                                                            \
   diagnostic(bool, VerifyRememberedSets, false,                             \
           "Verify GC remembered sets")                                      \
                                                                             \
--- a/hotspot/src/share/vm/runtime/thread.cpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/hotspot/src/share/vm/runtime/thread.cpp	Wed Jan 13 15:26:39 2010 -0800
@@ -2357,9 +2357,8 @@
 };
 
 void JavaThread::oops_do(OopClosure* f, CodeBlobClosure* cf) {
-  // Flush deferred store-barriers, if any, associated with
-  // initializing stores done by this JavaThread in the current epoch.
-  Universe::heap()->flush_deferred_store_barrier(this);
+  // Verify that the deferred card marks have been flushed.
+  assert(deferred_card_mark().is_empty(), "Should be empty during GC");
 
   // The ThreadProfiler oops_do is done from FlatProfiler::oops_do
   // since there may be more than one thread using each ThreadProfiler.
--- a/hotspot/src/share/vm/runtime/vmStructs.cpp	Tue Jan 12 14:56:46 2010 -0800
+++ b/hotspot/src/share/vm/runtime/vmStructs.cpp	Wed Jan 13 15:26:39 2010 -0800
@@ -309,6 +309,7 @@
   nonstatic_field(CollectedHeap,               _reserved,                                     MemRegion)                             \
   nonstatic_field(SharedHeap,                  _perm_gen,                                     PermGen*)                              \
   nonstatic_field(CollectedHeap,               _barrier_set,                                  BarrierSet*)                           \
+  nonstatic_field(CollectedHeap,               _defer_initial_card_mark,                      bool)                                  \
   nonstatic_field(CollectedHeap,               _is_gc_active,                                 bool)                                  \
   nonstatic_field(CompactibleSpace,            _compaction_top,                               HeapWord*)                             \
   nonstatic_field(CompactibleSpace,            _first_dead,                                   HeapWord*)                             \