6888898: CMS: ReduceInitialCardMarks unsafe in the presence of cms precleaning
6889757: G1: enable card mark elision for initializing writes from compiled code (ReduceInitialCardMarks)
Summary: Defer the (compiler-elided) card-mark upon a slow-path allocation until after the store and before the next subsequent safepoint; G1 now answers yes to can_elide_tlab_store_barriers().
Reviewed-by: jcoomes, kvn, never
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Wed Oct 07 19:01:55 2009 -0400
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Fri Oct 16 02:05:46 2009 -0700
@@ -992,11 +992,39 @@
// Can a compiler initialize a new object without store barriers?
// This permission only extends from the creation of a new object
- // via a TLAB up to the first subsequent safepoint.
+ // via a TLAB up to the first subsequent safepoint. If such permission
+ // is granted for this heap type, the compiler promises to call
+ // defer_store_barrier() below on any slow path allocation of
+ // a new object for which such initializing store barriers will
+ // have been elided. G1, like CMS, allows this, but should be
+ // ready to provide a compensating write barrier as necessary
+ // if that storage came out of a non-young region. The efficiency
+ // of this implementation depends crucially on being able to
+ // answer very efficiently in constant time whether a piece of
+ // storage in the heap comes from a young region or not.
+ // See ReduceInitialCardMarks.
virtual bool can_elide_tlab_store_barriers() const {
- // Since G1's TLAB's may, on occasion, come from non-young regions
- // as well. (Is there a flag controlling that? XXX)
- return false;
+ return true;
+ }
+
+ bool is_in_young(oop obj) {
+ HeapRegion* hr = heap_region_containing(obj);
+ return hr != NULL && hr->is_young();
+ }
+
+ // We don't need barriers for initializing stores to objects
+ // in the young gen: for the SATB pre-barrier, there is no
+ // pre-value that needs to be remembered; for the remembered-set
+ // update logging post-barrier, we don't maintain remembered set
+ // information for young gen objects. Note that non-generational
+ // G1 does not have any "young" objects, should not elide
+ // the rs logging barrier and so should always answer false below.
+ // However, non-generational G1 (-XX:-G1Gen) appears to have
+ // bit-rotted so was not tested below.
+ virtual bool can_elide_initializing_store_barrier(oop new_obj) {
+ assert(G1Gen || !is_in_young(new_obj),
+ "Non-generational G1 should never return true below");
+ return is_in_young(new_obj);
}
// Can a compiler elide a store barrier when it writes
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Wed Oct 07 19:01:55 2009 -0400
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Fri Oct 16 02:05:46 2009 -0700
@@ -314,41 +314,6 @@
return false;
}
-// Static method
-bool ParallelScavengeHeap::is_in_young(oop* p) {
- ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
- assert(heap->kind() == CollectedHeap::ParallelScavengeHeap,
- "Must be ParallelScavengeHeap");
-
- PSYoungGen* young_gen = heap->young_gen();
-
- if (young_gen->is_in_reserved(p)) {
- return true;
- }
-
- return false;
-}
-
-// Static method
-bool ParallelScavengeHeap::is_in_old_or_perm(oop* p) {
- ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
- assert(heap->kind() == CollectedHeap::ParallelScavengeHeap,
- "Must be ParallelScavengeHeap");
-
- PSOldGen* old_gen = heap->old_gen();
- PSPermGen* perm_gen = heap->perm_gen();
-
- if (old_gen->is_in_reserved(p)) {
- return true;
- }
-
- if (perm_gen->is_in_reserved(p)) {
- return true;
- }
-
- return false;
-}
-
// There are two levels of allocation policy here.
//
// When an allocation request fails, the requesting thread must invoke a VM
@@ -764,6 +729,13 @@
CollectedHeap::resize_all_tlabs();
}
+bool ParallelScavengeHeap::can_elide_initializing_store_barrier(oop new_obj) {
+ // We don't need barriers for stores to objects in the
+ // young gen and, a fortiori, for initializing stores to
+ // objects therein.
+ return is_in_young(new_obj);
+}
+
// This method is used by System.gc() and JVMTI.
void ParallelScavengeHeap::collect(GCCause::Cause cause) {
assert(!Heap_lock->owned_by_self(),
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Wed Oct 07 19:01:55 2009 -0400
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Fri Oct 16 02:05:46 2009 -0700
@@ -129,8 +129,8 @@
return perm_gen()->is_in(p);
}
- static bool is_in_young(oop *p); // reserved part
- static bool is_in_old_or_perm(oop *p); // reserved part
+ inline bool is_in_young(oop p); // reserved part
+ inline bool is_in_old_or_perm(oop p); // reserved part
// Memory allocation. "gc_time_limit_was_exceeded" will
// be set to true if the adaptive size policy determine that
@@ -191,6 +191,10 @@
return true;
}
+ // Return true if we don't need a store barrier for
+ // initializing stores to an object at this address.
+ virtual bool can_elide_initializing_store_barrier(oop new_obj);
+
// Can a compiler elide a store barrier when it writes
// a permanent oop into the heap? Applies when the compiler
// is storing x to the heap, where x->is_perm() is true.
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.inline.hpp Wed Oct 07 19:01:55 2009 -0400
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.inline.hpp Fri Oct 16 02:05:46 2009 -0700
@@ -41,3 +41,11 @@
PSMarkSweep::invoke(maximum_compaction);
}
}
+
+inline bool ParallelScavengeHeap::is_in_young(oop p) {
+ return young_gen()->is_in_reserved(p);
+}
+
+inline bool ParallelScavengeHeap::is_in_old_or_perm(oop p) {
+ return old_gen()->is_in_reserved(p) || perm_gen()->is_in_reserved(p);
+}
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.cpp Wed Oct 07 19:01:55 2009 -0400
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.cpp Fri Oct 16 02:05:46 2009 -0700
@@ -137,6 +137,89 @@
return obj;
}
+void CollectedHeap::flush_deferred_store_barrier(JavaThread* thread) {
+ MemRegion deferred = thread->deferred_card_mark();
+ if (!deferred.is_empty()) {
+ {
+ // Verify that the storage points to a parsable object in heap
+ DEBUG_ONLY(oop old_obj = oop(deferred.start());)
+ assert(is_in(old_obj), "Not in allocated heap");
+ assert(!can_elide_initializing_store_barrier(old_obj),
+ "Else should have been filtered in defer_store_barrier()");
+ assert(!is_in_permanent(old_obj), "Sanity: not expected");
+ assert(old_obj->is_oop(true), "Not an oop");
+ assert(old_obj->is_parsable(), "Will not be concurrently parsable");
+ assert(deferred.word_size() == (size_t)(old_obj->size()),
+ "Mismatch: multiple objects?");
+ }
+ BarrierSet* bs = barrier_set();
+ assert(bs->has_write_region_opt(), "No write_region() on BarrierSet");
+ bs->write_region(deferred);
+ // "Clear" the deferred_card_mark field
+ thread->set_deferred_card_mark(MemRegion());
+ }
+ assert(thread->deferred_card_mark().is_empty(), "invariant");
+}
+
+// Helper for ReduceInitialCardMarks. For performance,
+// compiled code may elide card-marks for initializing stores
+// to a newly allocated object along the fast-path. We
+// compensate for such elided card-marks as follows:
+// (a) Generational, non-concurrent collectors, such as
+// GenCollectedHeap(ParNew,DefNew,Tenured) and
+// ParallelScavengeHeap(ParallelGC, ParallelOldGC)
+// need the card-mark if and only if the region is
+// in the old gen, and do not care if the card-mark
+// succeeds or precedes the initializing stores themselves,
+// so long as the card-mark is completed before the next
+// scavenge. For all these cases, we can do a card mark
+// at the point at which we do a slow path allocation
+// in the old gen. For uniformity, however, we end
+// up using the same scheme (see below) for all three
+// cases (deferring the card-mark appropriately).
+// (b) GenCollectedHeap(ConcurrentMarkSweepGeneration) requires
+// in addition that the card-mark for an old gen allocated
+// object strictly follow any associated initializing stores.
+// In these cases, the memRegion remembered below is
+// used to card-mark the entire region either just before the next
+// slow-path allocation by this thread or just before the next scavenge or
+// CMS-associated safepoint, whichever of these events happens first.
+// (The implicit assumption is that the object has been fully
+// initialized by this point, a fact that we assert when doing the
+// card-mark.)
+// (c) G1CollectedHeap(G1) uses two kinds of write barriers. When a
+// G1 concurrent marking is in progress an SATB (pre-write-)barrier is
+// used to remember the pre-value of any store. Initializing
+// stores will not need this barrier, so we need not worry about
+// compensating for the missing pre-barrier here. Turning now
+// to the post-barrier, we note that G1 needs a RS update barrier
+// which simply enqueues a (sequence of) dirty cards which may
+// optionally be refined by the concurrent update threads. Note
+// that this barrier need only be applied to a non-young write,
+// but, like in CMS, because of the presence of concurrent refinement
+// (much like CMS' precleaning), must strictly follow the oop-store.
+// Thus, the protocol for maintaining the intended
+// invariants turns out, serendipitously, to be the same for all
+// three collectors/heap types above.
+//
+// For each future collector, this should be reexamined with
+// that specific collector in mind.
+oop CollectedHeap::defer_store_barrier(JavaThread* thread, oop new_obj) {
+ // If a previous card-mark was deferred, flush it now.
+ flush_deferred_store_barrier(thread);
+ if (can_elide_initializing_store_barrier(new_obj)) {
+ // The deferred_card_mark region should be empty
+ // following the flush above.
+ assert(thread->deferred_card_mark().is_empty(), "Error");
+ } else {
+ // Remember info for the newly deferred store barrier
+ MemRegion deferred = MemRegion((HeapWord*)new_obj, new_obj->size());
+ assert(!deferred.is_empty(), "Error");
+ thread->set_deferred_card_mark(deferred);
+ }
+ return new_obj;
+}
+
size_t CollectedHeap::filler_array_hdr_size() {
return size_t(arrayOopDesc::header_size(T_INT));
}
@@ -225,16 +308,6 @@
fill_with_object_impl(start, words);
}
-oop CollectedHeap::new_store_barrier(oop new_obj) {
- // %%% This needs refactoring. (It was imported from the server compiler.)
- guarantee(can_elide_tlab_store_barriers(), "store barrier elision not supported");
- BarrierSet* bs = this->barrier_set();
- assert(bs->has_write_region_opt(), "Barrier set does not have write_region");
- int new_size = new_obj->size();
- bs->write_region(MemRegion((HeapWord*)new_obj, new_size));
- return new_obj;
-}
-
HeapWord* CollectedHeap::allocate_new_tlab(size_t size) {
guarantee(false, "thread-local allocation buffers not supported");
return NULL;
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.hpp Wed Oct 07 19:01:55 2009 -0400
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.hpp Fri Oct 16 02:05:46 2009 -0700
@@ -415,9 +415,14 @@
guarantee(false, "thread-local allocation buffers not supported");
return 0;
}
+
// Can a compiler initialize a new object without store barriers?
// This permission only extends from the creation of a new object
- // via a TLAB up to the first subsequent safepoint.
+ // via a TLAB up to the first subsequent safepoint. If such permission
+ // is granted for this heap type, the compiler promises to call
+ // defer_store_barrier() below on any slow path allocation of
+ // a new object for which such initializing store barriers will
+ // have been elided.
virtual bool can_elide_tlab_store_barriers() const = 0;
// If a compiler is eliding store barriers for TLAB-allocated objects,
@@ -425,8 +430,19 @@
// an object allocated anywhere. The compiler's runtime support
// promises to call this function on such a slow-path-allocated
// object before performing initializations that have elided
- // store barriers. Returns new_obj, or maybe a safer copy thereof.
- virtual oop new_store_barrier(oop new_obj);
+ // store barriers. Returns new_obj, or maybe a safer copy thereof.
+ virtual oop defer_store_barrier(JavaThread* thread, oop new_obj);
+
+ // Answers whether an initializing store to a new object currently
+ // allocated at the given address doesn't need a (deferred) store
+ // barrier. Returns "true" if it doesn't need an initializing
+ // store barrier; answers "false" if it does.
+ virtual bool can_elide_initializing_store_barrier(oop new_obj) = 0;
+
+ // If the CollectedHeap was asked to defer a store barrier above,
+ // this informs it to flush such a deferred store barrier to the
+ // remembered set.
+ virtual void flush_deferred_store_barrier(JavaThread* thread);
// Can a compiler elide a store barrier when it writes
// a permanent oop into the heap? Applies when the compiler
--- a/hotspot/src/share/vm/memory/genCollectedHeap.hpp Wed Oct 07 19:01:55 2009 -0400
+++ b/hotspot/src/share/vm/memory/genCollectedHeap.hpp Fri Oct 16 02:05:46 2009 -0700
@@ -260,6 +260,17 @@
return true;
}
+ // We don't need barriers for stores to objects in the
+ // young gen and, a fortiori, for initializing stores to
+ // objects therein. This applies to {DefNew,ParNew}+{Tenured,CMS}
+ // only and may need to be re-examined in case other
+ // kinds of collectors are implemented in the future.
+ virtual bool can_elide_initializing_store_barrier(oop new_obj) {
+ assert(UseParNewGC || UseSerialGC || UseConcMarkSweepGC,
+ "Check can_elide_initializing_store_barrier() for this collector");
+ return is_in_youngest((void*)new_obj);
+ }
+
// Can a compiler elide a store barrier when it writes
// a permanent oop into the heap? Applies when the compiler
// is storing x to the heap, where x->is_perm() is true.
--- a/hotspot/src/share/vm/opto/graphKit.cpp Wed Oct 07 19:01:55 2009 -0400
+++ b/hotspot/src/share/vm/opto/graphKit.cpp Fri Oct 16 02:05:46 2009 -0700
@@ -3186,6 +3186,15 @@
return;
}
+ if (use_ReduceInitialCardMarks()
+ && obj == just_allocated_object(control())) {
+ // We can skip marks on a freshly-allocated object in Eden.
+ // Keep this code in sync with maybe_defer_card_mark() in runtime.cpp.
+ // That routine informs GC to take appropriate compensating steps
+ // so as to make this card-mark elision safe.
+ return;
+ }
+
if (!use_precise) {
// All card marks for a (non-array) instance are in one place:
adr = obj;
--- a/hotspot/src/share/vm/opto/library_call.cpp Wed Oct 07 19:01:55 2009 -0400
+++ b/hotspot/src/share/vm/opto/library_call.cpp Fri Oct 16 02:05:46 2009 -0700
@@ -4160,13 +4160,13 @@
result_mem ->set_req(_objArray_path, reset_memory());
}
}
- // We can dispense with card marks if we know the allocation
- // comes out of eden (TLAB)... In fact, ReduceInitialCardMarks
- // causes the non-eden paths to simulate a fresh allocation,
- // insofar that no further card marks are required to initialize
- // the object.
-
// Otherwise, there are no card marks to worry about.
+ // (We can dispense with card marks if we know the allocation
+ // comes out of eden (TLAB)... In fact, ReduceInitialCardMarks
+ // causes the non-eden paths to take compensating steps to
+ // simulate a fresh allocation, so that no further
+ // card marks are required in compiled code to initialize
+ // the object.)
if (!stopped()) {
copy_to_clone(obj, alloc_obj, obj_size, true, false);
--- a/hotspot/src/share/vm/opto/runtime.cpp Wed Oct 07 19:01:55 2009 -0400
+++ b/hotspot/src/share/vm/opto/runtime.cpp Fri Oct 16 02:05:46 2009 -0700
@@ -143,18 +143,20 @@
// We failed the fast-path allocation. Now we need to do a scavenge or GC
// and try allocation again.
-void OptoRuntime::do_eager_card_mark(JavaThread* thread) {
+void OptoRuntime::maybe_defer_card_mark(JavaThread* thread) {
// After any safepoint, just before going back to compiled code,
- // we perform a card mark. This lets the compiled code omit
- // card marks for initialization of new objects.
- // Keep this code consistent with GraphKit::store_barrier.
+ // we inform the GC that we will be doing initializing writes to
+ // this object in the future without emitting card-marks, so
+ // GC may take any compensating steps.
+ // NOTE: Keep this code consistent with GraphKit::store_barrier.
oop new_obj = thread->vm_result();
if (new_obj == NULL) return;
assert(Universe::heap()->can_elide_tlab_store_barriers(),
"compiler must check this first");
- new_obj = Universe::heap()->new_store_barrier(new_obj);
+ // GC may decide to give back a safer copy of new_obj.
+ new_obj = Universe::heap()->defer_store_barrier(thread, new_obj);
thread->set_vm_result(new_obj);
}
@@ -197,8 +199,8 @@
JRT_BLOCK_END;
if (GraphKit::use_ReduceInitialCardMarks()) {
- // do them now so we don't have to do them on the fast path
- do_eager_card_mark(thread);
+ // inform GC that we won't do card marks for initializing writes.
+ maybe_defer_card_mark(thread);
}
JRT_END
@@ -236,8 +238,8 @@
JRT_BLOCK_END;
if (GraphKit::use_ReduceInitialCardMarks()) {
- // do them now so we don't have to do them on the fast path
- do_eager_card_mark(thread);
+ // inform GC that we won't do card marks for initializing writes.
+ maybe_defer_card_mark(thread);
}
JRT_END
--- a/hotspot/src/share/vm/opto/runtime.hpp Wed Oct 07 19:01:55 2009 -0400
+++ b/hotspot/src/share/vm/opto/runtime.hpp Fri Oct 16 02:05:46 2009 -0700
@@ -133,8 +133,8 @@
// Allocate storage for a objArray or typeArray
static void new_array_C(klassOopDesc* array_klass, int len, JavaThread *thread);
- // Post-allocation step for implementing ReduceInitialCardMarks:
- static void do_eager_card_mark(JavaThread* thread);
+ // Post-slow-path-allocation step for implementing ReduceInitialCardMarks:
+ static void maybe_defer_card_mark(JavaThread* thread);
// Allocate storage for a multi-dimensional arrays
// Note: needs to be fixed for arbitrary number of dimensions
--- a/hotspot/src/share/vm/runtime/thread.cpp Wed Oct 07 19:01:55 2009 -0400
+++ b/hotspot/src/share/vm/runtime/thread.cpp Fri Oct 16 02:05:46 2009 -0700
@@ -1213,6 +1213,7 @@
{
initialize();
_is_attaching = is_attaching;
+ assert(_deferred_card_mark.is_empty(), "Default MemRegion ctor");
}
bool JavaThread::reguard_stack(address cur_sp) {
@@ -2318,6 +2319,10 @@
void JavaThread::oops_do(OopClosure* f, CodeBlobClosure* cf) {
+ // Flush deferred store-barriers, if any, associated with
+ // initializing stores done by this JavaThread in the current epoch.
+ Universe::heap()->flush_deferred_store_barrier(this);
+
// The ThreadProfiler oops_do is done from FlatProfiler::oops_do
// since there may be more than one thread using each ThreadProfiler.
--- a/hotspot/src/share/vm/runtime/thread.hpp Wed Oct 07 19:01:55 2009 -0400
+++ b/hotspot/src/share/vm/runtime/thread.hpp Fri Oct 16 02:05:46 2009 -0700
@@ -684,8 +684,13 @@
methodOop _callee_target;
// Oop results of VM runtime calls
- oop _vm_result; // Used to pass back an oop result into Java code, GC-preserved
- oop _vm_result_2; // Used to pass back an oop result into Java code, GC-preserved
+ oop _vm_result; // Used to pass back an oop result into Java code, GC-preserved
+ oop _vm_result_2; // Used to pass back an oop result into Java code, GC-preserved
+
+ // See ReduceInitialCardMarks: this holds the precise space interval of
+ // the most recent slow path allocation for which compiled code has
+ // elided card-marks for performance along the fast-path.
+ MemRegion _deferred_card_mark;
MonitorChunk* _monitor_chunks; // Contains the off stack monitors
// allocated during deoptimization
@@ -1082,6 +1087,9 @@
oop vm_result_2() const { return _vm_result_2; }
void set_vm_result_2 (oop x) { _vm_result_2 = x; }
+ MemRegion deferred_card_mark() const { return _deferred_card_mark; }
+ void set_deferred_card_mark(MemRegion mr) { _deferred_card_mark = mr; }
+
// Exception handling for compiled methods
oop exception_oop() const { return _exception_oop; }
int exception_stack_size() const { return _exception_stack_size; }