# HG changeset patch # User ysr # Date 1263425199 28800 # Node ID af4d405aacc161ba02b88b45d54cf37bb6465157 # Parent 90e00469187362a0525302cd4c61109fb5379f61 6896647: card marks can be deferred too long Summary: Deferred card marks are now flushed during the gc prologue. Parallel[Scavege,OldGC] and SerialGC no longer defer card marks generated by COMPILER2 as a result of ReduceInitialCardMarks. For these cases, introduced a diagnostic option to defer the card marks, only for the purposes of testing and diagnostics. CMS and G1 continue to defer card marks. Potential performance concern related to single-threaded flushing of deferred card marks in the gc prologue will be addressed in the future. Reviewed-by: never, johnc diff -r 90e004691873 -r af4d405aacc1 hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Tue Jan 12 14:56:46 2010 -0800 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Wed Jan 13 15:26:39 2010 -0800 @@ -1441,6 +1441,7 @@ } jint G1CollectedHeap::initialize() { + CollectedHeap::pre_initialize(); os::enable_vtime(); // Necessary to satisfy locking discipline assertions. diff -r 90e004691873 -r af4d405aacc1 hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Tue Jan 12 14:56:46 2010 -0800 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Wed Jan 13 15:26:39 2010 -0800 @@ -1007,6 +1007,10 @@ return true; } + virtual bool card_mark_must_follow_store() const { + return true; + } + bool is_in_young(oop obj) { HeapRegion* hr = heap_region_containing(obj); return hr != NULL && hr->is_young(); diff -r 90e004691873 -r af4d405aacc1 hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Tue Jan 12 14:56:46 2010 -0800 +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Wed Jan 13 15:26:39 2010 -0800 @@ -51,6 +51,8 @@ } jint ParallelScavengeHeap::initialize() { + CollectedHeap::pre_initialize(); + // Cannot be initialized until after the flags are parsed GenerationSizer flag_parser; @@ -717,10 +719,6 @@ return young_gen()->allocate(size, true); } -void ParallelScavengeHeap::fill_all_tlabs(bool retire) { - CollectedHeap::fill_all_tlabs(retire); -} - void ParallelScavengeHeap::accumulate_statistics_all_tlabs() { CollectedHeap::accumulate_statistics_all_tlabs(); } diff -r 90e004691873 -r af4d405aacc1 hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Tue Jan 12 14:56:46 2010 -0800 +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Wed Jan 13 15:26:39 2010 -0800 @@ -54,7 +54,6 @@ protected: static inline size_t total_invocations(); HeapWord* allocate_new_tlab(size_t size); - void fill_all_tlabs(bool retire); public: ParallelScavengeHeap() : CollectedHeap() { @@ -191,6 +190,10 @@ return true; } + virtual bool card_mark_must_follow_store() const { + return false; + } + // Return true if we don't we need a store barrier for // initializing stores to an object at this address. virtual bool can_elide_initializing_store_barrier(oop new_obj); diff -r 90e004691873 -r af4d405aacc1 hotspot/src/share/vm/gc_interface/collectedHeap.cpp --- a/hotspot/src/share/vm/gc_interface/collectedHeap.cpp Tue Jan 12 14:56:46 2010 -0800 +++ b/hotspot/src/share/vm/gc_interface/collectedHeap.cpp Wed Jan 13 15:26:39 2010 -0800 @@ -59,8 +59,18 @@ PerfDataManager::create_string_variable(SUN_GC, "lastCause", 80, GCCause::to_string(_gc_lastcause), CHECK); } + _defer_initial_card_mark = false; // strengthened by subclass in pre_initialize() below. } +void CollectedHeap::pre_initialize() { + // Used for ReduceInitialCardMarks (when COMPILER2 is used); + // otherwise remains unused. +#ifdef COMPLER2 + _defer_initial_card_mark = ReduceInitialCardMarks && (DeferInitialCardMark || card_mark_must_follow_store()); +#else + assert(_defer_initial_card_mark == false, "Who would set it?"); +#endif +} #ifndef PRODUCT void CollectedHeap::check_for_bad_heap_word_value(HeapWord* addr, size_t size) { @@ -140,12 +150,13 @@ void CollectedHeap::flush_deferred_store_barrier(JavaThread* thread) { MemRegion deferred = thread->deferred_card_mark(); if (!deferred.is_empty()) { + assert(_defer_initial_card_mark, "Otherwise should be empty"); { // Verify that the storage points to a parsable object in heap DEBUG_ONLY(oop old_obj = oop(deferred.start());) assert(is_in(old_obj), "Not in allocated heap"); assert(!can_elide_initializing_store_barrier(old_obj), - "Else should have been filtered in defer_store_barrier()"); + "Else should have been filtered in new_store_pre_barrier()"); assert(!is_in_permanent(old_obj), "Sanity: not expected"); assert(old_obj->is_oop(true), "Not an oop"); assert(old_obj->is_parsable(), "Will not be concurrently parsable"); @@ -174,9 +185,7 @@ // so long as the card-mark is completed before the next // scavenge. For all these cases, we can do a card mark // at the point at which we do a slow path allocation -// in the old gen. For uniformity, however, we end -// up using the same scheme (see below) for all three -// cases (deferring the card-mark appropriately). +// in the old gen, i.e. in this call. // (b) GenCollectedHeap(ConcurrentMarkSweepGeneration) requires // in addition that the card-mark for an old gen allocated // object strictly follow any associated initializing stores. @@ -199,12 +208,13 @@ // but, like in CMS, because of the presence of concurrent refinement // (much like CMS' precleaning), must strictly follow the oop-store. // Thus, using the same protocol for maintaining the intended -// invariants turns out, serendepitously, to be the same for all -// three collectors/heap types above. +// invariants turns out, serendepitously, to be the same for both +// G1 and CMS. // -// For each future collector, this should be reexamined with -// that specific collector in mind. -oop CollectedHeap::defer_store_barrier(JavaThread* thread, oop new_obj) { +// For any future collector, this code should be reexamined with +// that specific collector in mind, and the documentation above suitably +// extended and updated. +oop CollectedHeap::new_store_pre_barrier(JavaThread* thread, oop new_obj) { // If a previous card-mark was deferred, flush it now. flush_deferred_store_barrier(thread); if (can_elide_initializing_store_barrier(new_obj)) { @@ -212,10 +222,17 @@ // following the flush above. assert(thread->deferred_card_mark().is_empty(), "Error"); } else { - // Remember info for the newly deferred store barrier - MemRegion deferred = MemRegion((HeapWord*)new_obj, new_obj->size()); - assert(!deferred.is_empty(), "Error"); - thread->set_deferred_card_mark(deferred); + MemRegion mr((HeapWord*)new_obj, new_obj->size()); + assert(!mr.is_empty(), "Error"); + if (_defer_initial_card_mark) { + // Defer the card mark + thread->set_deferred_card_mark(mr); + } else { + // Do the card mark + BarrierSet* bs = barrier_set(); + assert(bs->has_write_region_opt(), "No write_region() on BarrierSet"); + bs->write_region(mr); + } } return new_obj; } @@ -313,22 +330,6 @@ return NULL; } -void CollectedHeap::fill_all_tlabs(bool retire) { - assert(UseTLAB, "should not reach here"); - // See note in ensure_parsability() below. - assert(SafepointSynchronize::is_at_safepoint() || - !is_init_completed(), - "should only fill tlabs at safepoint"); - // The main thread starts allocating via a TLAB even before it - // has added itself to the threads list at vm boot-up. - assert(Threads::first() != NULL, - "Attempt to fill tlabs before main thread has been added" - " to threads list is doomed to failure!"); - for(JavaThread *thread = Threads::first(); thread; thread = thread->next()) { - thread->tlab().make_parsable(retire); - } -} - void CollectedHeap::ensure_parsability(bool retire_tlabs) { // The second disjunct in the assertion below makes a concession // for the start-up verification done while the VM is being @@ -343,8 +344,24 @@ "Should only be called at a safepoint or at start-up" " otherwise concurrent mutator activity may make heap " " unparsable again"); - if (UseTLAB) { - fill_all_tlabs(retire_tlabs); + const bool use_tlab = UseTLAB; + const bool deferred = _defer_initial_card_mark; + // The main thread starts allocating via a TLAB even before it + // has added itself to the threads list at vm boot-up. + assert(!use_tlab || Threads::first() != NULL, + "Attempt to fill tlabs before main thread has been added" + " to threads list is doomed to failure!"); + for (JavaThread *thread = Threads::first(); thread; thread = thread->next()) { + if (use_tlab) thread->tlab().make_parsable(retire_tlabs); +#ifdef COMPILER2 + // The deferred store barriers must all have been flushed to the + // card-table (or other remembered set structure) before GC starts + // processing the card-table (or other remembered set). + if (deferred) flush_deferred_store_barrier(thread); +#else + assert(!deferred, "Should be false"); + assert(thread->deferred_card_mark().is_empty(), "Should be empty"); +#endif } } diff -r 90e004691873 -r af4d405aacc1 hotspot/src/share/vm/gc_interface/collectedHeap.hpp --- a/hotspot/src/share/vm/gc_interface/collectedHeap.hpp Tue Jan 12 14:56:46 2010 -0800 +++ b/hotspot/src/share/vm/gc_interface/collectedHeap.hpp Wed Jan 13 15:26:39 2010 -0800 @@ -51,6 +51,9 @@ // Used for filler objects (static, but initialized in ctor). static size_t _filler_array_max_size; + // Used in support of ReduceInitialCardMarks; only consulted if COMPILER2 is being used + bool _defer_initial_card_mark; + protected: MemRegion _reserved; BarrierSet* _barrier_set; @@ -70,13 +73,16 @@ // Constructor CollectedHeap(); + // Do common initializations that must follow instance construction, + // for example, those needing virtual calls. + // This code could perhaps be moved into initialize() but would + // be slightly more awkward because we want the latter to be a + // pure virtual. + void pre_initialize(); + // Create a new tlab virtual HeapWord* allocate_new_tlab(size_t size); - // Fix up tlabs to make the heap well-formed again, - // optionally retiring the tlabs. - virtual void fill_all_tlabs(bool retire); - // Accumulate statistics on all tlabs. virtual void accumulate_statistics_all_tlabs(); @@ -431,14 +437,25 @@ // promises to call this function on such a slow-path-allocated // object before performing initializations that have elided // store barriers. Returns new_obj, or maybe a safer copy thereof. - virtual oop defer_store_barrier(JavaThread* thread, oop new_obj); + virtual oop new_store_pre_barrier(JavaThread* thread, oop new_obj); // Answers whether an initializing store to a new object currently - // allocated at the given address doesn't need a (deferred) store + // allocated at the given address doesn't need a store // barrier. Returns "true" if it doesn't need an initializing // store barrier; answers "false" if it does. virtual bool can_elide_initializing_store_barrier(oop new_obj) = 0; + // If a compiler is eliding store barriers for TLAB-allocated objects, + // we will be informed of a slow-path allocation by a call + // to new_store_pre_barrier() above. Such a call precedes the + // initialization of the object itself, and no post-store-barriers will + // be issued. Some heap types require that the barrier strictly follows + // the initializing stores. (This is currently implemented by deferring the + // barrier until the next slow-path allocation or gc-related safepoint.) + // This interface answers whether a particular heap type needs the card + // mark to be thus strictly sequenced after the stores. + virtual bool card_mark_must_follow_store() const = 0; + // If the CollectedHeap was asked to defer a store barrier above, // this informs it to flush such a deferred store barrier to the // remembered set. diff -r 90e004691873 -r af4d405aacc1 hotspot/src/share/vm/memory/genCollectedHeap.cpp --- a/hotspot/src/share/vm/memory/genCollectedHeap.cpp Tue Jan 12 14:56:46 2010 -0800 +++ b/hotspot/src/share/vm/memory/genCollectedHeap.cpp Wed Jan 13 15:26:39 2010 -0800 @@ -51,6 +51,8 @@ } jint GenCollectedHeap::initialize() { + CollectedHeap::pre_initialize(); + int i; _n_gens = gen_policy()->number_of_generations(); @@ -129,6 +131,7 @@ _rem_set = collector_policy()->create_rem_set(_reserved, n_covered_regions); set_barrier_set(rem_set()->bs()); + _gch = this; for (i = 0; i < _n_gens; i++) { diff -r 90e004691873 -r af4d405aacc1 hotspot/src/share/vm/memory/genCollectedHeap.hpp --- a/hotspot/src/share/vm/memory/genCollectedHeap.hpp Tue Jan 12 14:56:46 2010 -0800 +++ b/hotspot/src/share/vm/memory/genCollectedHeap.hpp Wed Jan 13 15:26:39 2010 -0800 @@ -260,6 +260,10 @@ return true; } + virtual bool card_mark_must_follow_store() const { + return UseConcMarkSweepGC; + } + // We don't need barriers for stores to objects in the // young gen and, a fortiori, for initializing stores to // objects therein. This applies to {DefNew,ParNew}+{Tenured,CMS} diff -r 90e004691873 -r af4d405aacc1 hotspot/src/share/vm/opto/graphKit.cpp --- a/hotspot/src/share/vm/opto/graphKit.cpp Tue Jan 12 14:56:46 2010 -0800 +++ b/hotspot/src/share/vm/opto/graphKit.cpp Wed Jan 13 15:26:39 2010 -0800 @@ -3259,9 +3259,10 @@ if (use_ReduceInitialCardMarks() && obj == just_allocated_object(control())) { // We can skip marks on a freshly-allocated object in Eden. - // Keep this code in sync with maybe_defer_card_mark() in runtime.cpp. - // That routine informs GC to take appropriate compensating steps - // so as to make this card-mark elision safe. + // Keep this code in sync with new_store_pre_barrier() in runtime.cpp. + // That routine informs GC to take appropriate compensating steps, + // upon a slow-path allocation, so as to make this card-mark + // elision safe. return; } diff -r 90e004691873 -r af4d405aacc1 hotspot/src/share/vm/opto/runtime.cpp --- a/hotspot/src/share/vm/opto/runtime.cpp Tue Jan 12 14:56:46 2010 -0800 +++ b/hotspot/src/share/vm/opto/runtime.cpp Wed Jan 13 15:26:39 2010 -0800 @@ -143,7 +143,7 @@ // We failed the fast-path allocation. Now we need to do a scavenge or GC // and try allocation again. -void OptoRuntime::maybe_defer_card_mark(JavaThread* thread) { +void OptoRuntime::new_store_pre_barrier(JavaThread* thread) { // After any safepoint, just before going back to compiled code, // we inform the GC that we will be doing initializing writes to // this object in the future without emitting card-marks, so @@ -156,7 +156,7 @@ assert(Universe::heap()->can_elide_tlab_store_barriers(), "compiler must check this first"); // GC may decide to give back a safer copy of new_obj. - new_obj = Universe::heap()->defer_store_barrier(thread, new_obj); + new_obj = Universe::heap()->new_store_pre_barrier(thread, new_obj); thread->set_vm_result(new_obj); } @@ -200,7 +200,7 @@ if (GraphKit::use_ReduceInitialCardMarks()) { // inform GC that we won't do card marks for initializing writes. - maybe_defer_card_mark(thread); + new_store_pre_barrier(thread); } JRT_END @@ -239,7 +239,7 @@ if (GraphKit::use_ReduceInitialCardMarks()) { // inform GC that we won't do card marks for initializing writes. - maybe_defer_card_mark(thread); + new_store_pre_barrier(thread); } JRT_END diff -r 90e004691873 -r af4d405aacc1 hotspot/src/share/vm/opto/runtime.hpp --- a/hotspot/src/share/vm/opto/runtime.hpp Tue Jan 12 14:56:46 2010 -0800 +++ b/hotspot/src/share/vm/opto/runtime.hpp Wed Jan 13 15:26:39 2010 -0800 @@ -133,8 +133,9 @@ // Allocate storage for a objArray or typeArray static void new_array_C(klassOopDesc* array_klass, int len, JavaThread *thread); - // Post-slow-path-allocation step for implementing ReduceInitialCardMarks: - static void maybe_defer_card_mark(JavaThread* thread); + // Post-slow-path-allocation, pre-initializing-stores step for + // implementing ReduceInitialCardMarks + static void new_store_pre_barrier(JavaThread* thread); // Allocate storage for a multi-dimensional arrays // Note: needs to be fixed for arbitrary number of dimensions diff -r 90e004691873 -r af4d405aacc1 hotspot/src/share/vm/runtime/globals.hpp --- a/hotspot/src/share/vm/runtime/globals.hpp Tue Jan 12 14:56:46 2010 -0800 +++ b/hotspot/src/share/vm/runtime/globals.hpp Wed Jan 13 15:26:39 2010 -0800 @@ -2015,6 +2015,10 @@ diagnostic(bool, GCParallelVerificationEnabled, true, \ "Enable parallel memory system verification") \ \ + diagnostic(bool, DeferInitialCardMark, false, \ + "When +ReduceInitialCardMarks, explicitly defer any that " \ + "may arise from new_pre_store_barrier") \ + \ diagnostic(bool, VerifyRememberedSets, false, \ "Verify GC remembered sets") \ \ diff -r 90e004691873 -r af4d405aacc1 hotspot/src/share/vm/runtime/thread.cpp --- a/hotspot/src/share/vm/runtime/thread.cpp Tue Jan 12 14:56:46 2010 -0800 +++ b/hotspot/src/share/vm/runtime/thread.cpp Wed Jan 13 15:26:39 2010 -0800 @@ -2357,9 +2357,8 @@ }; void JavaThread::oops_do(OopClosure* f, CodeBlobClosure* cf) { - // Flush deferred store-barriers, if any, associated with - // initializing stores done by this JavaThread in the current epoch. - Universe::heap()->flush_deferred_store_barrier(this); + // Verify that the deferred card marks have been flushed. + assert(deferred_card_mark().is_empty(), "Should be empty during GC"); // The ThreadProfiler oops_do is done from FlatProfiler::oops_do // since there may be more than one thread using each ThreadProfiler. diff -r 90e004691873 -r af4d405aacc1 hotspot/src/share/vm/runtime/vmStructs.cpp --- a/hotspot/src/share/vm/runtime/vmStructs.cpp Tue Jan 12 14:56:46 2010 -0800 +++ b/hotspot/src/share/vm/runtime/vmStructs.cpp Wed Jan 13 15:26:39 2010 -0800 @@ -309,6 +309,7 @@ nonstatic_field(CollectedHeap, _reserved, MemRegion) \ nonstatic_field(SharedHeap, _perm_gen, PermGen*) \ nonstatic_field(CollectedHeap, _barrier_set, BarrierSet*) \ + nonstatic_field(CollectedHeap, _defer_initial_card_mark, bool) \ nonstatic_field(CollectedHeap, _is_gc_active, bool) \ nonstatic_field(CompactibleSpace, _compaction_top, HeapWord*) \ nonstatic_field(CompactibleSpace, _first_dead, HeapWord*) \