8177707: Specialize G1RemSet::refine_card for concurrent/during safepoint refinement
Reviewed-by: ehelin, kbarrett
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp Fri Jun 02 13:45:21 2017 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp Fri Jun 02 13:47:54 2017 +0200
@@ -101,11 +101,7 @@
RefineCardTableEntryClosure() : _concurrent(true) { }
bool do_card_ptr(jbyte* card_ptr, uint worker_i) {
- bool oops_into_cset = G1CollectedHeap::heap()->g1_rem_set()->refine_card(card_ptr, worker_i, NULL);
- // This path is executed by the concurrent refine or mutator threads,
- // concurrently, and so we do not care if card_ptr contains references
- // that point into the collection set.
- assert(!oops_into_cset, "should be");
+ G1CollectedHeap::heap()->g1_rem_set()->refine_card_concurrently(card_ptr, worker_i);
if (_concurrent && SuspendibleThreadSet::should_yield()) {
// Caller will actually yield.
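
For context on the call-site change above: RefineCardTableEntryClosure is driven by G1's dirty card queues, where the write barrier enqueues pointers to dirtied cards and concurrent refinement threads drain them, calling do_card_ptr per entry and yielding when requested. A standalone sketch of that drain pattern, using invented names (DirtyCardQueueSketch, CardByte) rather than the HotSpot classes:

    #include <deque>
    #include <functional>

    typedef unsigned char CardByte;

    struct DirtyCardQueueSketch {
      std::deque<CardByte*> cards;

      void enqueue(CardByte* card_ptr) { cards.push_back(card_ptr); }

      // Drain the queue; stop early if the per-card callback asks us to yield,
      // mirroring the should_yield() check in the closure above.
      void drain(const std::function<bool(CardByte*, unsigned)>& do_card_ptr,
                 unsigned worker_id) {
        while (!cards.empty()) {
          CardByte* card = cards.front();
          cards.pop_front();
          if (!do_card_ptr(card, worker_id)) {
            break;   // caller yields (e.g. for a safepoint) and resumes later
          }
        }
      }
    };
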
--- a/hotspot/src/share/vm/gc/g1/g1OopClosures.hpp Fri Jun 02 13:45:21 2017 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1OopClosures.hpp Fri Jun 02 13:47:54 2017 +0200
@@ -171,6 +171,24 @@
virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
};
+class G1ConcurrentRefineOopClosure: public ExtendedOopClosure {
+ G1CollectedHeap* _g1;
+ uint _worker_i;
+
+public:
+ G1ConcurrentRefineOopClosure(G1CollectedHeap* g1h, uint worker_i) :
+ _g1(g1h),
+ _worker_i(worker_i) {
+ }
+
+ // This closure needs special handling for InstanceRefKlass.
+ virtual ReferenceIterationMode reference_iteration_mode() { return DO_DISCOVERED_AND_DISCOVERY; }
+
+ template <class T> void do_oop_nv(T* p);
+ virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
+ virtual void do_oop(oop* p) { do_oop_nv(p); }
+};
+
class G1UpdateRSOrPushRefOopClosure: public ExtendedOopClosure {
G1CollectedHeap* _g1;
HeapRegion* _from;
--- a/hotspot/src/share/vm/gc/g1/g1OopClosures.inline.hpp Fri Jun 02 13:45:21 2017 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1OopClosures.inline.hpp Fri Jun 02 13:47:54 2017 +0200
@@ -111,6 +111,54 @@
}
template <class T>
+inline static void check_obj_during_refinement(T* p, oop const obj) {
+#ifdef ASSERT
+ G1CollectedHeap* g1 = G1CollectedHeap::heap();
+ // We can't assert that obj is an oop here because of races with mutator threads:
+ // assert(obj == NULL || obj->is_oop(), "expected an oop");
+ assert(check_obj_alignment(obj), "not oop aligned");
+ assert(g1->is_in_reserved(obj), "must be in heap");
+
+ HeapRegion* from = g1->heap_region_containing(p);
+
+ assert(from != NULL, "from region must be non-NULL");
+ assert(from->is_in_reserved(p) ||
+ (from->is_humongous() &&
+ g1->heap_region_containing(p)->is_humongous() &&
+ from->humongous_start_region() == g1->heap_region_containing(p)->humongous_start_region()),
+ "p " PTR_FORMAT " is not in the same region %u or part of the correct humongous object starting at region %u.",
+ p2i(p), from->hrm_index(), from->humongous_start_region()->hrm_index());
+#endif // ASSERT
+}
+
+template <class T>
+inline void G1ConcurrentRefineOopClosure::do_oop_nv(T* p) {
+ T o = oopDesc::load_heap_oop(p);
+ if (oopDesc::is_null(o)) {
+ return;
+ }
+ oop obj = oopDesc::decode_heap_oop_not_null(o);
+
+ check_obj_during_refinement(p, obj);
+
+ if (HeapRegion::is_in_same_region(p, obj)) {
+ // Normally this closure should only be called with cross-region references.
+ // But since Java threads manipulate the references concurrently and we
+ // reload the values here, things may have changed.
+ // This check lets references from a continues humongous region to its
+ // humongous start region slip through, as they are in different regions, and
+ // a remembered set entry gets added for them. This is benign (apart from
+ // memory usage), as this closure is never called during evacuation.
+ return;
+ }
+
+ HeapRegion* to = _g1->heap_region_containing(obj);
+
+ assert(to->rem_set() != NULL, "Need per-region 'into' remsets.");
+ to->rem_set()->add_reference(p, _worker_i);
+}
+
+template <class T>
inline void G1UpdateRSOrPushRefOopClosure::do_oop_nv(T* p) {
oop obj = oopDesc::load_decode_heap_oop(p);
if (obj == NULL) {
@@ -161,9 +209,9 @@
// we have already visited/tried to copy this object
// there is no need to retry.
if (!self_forwarded(obj)) {
- assert(_push_ref_cl != NULL, "should not be null");
- // Push the reference in the refs queue of the G1ParScanThreadState
- // instance for this worker thread.
+ assert(_push_ref_cl != NULL, "should not be null");
+ // Push the reference in the refs queue of the G1ParScanThreadState
+ // instance for this worker thread.
_push_ref_cl->do_oop(p);
}
_has_refs_into_cset = true;
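
The do_oop_nv implementation above filters out references where p and obj fall in the same heap region via HeapRegion::is_in_same_region. As a rough standalone illustration of how such a test can be done cheaply, assuming power-of-two sized and aligned regions (the 1 MB region size below is only a placeholder for this sketch, not G1's actual configuration):

    #include <cstdint>

    // Illustrative only: with 2^k-byte aligned regions, two addresses lie in the
    // same region exactly when all address bits above the in-region offset agree.
    const unsigned kLogRegionBytes = 20;   // assume 1 MB regions for this sketch

    static bool same_region(const void* p, const void* obj) {
      // XOR clears the bits on which the two addresses agree; shifting away the
      // low (offset) bits leaves zero only if both fall in the same region.
      return ((reinterpret_cast<uintptr_t>(p) ^
               reinterpret_cast<uintptr_t>(obj)) >> kLogRegionBytes) == 0;
    }
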
--- a/hotspot/src/share/vm/gc/g1/g1RemSet.cpp Fri Jun 02 13:45:21 2017 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1RemSet.cpp Fri Jun 02 13:47:54 2017 +0200
@@ -413,7 +413,7 @@
assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
assert(worker_i < ParallelGCThreads, "should be a GC worker");
- if (_g1rs->refine_card(card_ptr, worker_i, _cl)) {
+ if (_g1rs->refine_card_during_gc(card_ptr, worker_i, _cl)) {
// 'card_ptr' contains references that point into the collection
// set. We need to record the card in the DCQS
// (_into_cset_dirty_card_queue_set)
@@ -523,6 +523,18 @@
_g1->heap_region_par_iterate(&scrub_cl, worker_num, hrclaimer);
}
+inline void check_card_ptr(jbyte* card_ptr, CardTableModRefBS* ct_bs) {
+#ifdef ASSERT
+ G1CollectedHeap* g1 = G1CollectedHeap::heap();
+ assert(g1->is_in_exact(ct_bs->addr_for(card_ptr)),
+ "Card at " PTR_FORMAT " index " SIZE_FORMAT " representing heap at " PTR_FORMAT " (%u) must be in committed heap",
+ p2i(card_ptr),
+ ct_bs->index_for(ct_bs->addr_for(card_ptr)),
+ p2i(ct_bs->addr_for(card_ptr)),
+ g1->addr_to_region(ct_bs->addr_for(card_ptr)));
+#endif
+}
+
G1UpdateRSOrPushRefOopClosure::G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
G1ParPushHeapRSClosure* push_ref_cl,
bool record_refs_into_cset,
@@ -534,24 +546,165 @@
_push_ref_cl(push_ref_cl),
_worker_i(worker_i) { }
-// Returns true if the given card contains references that point
-// into the collection set, if we're checking for such references;
-// false otherwise.
+void G1RemSet::refine_card_concurrently(jbyte* card_ptr,
+ uint worker_i) {
+ assert(!_g1->is_gc_active(), "Only call concurrently");
-bool G1RemSet::refine_card(jbyte* card_ptr,
- uint worker_i,
- G1ParPushHeapRSClosure* oops_in_heap_closure) {
- assert(_g1->is_in_exact(_ct_bs->addr_for(card_ptr)),
- "Card at " PTR_FORMAT " index " SIZE_FORMAT " representing heap at " PTR_FORMAT " (%u) must be in committed heap",
- p2i(card_ptr),
- _ct_bs->index_for(_ct_bs->addr_for(card_ptr)),
- p2i(_ct_bs->addr_for(card_ptr)),
- _g1->addr_to_region(_ct_bs->addr_for(card_ptr)));
-
- bool check_for_refs_into_cset = oops_in_heap_closure != NULL;
+ check_card_ptr(card_ptr, _ct_bs);
// If the card is no longer dirty, nothing to do.
if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
+ return;
+ }
+
+ // Construct the region representing the card.
+ HeapWord* start = _ct_bs->addr_for(card_ptr);
+ // And find the region containing it.
+ HeapRegion* r = _g1->heap_region_containing(start);
+
+ // This check is needed for some uncommon cases where we should
+ // ignore the card.
+ //
+ // The region could be young. Cards for young regions are
+ // distinctly marked (set to g1_young_gen), so the post-barrier will
+ // filter them out. However, that marking is performed
+ // concurrently. A write to a young object could occur before the
+ // card has been marked young, slipping past the filter.
+ //
+ // The card could be stale, because the region has been freed since
+ // the card was recorded. In this case the region type could be
+ // anything. If (still) free or (reallocated) young, just ignore
+ // it. If (reallocated) old or humongous, the later card trimming
+ // and additional checks in iteration may detect staleness. At
+ // worst, we end up processing a stale card unnecessarily.
+ //
+ // In the normal (non-stale) case, the synchronization between the
+ // enqueueing of the card and processing it here will have ensured
+ // we see the up-to-date region type here.
+ if (!r->is_old_or_humongous()) {
+ return;
+ }
+
+ // While we are processing RSet buffers during the collection, we
+ // actually don't want to scan any cards on the collection set,
+ // since we don't want to update remembered sets with entries that
+ // point into the collection set, given that live objects from the
+ // collection set are about to move and such entries will be stale
+ // very soon. This change also deals with a reliability issue which
+ // involves scanning a card in the collection set and coming across
+ // an array that was being chunked and looking malformed. Note,
+ // however, that if evacuation fails, we have to scan any objects
+ // that were not moved and create any missing entries.
+ if (r->in_collection_set()) {
+ return;
+ }
+
+ // The result from the hot card cache insert call is either:
+ // * pointer to the current card
+ // (implying that the current card is not 'hot'),
+ // * null
+ // (meaning we had inserted the card ptr into the "hot" card cache,
+ // which had some headroom),
+ // * a pointer to a "hot" card that was evicted from the "hot" cache.
+ //
+
+ if (_hot_card_cache->use_cache()) {
+ assert(!SafepointSynchronize::is_at_safepoint(), "sanity");
+
+ const jbyte* orig_card_ptr = card_ptr;
+ card_ptr = _hot_card_cache->insert(card_ptr);
+ if (card_ptr == NULL) {
+ // There was no eviction. Nothing to do.
+ return;
+ } else if (card_ptr != orig_card_ptr) {
+ // Original card was inserted and an old card was evicted.
+ start = _ct_bs->addr_for(card_ptr);
+ r = _g1->heap_region_containing(start);
+
+ // Check whether the region formerly in the cache should be
+ // ignored, as discussed earlier for the original card. The
+ // region could have been freed while in the cache. The cset is
+ // not relevant here, since we're in concurrent phase.
+ if (!r->is_old_or_humongous()) {
+ return;
+ }
+ } // Else we still have the original card.
+ }
+
+ // Trim the region designated by the card to what's been allocated
+ // in the region. The card could be stale, or the card could cover
+ // (part of) an object at the end of the allocated space and extend
+ // beyond the end of allocation.
+
+ // Non-humongous objects are only allocated in the old-gen during
+ // GC, so if region is old then top is stable. Humongous object
+ // allocation sets top last; if top has not yet been set, this is
+ // a stale card and we'll end up with an empty intersection. If
+ // this is not a stale card, the synchronization between the
+ // enqueuing of the card and processing it here will have ensured
+ // we see the up-to-date top here.
+ HeapWord* scan_limit = r->top();
+
+ if (scan_limit <= start) {
+ // If the trimmed region is empty, the card must be stale.
+ return;
+ }
+
+ // Okay to clean and process the card now. There are still some
+ // stale card cases that may be detected by iteration and dealt with
+ // as iteration failure.
+ *const_cast<volatile jbyte*>(card_ptr) = CardTableModRefBS::clean_card_val();
+
+ // This fence serves two purposes. First, the card must be cleaned
+ // before processing the contents. Second, we can't proceed with
+ // processing until after the read of top, for synchronization with
+ // possibly concurrent humongous object allocation. It's okay that
+ // reading top and reading type were racy wrto each other. We need
+ // both set, in any order, to proceed.
+ OrderAccess::fence();
+
+ // Don't use addr_for(card_ptr + 1) which can ask for
+ // a card beyond the heap.
+ HeapWord* end = start + CardTableModRefBS::card_size_in_words;
+ MemRegion dirty_region(start, MIN2(scan_limit, end));
+ assert(!dirty_region.is_empty(), "sanity");
+
+ G1ConcurrentRefineOopClosure conc_refine_cl(_g1, worker_i);
+
+ bool card_processed =
+ r->oops_on_card_seq_iterate_careful<false>(dirty_region, &conc_refine_cl);
+
+ // If unable to process the card then we encountered an unparsable
+ // part of the heap (e.g. a partially allocated object) while
+ // processing a stale card. Despite the card being stale, redirty
+ // and re-enqueue, because we've already cleaned the card. Without
+ // this we could incorrectly discard a non-stale card.
+ if (!card_processed) {
+ // The card might have gotten re-dirtied and re-enqueued while we
+ // worked. (In fact, it's pretty likely.)
+ if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
+ *card_ptr = CardTableModRefBS::dirty_card_val();
+ MutexLockerEx x(Shared_DirtyCardQ_lock,
+ Mutex::_no_safepoint_check_flag);
+ DirtyCardQueue* sdcq =
+ JavaThread::dirty_card_queue_set().shared_dirty_card_queue();
+ sdcq->enqueue(card_ptr);
+ }
+ } else {
+ _conc_refine_cards++;
+ }
+}
+
+bool G1RemSet::refine_card_during_gc(jbyte* card_ptr,
+ uint worker_i,
+ G1ParPushHeapRSClosure* oops_in_heap_closure) {
+ assert(_g1->is_gc_active(), "Only call during GC");
+
+ check_card_ptr(card_ptr, _ct_bs);
+
+ // If the card is no longer dirty, nothing to do. This covers cards that were already
+ // scanned as part of the remembered sets.
+ if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
// No need to return that this card contains refs that point
// into the collection set.
return false;
@@ -599,59 +752,16 @@
return false;
}
- // The result from the hot card cache insert call is either:
- // * pointer to the current card
- // (implying that the current card is not 'hot'),
- // * null
- // (meaning we had inserted the card ptr into the "hot" card cache,
- // which had some headroom),
- // * a pointer to a "hot" card that was evicted from the "hot" cache.
- //
-
- if (_hot_card_cache->use_cache()) {
- assert(!check_for_refs_into_cset, "sanity");
- assert(!SafepointSynchronize::is_at_safepoint(), "sanity");
-
- const jbyte* orig_card_ptr = card_ptr;
- card_ptr = _hot_card_cache->insert(card_ptr);
- if (card_ptr == NULL) {
- // There was no eviction. Nothing to do.
- return false;
- } else if (card_ptr != orig_card_ptr) {
- // Original card was inserted and an old card was evicted.
- start = _ct_bs->addr_for(card_ptr);
- r = _g1->heap_region_containing(start);
-
- // Check whether the region formerly in the cache should be
- // ignored, as discussed earlier for the original card. The
- // region could have been freed while in the cache. The cset is
- // not relevant here, since we're in concurrent phase.
- if (!r->is_old_or_humongous()) {
- return false;
- }
- } // Else we still have the original card.
- }
-
// Trim the region designated by the card to what's been allocated
// in the region. The card could be stale, or the card could cover
// (part of) an object at the end of the allocated space and extend
// beyond the end of allocation.
- HeapWord* scan_limit;
- if (_g1->is_gc_active()) {
- // If we're in a STW GC, then a card might be in a GC alloc region
- // and extend onto a GC LAB, which may not be parsable. Stop such
- // at the "scan_top" of the region.
- scan_limit = r->scan_top();
- } else {
- // Non-humongous objects are only allocated in the old-gen during
- // GC, so if region is old then top is stable. Humongous object
- // allocation sets top last; if top has not yet been set, this is
- // a stale card and we'll end up with an empty intersection. If
- // this is not a stale card, the synchronization between the
- // enqueuing of the card and processing it here will have ensured
- // we see the up-to-date top here.
- scan_limit = r->top();
- }
+
+ // If we're in a STW GC, then a card might be in a GC alloc region
+ // and extend onto a GC LAB, which may not be parsable. Stop such
+ // at the "scan_top" of the region.
+ HeapWord* scan_limit = r->scan_top();
+
if (scan_limit <= start) {
// If the trimmed region is empty, the card must be stale.
return false;
@@ -662,14 +772,6 @@
// as iteration failure.
*const_cast<volatile jbyte*>(card_ptr) = CardTableModRefBS::clean_card_val();
- // This fence serves two purposes. First, the card must be cleaned
- // before processing the contents. Second, we can't proceed with
- // processing until after the read of top, for synchronization with
- // possibly concurrent humongous object allocation. It's okay that
- // reading top and reading type were racy wrto each other. We need
- // both set, in any order, to proceed.
- OrderAccess::fence();
-
// Don't use addr_for(card_ptr + 1) which can ask for
// a card beyond the heap.
HeapWord* end = start + CardTableModRefBS::card_size_in_words;
@@ -678,49 +780,17 @@
G1UpdateRSOrPushRefOopClosure update_rs_oop_cl(_g1,
oops_in_heap_closure,
- check_for_refs_into_cset,
+ true,
worker_i);
update_rs_oop_cl.set_from(r);
- bool card_processed;
- if (_g1->is_gc_active()) {
- card_processed = r->oops_on_card_seq_iterate_careful<true>(dirty_region, &update_rs_oop_cl);
- } else {
- card_processed = r->oops_on_card_seq_iterate_careful<false>(dirty_region, &update_rs_oop_cl);
- }
+ bool card_processed =
+ r->oops_on_card_seq_iterate_careful<true>(dirty_region,
+ &update_rs_oop_cl);
+ assert(card_processed, "must be");
+ _conc_refine_cards++;
- // If unable to process the card then we encountered an unparsable
- // part of the heap (e.g. a partially allocated object) while
- // processing a stale card. Despite the card being stale, redirty
- // and re-enqueue, because we've already cleaned the card. Without
- // this we could incorrectly discard a non-stale card.
- if (!card_processed) {
- assert(!_g1->is_gc_active(), "Unparsable heap during GC");
- // The card might have gotten re-dirtied and re-enqueued while we
- // worked. (In fact, it's pretty likely.)
- if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
- *card_ptr = CardTableModRefBS::dirty_card_val();
- MutexLockerEx x(Shared_DirtyCardQ_lock,
- Mutex::_no_safepoint_check_flag);
- DirtyCardQueue* sdcq =
- JavaThread::dirty_card_queue_set().shared_dirty_card_queue();
- sdcq->enqueue(card_ptr);
- }
- } else {
- _conc_refine_cards++;
- }
-
- // This gets set to true if the card being refined has references that point
- // into the collection set.
- bool has_refs_into_cset = update_rs_oop_cl.has_refs_into_cset();
-
- // We should only be detecting that the card contains references
- // that point into the collection set if the current thread is
- // a GC worker thread.
- assert(!has_refs_into_cset || SafepointSynchronize::is_at_safepoint(),
- "invalid result at non safepoint");
-
- return has_refs_into_cset;
+ return update_rs_oop_cl.has_refs_into_cset();
}
void G1RemSet::print_periodic_summary_info(const char* header, uint period_count) {
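
The comments in refine_card_concurrently above amount to a clean-then-fence-then-scan protocol: reset the card to clean before scanning, fence so that the clean and the read of top are ordered before the scan, and re-dirty plus re-enqueue a card whose memory turned out to be unparsable, so a genuinely dirty card is never dropped. A compact sketch of that protocol under simplified assumptions (std::atomic card bytes, caller-supplied scan/redirty hooks with hypothetical names), not the HotSpot implementation:

    #include <atomic>

    enum : unsigned char { kDirtyCard = 0x00, kCleanCard = 0xff };

    // scan() stands for iterating the objects covered by the card; it returns
    // false if it ran into an unparsable part of the heap (a stale card).
    template <typename ScanFn, typename RedirtyFn>
    void refine_card_sketch(std::atomic<unsigned char>& card,
                            ScanFn scan,
                            RedirtyFn redirty_and_enqueue) {
      if (card.load(std::memory_order_relaxed) != kDirtyCard) {
        return;                                  // already refined elsewhere
      }
      // Clean the card *before* scanning, so a mutator write that races with
      // the scan re-dirties it and forces another refinement pass.
      card.store(kCleanCard, std::memory_order_relaxed);
      std::atomic_thread_fence(std::memory_order_seq_cst);  // ~ OrderAccess::fence()
      if (!scan()) {
        // Stale card over unparsable memory: put it back rather than risk
        // discarding a card that was re-dirtied in the meantime.
        redirty_and_enqueue(card);
      }
    }
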
--- a/hotspot/src/share/vm/gc/g1/g1RemSet.hpp Fri Jun 02 13:45:21 2017 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1RemSet.hpp Fri Jun 02 13:47:54 2017 +0200
@@ -146,12 +146,14 @@
void scrub(uint worker_num, HeapRegionClaimer* hrclaimer);
// Refine the card corresponding to "card_ptr".
- // If oops_in_heap_closure is not NULL, a true result is returned
- // if the given card contains oops that have references into the
- // current collection set.
- bool refine_card(jbyte* card_ptr,
- uint worker_i,
- G1ParPushHeapRSClosure* oops_in_heap_closure);
+ void refine_card_concurrently(jbyte* card_ptr,
+ uint worker_i);
+
+ // Refine the card corresponding to "card_ptr". Returns "true" if the given card contains
+ // oops that have references into the current collection set.
+ bool refine_card_during_gc(jbyte* card_ptr,
+ uint worker_i,
+ G1ParPushHeapRSClosure* oops_in_heap_closure);
// Print accumulated summary info from the start of the VM.
void print_summary_info();
--- a/hotspot/src/share/vm/gc/g1/g1_specialized_oop_closures.hpp Fri Jun 02 13:45:21 2017 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1_specialized_oop_closures.hpp Fri Jun 02 13:47:54 2017 +0200
@@ -36,6 +36,8 @@
class G1ParPushHeapRSClosure;
class G1UpdateRSOrPushRefOopClosure;
+class G1ConcurrentRefineOopClosure;
+
class G1CMOopClosure;
class G1RootRegionScanClosure;
@@ -43,6 +45,7 @@
f(G1ParScanClosure,_nv) \
f(G1ParPushHeapRSClosure,_nv) \
f(G1UpdateRSOrPushRefOopClosure,_nv) \
+ f(G1ConcurrentRefineOopClosure,_nv) \
f(G1CMOopClosure,_nv) \
f(G1RootRegionScanClosure,_nv)
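
Finally, registering G1ConcurrentRefineOopClosure in the _nv specialization list above is what allows oop iteration to bind to its do_oop_nv methods statically. A small self-contained illustration of that non-virtual dispatch idea in general terms (the class and function names here are invented for the sketch, not HotSpot's macro machinery):

    #include <cstddef>

    struct OopClosureBase {                       // the virtual fallback path
      virtual void do_oop(void** p) = 0;
      virtual ~OopClosureBase() {}
    };

    struct CountingClosure : public OopClosureBase {
      size_t count;
      CountingClosure() : count(0) {}
      void do_oop_nv(void** /*p*/) { ++count; }            // non-virtual entry
      virtual void do_oop(void** p) { do_oop_nv(p); }      // virtual entry
    };

    // "Specialized" iteration: the closure type is known at compile time, so
    // the per-oop call can be inlined instead of going through the vtable.
    template <typename ClosureT>
    void iterate_fields(void** fields, size_t n, ClosureT* cl) {
      for (size_t i = 0; i < n; ++i) {
        cl->do_oop_nv(&fields[i]);
      }
    }
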