8166811: Missing memory fences between memory allocation and refinement
Summary: Refactored to include the needed memory barrier
Reviewed-by: tschatzl, ehelin
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp Tue Nov 22 20:24:47 2016 -0500
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp Tue Nov 22 20:50:31 2016 -0500
@@ -300,6 +300,8 @@
// thread to calculate the object size incorrectly.
Copy::fill_to_words(new_obj, oopDesc::header_size(), 0);
+ // Next, pad out the unused tail of the last region with filler
+ // objects, for improved usage accounting.
// How many words we use for filler objects.
size_t word_fill_size = word_size_sum - word_size;
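
[Editorial sketch] For context, the tail-padding step the new comment describes has roughly the following shape. This is a minimal sketch, not the actual allocation path: obj_end is assumed to be the word just past the humongous object, while fill_with_objects() and min_fill_size() are the CollectedHeap filler helpers; word_size_sum and word_size come from the surrounding allocation code.

    // Sketch: pad the unused tail of the last region with filler
    // objects so the heap stays parsable and the whole tail is
    // accounted as used.
    size_t word_fill_size = word_size_sum - word_size;
    if (word_fill_size >= CollectedHeap::min_fill_size()) {
      CollectedHeap::fill_with_objects(obj_end, word_fill_size);
    }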
--- a/hotspot/src/share/vm/gc/g1/g1RemSet.cpp Tue Nov 22 20:24:47 2016 -0500
+++ b/hotspot/src/share/vm/gc/g1/g1RemSet.cpp Tue Nov 22 20:50:31 2016 -0500
@@ -575,18 +575,26 @@
// And find the region containing it.
HeapRegion* r = _g1->heap_region_containing(start);
- // Why do we have to check here whether a card is on a young region,
- // given that we dirty young regions and, as a result, the
- // post-barrier is supposed to filter them out and never to enqueue
- // them? When we allocate a new region as the "allocation region" we
- // actually dirty its cards after we release the lock, since card
- // dirtying while holding the lock was a performance bottleneck. So,
- // as a result, it is possible for other threads to actually
- // allocate objects in the region (after they acquire the lock)
- // before all the cards on the region are dirtied. This is unlikely,
- // and it doesn't happen often, but it can happen. So, the extra
- // check below filters out those cards.
- if (r->is_young()) {
+ // This check is needed for some uncommon cases where we should
+ // ignore the card.
+ //
+ // The region could be young. Cards for young regions are
+ // distinctly marked (set to g1_young_gen), so the post-barrier will
+ // filter them out. However, that marking is performed
+ // concurrently. A write to a young object could occur before the
+ // card has been marked young, slipping past the filter.
+ //
+ // The card could be stale, because the region has been freed since
+ // the card was recorded. In this case the region type could be
+ // anything. If (still) free or (reallocated) young, just ignore
+ // it. If (reallocated) old or humongous, the later card trimming
+ // and additional checks in iteration may detect staleness. At
+ // worst, we end up processing a stale card unnecessarily.
+ //
+ // In the normal (non-stale) case, the synchronization between the
+ // enqueueing of the card and processing it here will have ensured
+ // we see the up-to-date region type here.
+ if (!r->is_old_or_humongous()) {
return false;
}
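
[Editorial sketch] To make the race described above concrete, here is a simplified model of the post-write barrier's young-card filter. This is illustrative only, not the exact barrier code: card_for(), g1_young_card_val(), dirty_card_val() and enqueue() stand in for the real barrier plumbing.

    // Simplified post-barrier: cards already marked young are never
    // enqueued for refinement.
    jbyte* card = card_for(field_addr);
    if (*card != g1_young_card_val()) {   // filter young cards
      *card = dirty_card_val();
      enqueue(card);                      // refinement will see this card
    }
    // The race: a region's cards are marked young only after the
    // allocating thread has released the lock, so a write can dirty
    // and enqueue a card before the card is marked young, slipping
    // past this filter.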
@@ -617,26 +625,69 @@
assert(!check_for_refs_into_cset, "sanity");
assert(!SafepointSynchronize::is_at_safepoint(), "sanity");
+ const jbyte* orig_card_ptr = card_ptr;
card_ptr = _hot_card_cache->insert(card_ptr);
if (card_ptr == NULL) {
// There was no eviction. Nothing to do.
return false;
- }
-
- start = _ct_bs->addr_for(card_ptr);
- r = _g1->heap_region_containing(start);
+ } else if (card_ptr != orig_card_ptr) {
+ // Original card was inserted and an old card was evicted.
+ start = _ct_bs->addr_for(card_ptr);
+ r = _g1->heap_region_containing(start);
- // Checking whether the region we got back from the cache
- // is young here is inappropriate. The region could have been
- // freed, reallocated and tagged as young while in the cache.
- // Hence we could see its young type change at any time.
+ // Check whether the region formerly in the cache should be
+ // ignored, as discussed earlier for the original card. The
+ // region could have been freed while in the cache. The cset is
+ // not relevant here, since we're in the concurrent phase.
+ if (!r->is_old_or_humongous()) {
+ return false;
+ }
+ } // Else we still have the original card.
}
+ // Trim the region designated by the card to what's been allocated
+ // in the region. The card could be stale, or the card could cover
+ // (part of) an object at the end of the allocated space and extend
+ // beyond the end of allocation.
+ HeapWord* scan_limit;
+ if (_g1->is_gc_active()) {
+ // If we're in a STW GC, then a card might be in a GC alloc region
+ // and extend onto a GC LAB, which may not be parsable. Cap the
+ // scan at the "scan_top" of the region.
+ scan_limit = r->scan_top();
+ } else {
+ // Non-humongous objects are only allocated in the old-gen during
+ // GC, so if the region is old then top is stable. Humongous object
+ // allocation sets top last; if top has not yet been set, this is
+ // a stale card and we'll end up with an empty intersection. If
+ // this is not a stale card, the synchronization between the
+ // enqueueing of the card and processing it here will have ensured
+ // we see the up-to-date top here.
+ scan_limit = r->top();
+ }
+ if (scan_limit <= start) {
+ // If the trimmed region is empty, the card must be stale.
+ return false;
+ }
+
+ // Okay to clean and process the card now. There are still some
+ // stale card cases that may be detected by iteration and dealt with
+ // as iteration failure.
+ *const_cast<volatile jbyte*>(card_ptr) = CardTableModRefBS::clean_card_val();
+
+ // This fence serves two purposes. First, the card must be cleaned
+ // before processing the contents. Second, we can't proceed with
+ // processing until after the read of top, for synchronization with
+ // possibly concurrent humongous object allocation. It's okay that
+ // reading top and reading type were racy with respect to each
+ // other. We need both set, in any order, to proceed.
+ OrderAccess::fence();
+
// Don't use addr_for(card_ptr + 1) which can ask for
- // a card beyond the heap. This is not safe without a perm
- // gen at the upper end of the heap.
- HeapWord* end = start + CardTableModRefBS::card_size_in_words;
- MemRegion dirtyRegion(start, end);
+ // a card beyond the heap.
+ HeapWord* end = start + CardTableModRefBS::card_size_in_words;
+ MemRegion dirty_region(start, MIN2(scan_limit, end));
+ assert(!dirty_region.is_empty(), "sanity");
G1UpdateRSOrPushRefOopClosure update_rs_oop_cl(_g1,
_g1->g1_rem_set(),
@@ -655,24 +706,9 @@
(OopClosure*)&mux :
(OopClosure*)&update_rs_oop_cl));
- // The region for the current card may be a young region. The
- // current card may have been a card that was evicted from the
- // card cache. When the card was inserted into the cache, we had
- // determined that its region was non-young. While in the cache,
- // the region may have been freed during a cleanup pause, reallocated
- // and tagged as young.
- //
- // We wish to filter out cards for such a region but the current
- // thread, if we're running concurrently, may "see" the young type
- // change at any time (so an earlier "is_young" check may pass or
- // fail arbitrarily). We tell the iteration code to perform this
- // filtering when it has been determined that there has been an actual
- // allocation in this region and making it safe to check the young type.
-
bool card_processed =
- r->oops_on_card_seq_iterate_careful(dirtyRegion,
- &filter_then_update_rs_oop_cl,
- card_ptr);
+ r->oops_on_card_seq_iterate_careful(dirty_region,
+ &filter_then_update_rs_oop_cl);
// If unable to process the card then we encountered an unparsable
// part of the heap (e.g. a partially allocated object) while
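
[Editorial sketch] The ordering the new OrderAccess::fence() establishes can be modeled with C++11 atomics. This is an illustrative sketch, not HotSpot code: top models HeapRegion::top(), card models one card table entry, and scan() stands in for the oops-on-card iteration.

    #include <atomic>

    std::atomic<char*> top;    // published by the allocating thread
    std::atomic<char>  card;   // 1 = dirty, 0 = clean

    void scan(char* from, char* to);  // stand-in for card iteration

    // Refiner: trim to the allocation, clean the card, fence, then
    // read object contents. The fence keeps both the clean-store and
    // the top-read ordered before any content reads, mirroring the
    // comment in refine_card above.
    bool refine(char* start, char* end) {
      char* limit = top.load(std::memory_order_relaxed);
      if (limit == nullptr || limit <= start) {
        return false;                    // empty intersection: stale card
      }
      card.store(0, std::memory_order_relaxed);             // clean first
      std::atomic_thread_fence(std::memory_order_seq_cst);  // OrderAccess::fence()
      scan(start, limit < end ? limit : end);               // now safe to read
      return true;
    }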
--- a/hotspot/src/share/vm/gc/g1/heapRegion.cpp Tue Nov 22 20:24:47 2016 -0500
+++ b/hotspot/src/share/vm/gc/g1/heapRegion.cpp Tue Nov 22 20:50:31 2016 -0500
@@ -373,6 +373,7 @@
return false;
}
+ // We have a well-formed humongous object at the start of mr.
// Only filler objects follow a humongous object in the containing
// regions, and we can ignore those. So only process the one
// humongous object.
@@ -396,49 +397,20 @@
}
bool HeapRegion::oops_on_card_seq_iterate_careful(MemRegion mr,
- FilterOutOfRegionClosure* cl,
- jbyte* card_ptr) {
- assert(card_ptr != NULL, "pre-condition");
+ FilterOutOfRegionClosure* cl) {
+ assert(MemRegion(bottom(), end()).contains(mr), "Card region not in heap region");
G1CollectedHeap* g1h = G1CollectedHeap::heap();
- // If we're within a stop-world GC, then we might look at a card in a
- // GC alloc region that extends onto a GC LAB, which may not be
- // parseable. Stop such at the "scan_top" of the region.
- if (g1h->is_gc_active()) {
- mr = mr.intersection(MemRegion(bottom(), scan_top()));
- } else {
- mr = mr.intersection(used_region());
- }
- if (mr.is_empty()) {
- return true;
- }
-
- // The intersection of the incoming mr (for the card) and the
- // allocated part of the region is non-empty. This implies that
- // we have actually allocated into this region. The code in
- // G1CollectedHeap.cpp that allocates a new region sets the
- // is_young tag on the region before allocating. Thus we
- // safely know if this region is young.
- if (is_young()) {
- return true;
- }
-
- // We can only clean the card here, after we make the decision that
- // the card is not young.
- *card_ptr = CardTableModRefBS::clean_card_val();
- // We must complete this write before we do any of the reads below.
- OrderAccess::storeload();
-
// Special handling for humongous regions.
if (is_humongous()) {
return do_oops_on_card_in_humongous(mr, cl, this, g1h);
}
+ assert(is_old(), "precondition");
- // During GC we limit mr by scan_top. So we never get here with an
- // mr covering objects allocated during GC. Non-humongous objects
- // are only allocated in the old-gen during GC. So the parts of the
- // heap that may be examined here are always parsable; there's no
- // need to use klass_or_null here to detect in-progress allocations.
+ // Because mr has been trimmed to what's been allocated in this
+ // region, the parts of the heap that are examined here are always
+ // parsable; there's no need to use klass_or_null to detect
+ // in-progress allocation.
// Cache the boundaries of the memory region in some const locals
HeapWord* const start = mr.start();
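
[Editorial sketch] The walk that follows (elided by the diff context) has roughly this shape. It is a simplified sketch under the stated precondition that mr is non-empty and lies within the allocated, parsable part of an old region.

    // Rough shape of the old-region card walk (simplified; the real
    // code also handles backing up to the first object overlapping mr).
    HeapWord* cur = block_start(mr.start());  // object containing mr.start()
    while (cur < mr.end()) {
      oop obj = oop(cur);
      cur += obj->size();           // safe: this part of the heap is parsable
      if (!g1h->is_obj_dead(obj)) {
        obj->oop_iterate(cl, mr);   // apply cl to references inside mr
      }
    }
    return true;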
--- a/hotspot/src/share/vm/gc/g1/heapRegion.hpp Tue Nov 22 20:24:47 2016 -0500
+++ b/hotspot/src/share/vm/gc/g1/heapRegion.hpp Tue Nov 22 20:50:31 2016 -0500
@@ -51,8 +51,9 @@
// object is larger than a heap region, the following regions will
// be of type ContinuesHumongous. In this case the top() of the
// StartHumongous region and all ContinuesHumongous regions except
-// the last will point to their own end. For the last ContinuesHumongous
-// region, top() will equal the object's top.
+// the last will point to their own end. The last ContinuesHumongous
+// region may have top() equal to the end of the object if there
+// isn't room for filler objects to pad out to the end of the region.
class G1CollectedHeap;
class HeapRegionRemSet;
@@ -433,6 +434,8 @@
bool is_old() const { return _type.is_old(); }
+ bool is_old_or_humongous() const { return _type.is_old_or_humongous(); }
+
// A pinned region contains objects which are not moved by garbage collections.
// Humongous regions and archive regions are pinned.
bool is_pinned() const { return _type.is_pinned(); }
@@ -653,17 +656,18 @@
}
}
- // Iterate over the card in the card designated by card_ptr,
- // applying cl to all references in the region.
- // mr: the memory region covered by the card.
- // card_ptr: if we decide that the card is not young and we iterate
- // over it, we'll clean the card before we start the iteration.
- // Returns true if the card was successfully processed, false if an
- // unparsable part of the heap was encountered, which should only
- // happen when invoked concurrently with the mutator.
+ // Iterate over the objects overlapping part of a card, applying cl
+ // to all references in the region. This is a helper for
+ // G1RemSet::refine_card, and is tightly coupled with it.
+ // mr: the memory region covered by the card, trimmed to the
+ // allocated space for this region. Must not be empty.
+ // This region must be old or humongous.
+ // Returns true if the designated objects were successfully
+ // processed, false if an unparsable part of the heap was
+ // encountered; that only happens when invoked concurrently with the
+ // mutator.
bool oops_on_card_seq_iterate_careful(MemRegion mr,
- FilterOutOfRegionClosure* cl,
- jbyte* card_ptr);
+ FilterOutOfRegionClosure* cl);
size_t recorded_rs_length() const { return _recorded_rs_length; }
double predicted_elapsed_time_ms() const { return _predicted_elapsed_time_ms; }
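
[Editorial sketch] On the caller side, the contract documented above reduces to something like this, simplified from refine_card; the failure handling shown is an assumption based on the surrounding discussion of unparsable heap, not a quote of the actual code.

    // Caller sketch: r already filtered to old/humongous, dirty_region
    // trimmed and non-empty, card cleaned and fenced beforehand.
    bool card_processed =
      r->oops_on_card_seq_iterate_careful(dirty_region, &filter_then_update_rs_oop_cl);
    if (!card_processed) {
      // Hit an unparsable part of the heap (concurrent allocation);
      // make the card dirty again so it is re-examined later.
      *card_ptr = CardTableModRefBS::dirty_card_val();
    }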
--- a/hotspot/src/share/vm/gc/g1/heapRegionType.hpp Tue Nov 22 20:24:47 2016 -0500
+++ b/hotspot/src/share/vm/gc/g1/heapRegionType.hpp Tue Nov 22 20:50:31 2016 -0500
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -120,6 +120,8 @@
// is_old regions may or may not also be pinned
bool is_old() const { return (get() & OldMask) != 0; }
+ bool is_old_or_humongous() const { return (get() & (OldMask | HumongousMask)) != 0; }
+
// is_pinned regions may be archive or humongous
bool is_pinned() const { return (get() & PinnedMask) != 0; }
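
[Editorial sketch] The new predicate works because each type class owns its own bit in the tag, so the combined test is a single mask-and. An illustrative model of the encoding follows; the mask values here are assumptions, not HeapRegionType's actual constants.

    // Illustrative tag encoding: one bit per type class makes
    // "old or humongous" a single mask test.
    enum : unsigned {
      FreeTag       = 0u,
      YoungMask     = 1u << 0,
      HumongousMask = 1u << 1,
      OldMask       = 1u << 2
    };
    inline bool is_old_or_humongous(unsigned tag) {
      return (tag & (OldMask | HumongousMask)) != 0;
    }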