--- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp Fri Nov 22 16:26:35 2019 -0800
+++ b/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp Fri Nov 22 17:03:55 2019 -0800
@@ -41,6 +41,7 @@
#include "runtime/safepoint.hpp"
#include "runtime/thread.inline.hpp"
#include "runtime/threadSMR.hpp"
+#include "utilities/quickSort.hpp"
G1DirtyCardQueue::G1DirtyCardQueue(G1DirtyCardQueueSet* qset) :
// Dirty card queues are always active, so we create them with their
@@ -226,21 +227,127 @@
return result;
}
+class G1RefineBufferedCards : public StackObj {
+ BufferNode* const _node;
+ CardTable::CardValue** const _node_buffer;
+ const size_t _node_buffer_size;
+ const uint _worker_id;
+ size_t* _total_refined_cards;
+ G1RemSet* const _g1rs;
+
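+ // Comparison function for sorting cards by address. Returning p2 - p1
+ // orders higher addresses first, giving the decreasing order used below.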
+ static inline int compare_card(const CardTable::CardValue* p1,
+ const CardTable::CardValue* p2) {
+ return p2 - p1;
+ }
+
+ // Sorts the cards from start_index to _node_buffer_size in *decreasing*
+ // address order. Tests showed that this order is preferable to either not
+ // sorting or sorting in increasing address order.
+ void sort_cards(size_t start_index) {
+ QuickSort::sort(&_node_buffer[start_index],
+ _node_buffer_size - start_index,
+ compare_card,
+ false);
+ }
+
+ // Returns the index of the first clean card in the buffer.
+ size_t clean_cards() {
+ const size_t start = _node->index();
+ assert(start <= _node_buffer_size, "invariant");
+
+ // Two-fingered compaction algorithm similar to the filtering mechanism in
+ // SATBMarkQueue. The main difference is that clean_card_before_refine()
+ // can change the buffer element in place.
+ // We don't check for SuspendibleThreadSet::should_yield(), because
+ // cleaning and redirtying the cards is fast.
+ CardTable::CardValue** src = &_node_buffer[start];
+ CardTable::CardValue** dst = &_node_buffer[_node_buffer_size];
+ assert(src <= dst, "invariant");
+ for ( ; src < dst; ++src) {
+ // Search low to high for a card to keep.
+ if (_g1rs->clean_card_before_refine(src)) {
+ // Found keeper. Search high to low for a card to discard.
+ while (src < --dst) {
+ if (!_g1rs->clean_card_before_refine(dst)) {
+ *dst = *src; // Replace discard with keeper.
+ break;
+ }
+ }
+ // If discard search failed (src == dst), the outer loop will also end.
+ }
+ }
+
+ // dst points to the first retained clean card, or the end of the buffer
+ // if all the cards were discarded.
+ const size_t first_clean = dst - _node_buffer;
+ assert(first_clean >= start && first_clean <= _node_buffer_size, "invariant");
+ // Discarded cards are counted as refined.
+ *_total_refined_cards += first_clean - start;
+ return first_clean;
+ }
+
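+ // Refines the cleaned cards from start_index up to the end of the buffer,
+ // checking for a pending yield request before each card. On yield, the
+ // remaining cards are redirtied for later reprocessing and false is
+ // returned. The node's index records how far refinement progressed.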
+ bool refine_cleaned_cards(size_t start_index) {
+ bool result = true;
+ size_t i = start_index;
+ for ( ; i < _node_buffer_size; ++i) {
+ if (SuspendibleThreadSet::should_yield()) {
+ redirty_unrefined_cards(i);
+ result = false;
+ break;
+ }
+ _g1rs->refine_card_concurrently(_node_buffer[i], _worker_id);
+ }
+ _node->set_index(i);
+ *_total_refined_cards += i - start_index;
+ return result;
+ }
+
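+ // Cards from start to the end of the buffer were cleaned but will not be
+ // refined in this pass; dirty them again so a later pass refines them.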
+ void redirty_unrefined_cards(size_t start) {
+ for ( ; start < _node_buffer_size; ++start) {
+ *_node_buffer[start] = G1CardTable::dirty_card_val();
+ }
+ }
+
+public:
+ G1RefineBufferedCards(BufferNode* node,
+ size_t node_buffer_size,
+ uint worker_id,
+ size_t* total_refined_cards) :
+ _node(node),
+ _node_buffer(reinterpret_cast<CardTable::CardValue**>(BufferNode::make_buffer_from_node(node))),
+ _node_buffer_size(node_buffer_size),
+ _worker_id(worker_id),
+ _total_refined_cards(total_refined_cards),
+ _g1rs(G1CollectedHeap::heap()->rem_set()) {}
+
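+ // Cleans the buffer's unprocessed cards, sorts those that survive
+ // cleaning, and refines them. Returns true if the entire buffer was
+ // processed.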
+ bool refine() {
+ size_t first_clean_index = clean_cards();
+ if (first_clean_index == _node_buffer_size) {
+ _node->set_index(first_clean_index);
+ return true;
+ }
+ // This fence serves two purposes. First, the cards must be cleaned
+ // before processing the contents. Second, we can't proceed with
+ // processing a region until after the read of the region's top in
+ // clean_cards(), for synchronization with possibly concurrent
+ // humongous object allocation (see the comment at the StoreStore fence
+ // before setting the regions' tops in the humongous allocation path).
+ // It's okay that the reads of the region's top and the region's type are
+ // racy with respect to each other. We need both set, in any order, to
+ // proceed.
+ OrderAccess::fence();
+ sort_cards(first_clean_index);
+ return refine_cleaned_cards(first_clean_index);
+ }
+};
+
bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node,
uint worker_id,
size_t* total_refined_cards) {
- G1RemSet* rem_set = G1CollectedHeap::heap()->rem_set();
- size_t size = buffer_size();
- void** buffer = BufferNode::make_buffer_from_node(node);
- size_t i = node->index();
- assert(i <= size, "invariant");
- for ( ; (i < size) && !SuspendibleThreadSet::should_yield(); ++i) {
- CardTable::CardValue* cp = static_cast<CardTable::CardValue*>(buffer[i]);
- rem_set->refine_card_concurrently(cp, worker_id);
- }
- *total_refined_cards += (i - node->index());
- node->set_index(i);
- return i == size;
+ G1RefineBufferedCards buffered_cards(node,
+ buffer_size(),
+ worker_id,
+ total_refined_cards);
+ return buffered_cards.refine();
}
#ifndef ASSERT
--- a/src/hotspot/share/gc/g1/g1RemSet.cpp Fri Nov 22 16:26:35 2019 -0800
+++ b/src/hotspot/share/gc/g1/g1RemSet.cpp Fri Nov 22 17:03:55 2019 -0800
@@ -1261,25 +1261,27 @@
#endif
}
-void G1RemSet::refine_card_concurrently(CardValue* card_ptr,
- uint worker_id) {
+bool G1RemSet::clean_card_before_refine(CardValue** const card_ptr_addr) {
assert(!_g1h->is_gc_active(), "Only call concurrently");
- // Construct the region representing the card.
+ CardValue* card_ptr = *card_ptr_addr;
+ // Find the start address represented by the card.
HeapWord* start = _ct->addr_for(card_ptr);
// And find the region containing it.
HeapRegion* r = _g1h->heap_region_containing_or_null(start);
// If this is a (stale) card into an uncommitted region, exit.
if (r == NULL) {
- return;
+ return false;
}
check_card_ptr(card_ptr, _ct);
// If the card is no longer dirty, nothing to do.
+ // We cannot load the card value before the "r == NULL" check, because G1
+ // could uncommit parts of the card table covering uncommitted regions.
if (*card_ptr != G1CardTable::dirty_card_val()) {
- return;
+ return false;
}
// This check is needed for some uncommon cases where we should
@@ -1302,7 +1304,7 @@
// enqueueing of the card and processing it here will have ensured
// we see the up-to-date region type here.
if (!r->is_old_or_humongous_or_archive()) {
- return;
+ return false;
}
// The result from the hot card cache insert call is either:
@@ -1321,7 +1323,7 @@
card_ptr = _hot_card_cache->insert(card_ptr);
if (card_ptr == NULL) {
// There was no eviction. Nothing to do.
- return;
+ return false;
} else if (card_ptr != orig_card_ptr) {
// Original card was inserted and an old card was evicted.
start = _ct->addr_for(card_ptr);
@@ -1331,8 +1333,9 @@
// ignored, as discussed earlier for the original card. The
// region could have been freed while in the cache.
if (!r->is_old_or_humongous_or_archive()) {
- return;
+ return false;
}
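+ // Report the evicted card back to the caller; it is the one to refine.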
+ *card_ptr_addr = card_ptr;
} // Else we still have the original card.
}
@@ -1341,18 +1344,19 @@
// (part of) an object at the end of the allocated space and extend
// beyond the end of allocation.
- // Non-humongous objects are only allocated in the old-gen during
- // GC, so if region is old then top is stable. Humongous object
- // allocation sets top last; if top has not yet been set, this is
- // a stale card and we'll end up with an empty intersection. If
- // this is not a stale card, the synchronization between the
+ // Non-humongous objects are either allocated in old regions during GC,
+ // or mapped into archive regions during startup. So if the region is old
+ // or archive, then top is stable.
+ // Humongous object allocation sets top last; if top has not yet been set,
+ // this is a stale card and we'll end up with an empty intersection.
+ // If this is not a stale card, the synchronization between the
// enqueuing of the card and processing it here will have ensured
// we see the up-to-date top here.
HeapWord* scan_limit = r->top();
if (scan_limit <= start) {
// If the trimmed region is empty, the card must be stale.
- return;
+ return false;
}
// Okay to clean and process the card now. There are still some
@@ -1360,13 +1364,26 @@
// as iteration failure.
*const_cast<volatile CardValue*>(card_ptr) = G1CardTable::clean_card_val();
- // This fence serves two purposes. First, the card must be cleaned
- // before processing the contents. Second, we can't proceed with
- // processing until after the read of top, for synchronization with
- // possibly concurrent humongous object allocation. It's okay that
- // reading top and reading type were racy wrto each other. We need
- // both set, in any order, to proceed.
- OrderAccess::fence();
+ return true;
+}
+
+void G1RemSet::refine_card_concurrently(CardValue* const card_ptr,
+ const uint worker_id) {
+ assert(!_g1h->is_gc_active(), "Only call concurrently");
+ check_card_ptr(card_ptr, _ct);
+
+ // Construct the MemRegion representing the card.
+ HeapWord* start = _ct->addr_for(card_ptr);
+ // And find the region containing it.
+ HeapRegion* r = _g1h->heap_region_containing(start);
+ // This reload of top is safe even though it happens after the full
+ // fence, because top is stable for old, archive and unfiltered humongous
+ // regions, so it must return the same value as the earlier load done when
+ // cleaning the card. Also, cleaning the card and refining it cannot span
+ // a safepoint, so we don't need to worry about top changing during a
+ // safepoint.
+ HeapWord* scan_limit = r->top();
+ assert(scan_limit > start, "sanity");
// Don't use addr_for(card_ptr + 1) which can ask for
// a card beyond the heap.
--- a/src/hotspot/share/gc/g1/g1RemSet.hpp Fri Nov 22 16:26:35 2019 -0800
+++ b/src/hotspot/share/gc/g1/g1RemSet.hpp Fri Nov 22 17:03:55 2019 -0800
@@ -113,10 +113,17 @@
G1GCPhaseTimes::GCParPhases coderoots_phase,
G1GCPhaseTimes::GCParPhases objcopy_phase);
- // Refine the card corresponding to "card_ptr". Safe to be called concurrently
- // to the mutator.
- void refine_card_concurrently(CardValue* card_ptr,
- uint worker_id);
+ // Two methods for concurrent refinement support, executed concurrently with
+ // the mutator:
+ // Cleans the card at "*card_ptr_addr" before refinement, and returns true
+ // iff the card needs later refinement. Note that "*card_ptr_addr" could be
+ // updated to a different card due to use of the hot card cache.
+ bool clean_card_before_refine(CardValue** const card_ptr_addr);
+ // Refines the region corresponding to "card_ptr". Must be called only for
+ // cards that passed clean_card_before_refine(), and only after the proper
+ // fence/synchronization.
+ void refine_card_concurrently(CardValue* const card_ptr,
+ const uint worker_id);
// Print accumulated summary info from the start of the VM.
void print_summary_info();