8230404: Refactor logged card refinement support in G1DirtyCardQueueSet
authorkbarrett
Fri, 06 Sep 2019 13:38:55 -0400
changeset 58033 9162feb63c42
parent 58032 1ebc2f316e45
child 58034 318cd16cc202
8230404: Refactor logged card refinement support in G1DirtyCardQueueSet Summary: Separate concurrent refinement from STW refinement. Reviewed-by: sjohanss, tschatzl
src/hotspot/share/gc/g1/g1CardTableEntryClosure.hpp
src/hotspot/share/gc/g1/g1CollectedHeap.cpp
src/hotspot/share/gc/g1/g1CollectedHeap.hpp
src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp
src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp
src/hotspot/share/gc/g1/g1HotCardCache.cpp
src/hotspot/share/gc/g1/g1RemSet.cpp
--- a/src/hotspot/share/gc/g1/g1CardTableEntryClosure.hpp	Fri Sep 06 15:13:38 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1CardTableEntryClosure.hpp	Fri Sep 06 13:38:55 2019 -0400
@@ -26,6 +26,7 @@
 #define SHARE_GC_G1_G1CARDTABLEENTRYCLOSURE_HPP
 
 #include "gc/shared/cardTable.hpp"
+#include "gc/shared/ptrQueue.hpp"
 #include "memory/allocation.hpp"
 
 // A closure class for processing card table entries.  Note that we don't
@@ -34,9 +35,17 @@
 public:
   typedef CardTable::CardValue CardValue;
 
-  // Process the card whose card table entry is "card_ptr".  If returns
-  // "false", terminate the iteration early.
-  virtual bool do_card_ptr(CardValue* card_ptr, uint worker_id) = 0;
+  // Process the card whose card table entry is "card_ptr".
+  virtual void do_card_ptr(CardValue* card_ptr, uint worker_id) = 0;
+
+  // Process all the card_ptrs in node.
+  void apply_to_buffer(BufferNode* node, size_t buffer_size, uint worker_id) {
+    void** buffer = BufferNode::make_buffer_from_node(node);
+    for (size_t i = node->index(); i < buffer_size; ++i) {
+      CardValue* card_ptr = static_cast<CardValue*>(buffer[i]);
+      do_card_ptr(card_ptr, worker_id);
+    }
+  }
 };
 
 #endif // SHARE_GC_G1_G1CARDTABLEENTRYCLOSURE_HPP
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp	Fri Sep 06 15:13:38 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp	Fri Sep 06 13:38:55 2019 -0400
@@ -132,7 +132,7 @@
   RedirtyLoggedCardTableEntryClosure(G1CollectedHeap* g1h) : G1CardTableEntryClosure(),
     _num_dirtied(0), _g1h(g1h), _g1_ct(g1h->card_table()) { }
 
-  bool do_card_ptr(CardValue* card_ptr, uint worker_i) {
+  void do_card_ptr(CardValue* card_ptr, uint worker_i) {
     HeapRegion* hr = region_for_card(card_ptr);
 
     // Should only dirty cards in regions that won't be freed.
@@ -140,8 +140,6 @@
       *card_ptr = G1CardTable::dirty_card_val();
       _num_dirtied++;
     }
-
-    return true;
   }
 
   size_t num_dirtied()   const { return _num_dirtied; }
@@ -1948,12 +1946,6 @@
   _hot_card_cache->drain(cl, worker_i);
 }
 
-void G1CollectedHeap::iterate_dirty_card_closure(G1CardTableEntryClosure* cl, uint worker_i) {
-  G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
-  while (dcqs.apply_closure_during_gc(cl, worker_i)) {}
-  assert(dcqs.num_cards() == 0, "Completed buffers exist!");
-}
-
 // Computes the sum of the storage used by the various regions.
 size_t G1CollectedHeap::used() const {
   size_t result = _summary_bytes_used + _allocator->used_in_alloc_regions();
@@ -3225,23 +3217,14 @@
   G1CollectedHeap* _g1h;
   BufferNode* volatile _nodes;
 
-  void apply(G1CardTableEntryClosure* cl, BufferNode* node, uint worker_id) {
-    void** buf = BufferNode::make_buffer_from_node(node);
-    size_t limit = _qset->buffer_size();
-    for (size_t i = node->index(); i < limit; ++i) {
-      CardTable::CardValue* card_ptr = static_cast<CardTable::CardValue*>(buf[i]);
-      bool result = cl->do_card_ptr(card_ptr, worker_id);
-      assert(result, "Closure should always return true");
-    }
-  }
-
-  void par_apply(G1CardTableEntryClosure* cl, uint worker_id) {
+  void par_apply(RedirtyLoggedCardTableEntryClosure* cl, uint worker_id) {
+    size_t buffer_size = _qset->buffer_size();
     BufferNode* next = Atomic::load(&_nodes);
     while (next != NULL) {
       BufferNode* node = next;
       next = Atomic::cmpxchg(node->next(), &_nodes, node);
       if (next == node) {
-        apply(cl, node, worker_id);
+        cl->apply_to_buffer(node, buffer_size, worker_id);
         next = node->next();
       }
     }
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp	Fri Sep 06 15:13:38 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp	Fri Sep 06 13:38:55 2019 -0400
@@ -993,9 +993,6 @@
   // Apply the given closure on all cards in the Hot Card Cache, emptying it.
   void iterate_hcc_closure(G1CardTableEntryClosure* cl, uint worker_i);
 
-  // Apply the given closure on all cards in the Dirty Card Queue Set, emptying it.
-  void iterate_dirty_card_closure(G1CardTableEntryClosure* cl, uint worker_i);
-
   // The shared block offset table array.
   G1BlockOffsetTable* bot() const { return _bot; }
 
--- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp	Fri Sep 06 15:13:38 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp	Fri Sep 06 13:38:55 2019 -0400
@@ -41,24 +41,6 @@
 #include "runtime/thread.inline.hpp"
 #include "runtime/threadSMR.hpp"
 
-// Closure used for updating remembered sets and recording references that
-// point into the collection set while the mutator is running.
-// Assumed to be only executed concurrently with the mutator. Yields via
-// SuspendibleThreadSet after every card.
-class G1RefineCardConcurrentlyClosure: public G1CardTableEntryClosure {
-public:
-  bool do_card_ptr(CardValue* card_ptr, uint worker_i) {
-    G1CollectedHeap::heap()->rem_set()->refine_card_concurrently(card_ptr, worker_i);
-
-    if (SuspendibleThreadSet::should_yield()) {
-      // Caller will actually yield.
-      return false;
-    }
-    // Otherwise, we finished successfully; return true.
-    return true;
-  }
-};
-
 G1DirtyCardQueue::G1DirtyCardQueue(G1DirtyCardQueueSet* qset) :
   // Dirty card queues are always active, so we create them with their
   // active field set to true.
@@ -228,25 +210,27 @@
   verify_num_cards();
 }
 
-bool G1DirtyCardQueueSet::apply_closure_to_buffer(G1CardTableEntryClosure* cl,
-                                                  BufferNode* node,
-                                                  uint worker_i) {
-  if (cl == NULL) return true;
-  bool result = true;
-  void** buf = BufferNode::make_buffer_from_node(node);
+G1BufferNodeList G1DirtyCardQueueSet::take_all_completed_buffers() {
+  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+  G1BufferNodeList result(_completed_buffers_head, _completed_buffers_tail, _num_cards);
+  _completed_buffers_head = NULL;
+  _completed_buffers_tail = NULL;
+  _num_cards = 0;
+  return result;
+}
+
+bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node, uint worker_id) {
+  G1RemSet* rem_set = G1CollectedHeap::heap()->rem_set();
+  size_t size = buffer_size();
+  void** buffer = BufferNode::make_buffer_from_node(node);
   size_t i = node->index();
-  size_t limit = buffer_size();
-  for ( ; i < limit; ++i) {
-    CardTable::CardValue* card_ptr = static_cast<CardTable::CardValue*>(buf[i]);
-    assert(card_ptr != NULL, "invariant");
-    if (!cl->do_card_ptr(card_ptr, worker_i)) {
-      result = false;           // Incomplete processing.
-      break;
-    }
+  assert(i <= size, "invariant");
+  for ( ; (i < size) && !SuspendibleThreadSet::should_yield(); ++i) {
+    CardTable::CardValue* cp = static_cast<CardTable::CardValue*>(buffer[i]);
+    rem_set->refine_card_concurrently(cp, worker_id);
   }
-  assert(i <= buffer_size(), "invariant");
   node->set_index(i);
-  return result;
+  return i == size;
 }
 
 #ifndef ASSERT
@@ -282,8 +266,7 @@
 
 bool G1DirtyCardQueueSet::mut_process_buffer(BufferNode* node) {
   uint worker_id = _free_ids.claim_par_id(); // temporarily claim an id
-  G1RefineCardConcurrentlyClosure cl;
-  bool result = apply_closure_to_buffer(&cl, node, worker_id);
+  bool result = refine_buffer(node, worker_id);
   _free_ids.release_par_id(worker_id); // release the id
 
   if (result) {
@@ -293,35 +276,19 @@
   return result;
 }
 
-bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_i, size_t stop_at) {
-  G1RefineCardConcurrentlyClosure cl;
-  return apply_closure_to_completed_buffer(&cl, worker_i, stop_at, false);
-}
-
-bool G1DirtyCardQueueSet::apply_closure_during_gc(G1CardTableEntryClosure* cl, uint worker_i) {
-  assert_at_safepoint();
-  return apply_closure_to_completed_buffer(cl, worker_i, 0, true);
-}
-
-bool G1DirtyCardQueueSet::apply_closure_to_completed_buffer(G1CardTableEntryClosure* cl,
-                                                            uint worker_i,
-                                                            size_t stop_at,
-                                                            bool during_pause) {
-  assert(!during_pause || stop_at == 0, "Should not leave any completed buffers during a pause");
-  BufferNode* nd = get_completed_buffer(stop_at);
-  if (nd == NULL) {
+bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id, size_t stop_at) {
+  BufferNode* node = get_completed_buffer(stop_at);
+  if (node == NULL) {
     return false;
+  } else if (refine_buffer(node, worker_id)) {
+    assert_fully_consumed(node, buffer_size());
+    // Done with fully processed buffer.
+    deallocate_buffer(node);
+    Atomic::inc(&_processed_buffers_rs_thread);
+    return true;
   } else {
-    if (apply_closure_to_buffer(cl, nd, worker_i)) {
-      assert_fully_consumed(nd, buffer_size());
-      // Done with fully processed buffer.
-      deallocate_buffer(nd);
-      Atomic::inc(&_processed_buffers_rs_thread);
-    } else {
-      // Return partially processed buffer to the queue.
-      guarantee(!during_pause, "Should never stop early");
-      enqueue_completed_buffer(nd);
-    }
+    // Return partially processed buffer to the queue.
+    enqueue_completed_buffer(node);
     return true;
   }
 }
--- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp	Fri Sep 06 15:13:38 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp	Fri Sep 06 13:38:55 2019 -0400
@@ -25,11 +25,11 @@
 #ifndef SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP
 #define SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP
 
+#include "gc/g1/g1BufferNodeList.hpp"
 #include "gc/g1/g1FreeIdSet.hpp"
 #include "gc/shared/ptrQueue.hpp"
 #include "memory/allocation.hpp"
 
-class G1CardTableEntryClosure;
 class G1DirtyCardQueueSet;
 class G1RedirtyCardsQueueSet;
 class Thread;
@@ -78,34 +78,14 @@
 
   void abandon_completed_buffers();
 
-  // Apply the closure to the elements of "node" from it's index to
-  // buffer_size.  If all closure applications return true, then
-  // returns true.  Stops processing after the first closure
-  // application that returns false, and returns false from this
-  // function.  The node's index is updated to exclude the processed
-  // elements, e.g. up to the element for which the closure returned
-  // false, or one past the last element if the closure always
-  // returned true.
-  bool apply_closure_to_buffer(G1CardTableEntryClosure* cl,
-                               BufferNode* node,
-                               uint worker_i = 0);
-
-  // If there are more than stop_at completed buffers, pop one, apply
-  // the specified closure to its active elements, and return true.
-  // Otherwise return false.
-  //
-  // A completely processed buffer is freed.  However, if a closure
-  // invocation returns false, processing is stopped and the partially
-  // processed buffer (with its index updated to exclude the processed
-  // elements, e.g. up to the element for which the closure returned
-  // false) is returned to the completed buffer set.
-  //
-  // If during_pause is true, stop_at must be zero, and the closure
-  // must never return false.
-  bool apply_closure_to_completed_buffer(G1CardTableEntryClosure* cl,
-                                         uint worker_i,
-                                         size_t stop_at,
-                                         bool during_pause);
+  // Refine the cards in "node" from it's index to buffer_size.
+  // Stops processing if SuspendibleThreadSet::should_yield() is true.
+  // Returns true if the entire buffer was processed, false if there
+  // is a pending yield request.  The node's index is updated to exclude
+  // the processed elements, e.g. up to the element before processing
+  // stopped, or one past the last element if the entire buffer was
+  // processed.
+  bool refine_buffer(BufferNode* node, uint worker_id);
 
   bool mut_process_buffer(BufferNode* node);
 
@@ -171,13 +151,16 @@
 
   void merge_bufferlists(G1RedirtyCardsQueueSet* src);
 
-  // Apply G1RefineCardConcurrentlyClosure to completed buffers until there are stop_at
-  // completed buffers remaining.
-  bool refine_completed_buffer_concurrently(uint worker_i, size_t stop_at);
+  G1BufferNodeList take_all_completed_buffers();
 
-  // Apply the given closure to all completed buffers. The given closure's do_card_ptr
-  // must never return false. Must only be called during GC.
-  bool apply_closure_during_gc(G1CardTableEntryClosure* cl, uint worker_i);
+  // If there are more than stop_at cards in the completed buffers, pop
+  // a buffer, refine its contents, and return true.  Otherwise return
+  // false.
+  //
+  // Stops processing a buffer if SuspendibleThreadSet::should_yield(),
+  // returning the incompletely processed buffer to the completed buffer
+  // list, for later processing of the remainder.
+  bool refine_completed_buffer_concurrently(uint worker_i, size_t stop_at);
 
   // If a full collection is happening, reset partial logs, and release
   // completed ones: the full collection will make them all irrelevant.
--- a/src/hotspot/share/gc/g1/g1HotCardCache.cpp	Fri Sep 06 15:13:38 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1HotCardCache.cpp	Fri Sep 06 13:38:55 2019 -0400
@@ -99,8 +99,7 @@
     for (size_t i = start_idx; i < end_idx; i++) {
       CardValue* card_ptr = _hot_cache[i];
       if (card_ptr != NULL) {
-        bool result = cl->do_card_ptr(card_ptr, worker_i);
-        assert(result, "Closure should always return true");
+        cl->do_card_ptr(card_ptr, worker_i);
       } else {
         break;
       }
--- a/src/hotspot/share/gc/g1/g1RemSet.cpp	Fri Sep 06 15:13:38 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1RemSet.cpp	Fri Sep 06 13:38:55 2019 -0400
@@ -42,6 +42,7 @@
 #include "gc/g1/heapRegionRemSet.inline.hpp"
 #include "gc/g1/sparsePRT.hpp"
 #include "gc/shared/gcTraceTime.inline.hpp"
+#include "gc/shared/ptrQueue.hpp"
 #include "gc/shared/suspendibleThreadSet.hpp"
 #include "jfr/jfrEvents.hpp"
 #include "memory/iterator.hpp"
@@ -1060,7 +1061,7 @@
       _scan_state(scan_state), _ct(g1h->card_table()), _cards_dirty(0), _cards_skipped(0)
     {}
 
-    bool do_card_ptr(CardValue* card_ptr, uint worker_i) {
+    void do_card_ptr(CardValue* card_ptr, uint worker_i) {
       // The only time we care about recording cards that
       // contain references that point into the collection set
       // is during RSet updating within an evacuation pause.
@@ -1084,7 +1085,6 @@
         // regions to clear the card table at the end during the prepare() phase.
         _cards_skipped++;
       }
-      return true;
     }
 
     size_t cards_dirty() const { return _cards_dirty; }
@@ -1093,17 +1093,37 @@
 
   HeapRegionClaimer _hr_claimer;
   G1RemSetScanState* _scan_state;
+  BufferNode::Stack _dirty_card_buffers;
   bool _initial_evacuation;
 
   volatile bool _fast_reclaim_handled;
 
+  void apply_closure_to_dirty_card_buffers(G1MergeLogBufferCardsClosure* cl, uint worker_id) {
+    G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
+    size_t buffer_size = dcqs.buffer_size();
+    while (BufferNode* node = _dirty_card_buffers.pop()) {
+      cl->apply_to_buffer(node, buffer_size, worker_id);
+      dcqs.deallocate_buffer(node);
+    }
+  }
+
 public:
   G1MergeHeapRootsTask(G1RemSetScanState* scan_state, uint num_workers, bool initial_evacuation) :
     AbstractGangTask("G1 Merge Heap Roots"),
     _hr_claimer(num_workers),
     _scan_state(scan_state),
+    _dirty_card_buffers(),
     _initial_evacuation(initial_evacuation),
-    _fast_reclaim_handled(false) { }
+    _fast_reclaim_handled(false)
+  {
+    if (initial_evacuation) {
+      G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
+      G1BufferNodeList buffers = dcqs.take_all_completed_buffers();
+      if (buffers._entry_count != 0) {
+        _dirty_card_buffers.prepend(*buffers._head, *buffers._tail);
+      }
+    }
+  }
 
   virtual void work(uint worker_id) {
     G1CollectedHeap* g1h = G1CollectedHeap::heap();
@@ -1158,7 +1178,7 @@
       G1GCParPhaseTimesTracker x(p, G1GCPhaseTimes::MergeLB, worker_id);
 
       G1MergeLogBufferCardsClosure cl(g1h, _scan_state);
-      g1h->iterate_dirty_card_closure(&cl, worker_id);
+      apply_closure_to_dirty_card_buffers(&cl, worker_id);
 
       p->record_thread_work_item(G1GCPhaseTimes::MergeLB, worker_id, cl.cards_dirty(), G1GCPhaseTimes::MergeLBDirtyCards);
       p->record_thread_work_item(G1GCPhaseTimes::MergeLB, worker_id, cl.cards_skipped(), G1GCPhaseTimes::MergeLBSkippedCards);