8162929: Enqueuing dirty cards into a single DCQS during GC does not scale
authorkbarrett
Fri, 19 Jul 2019 16:47:11 -0400
changeset 55752 8ae33203d600
parent 55751 014decdb5086
child 55753 b9798272720b
child 57477 05637b1896c6
child 57924 ce786c3f1f1c
8162929: Enqueuing dirty cards into a single DCQS during GC does not scale Summary: Refactor into G1RedirtyCardsQueue[Set] and G1DirtyCardQueueSet Reviewed-by: tschatzl, sangheki
src/hotspot/share/gc/g1/g1CardTableEntryClosure.hpp
src/hotspot/share/gc/g1/g1CollectedHeap.cpp
src/hotspot/share/gc/g1/g1CollectedHeap.hpp
src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp
src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp
src/hotspot/share/gc/g1/g1EvacFailure.cpp
src/hotspot/share/gc/g1/g1HotCardCache.cpp
src/hotspot/share/gc/g1/g1ParScanThreadState.cpp
src/hotspot/share/gc/g1/g1ParScanThreadState.hpp
src/hotspot/share/gc/g1/g1RedirtyCardsQueue.cpp
src/hotspot/share/gc/g1/g1RedirtyCardsQueue.hpp
src/hotspot/share/gc/g1/g1RemSet.cpp
src/hotspot/share/gc/shared/ptrQueue.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/gc/g1/g1CardTableEntryClosure.hpp	Fri Jul 19 16:47:11 2019 -0400
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_GC_G1_G1CARDTABLEENTRYCLOSURE_HPP
+#define SHARE_GC_G1_G1CARDTABLEENTRYCLOSURE_HPP
+
+#include "gc/shared/cardTable.hpp"
+#include "memory/allocation.hpp"
+
+// A closure class for processing card table entries.  Note that we don't
+// require these closure objects to be stack-allocated.
+class G1CardTableEntryClosure: public CHeapObj<mtGC> {
+public:
+  typedef CardTable::CardValue CardValue;
+
+  // Process the card whose card table entry is "card_ptr".  If returns
+  // "false", terminate the iteration early.
+  virtual bool do_card_ptr(CardValue* card_ptr, uint worker_id) = 0;
+};
+
+#endif // SHARE_GC_G1_G1CARDTABLEENTRYCLOSURE_HPP
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp	Fri Jul 19 12:39:31 2019 -0400
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp	Fri Jul 19 16:47:11 2019 -0400
@@ -31,6 +31,7 @@
 #include "gc/g1/g1Allocator.inline.hpp"
 #include "gc/g1/g1Arguments.hpp"
 #include "gc/g1/g1BarrierSet.hpp"
+#include "gc/g1/g1CardTableEntryClosure.hpp"
 #include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/g1CollectionSet.hpp"
 #include "gc/g1/g1CollectorState.hpp"
@@ -49,6 +50,7 @@
 #include "gc/g1/g1OopClosures.inline.hpp"
 #include "gc/g1/g1ParScanThreadState.inline.hpp"
 #include "gc/g1/g1Policy.hpp"
+#include "gc/g1/g1RedirtyCardsQueue.hpp"
 #include "gc/g1/g1RegionToSpaceMapper.hpp"
 #include "gc/g1/g1RemSet.hpp"
 #include "gc/g1/g1RootClosures.hpp"
@@ -1078,7 +1080,9 @@
 
   // Discard all remembered set updates.
   G1BarrierSet::dirty_card_queue_set().abandon_logs();
-  assert(dirty_card_queue_set().completed_buffers_num() == 0, "DCQS should be empty");
+  assert(G1BarrierSet::dirty_card_queue_set().completed_buffers_num() == 0,
+         "DCQS should be empty");
+  redirty_cards_queue_set().verify_empty();
 }
 
 void G1CollectedHeap::verify_after_full_collection() {
@@ -1517,7 +1521,7 @@
   _collection_set(this, _policy),
   _hot_card_cache(NULL),
   _rem_set(NULL),
-  _dirty_card_queue_set(false),
+  _redirty_cards_queue_set(),
   _cm(NULL),
   _cm_thread(NULL),
   _cr(NULL),
@@ -1687,8 +1691,8 @@
                                                   &bs->dirty_card_queue_buffer_allocator(),
                                                   true); // init_free_ids
 
-  dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
-                                    &bs->dirty_card_queue_buffer_allocator());
+  // Use same buffer allocator as dirty card qset, to allow merging.
+  _redirty_cards_queue_set.initialize(&bs->dirty_card_queue_buffer_allocator());
 
   // Create the hot card cache.
   _hot_card_cache = new G1HotCardCache(this);
@@ -3213,18 +3217,43 @@
 
 class G1RedirtyLoggedCardsTask : public AbstractGangTask {
  private:
-  G1DirtyCardQueueSet* _queue;
+  G1RedirtyCardsQueueSet* _qset;
   G1CollectedHeap* _g1h;
+  BufferNode* volatile _nodes;
+
+  void apply(G1CardTableEntryClosure* cl, BufferNode* node, uint worker_id) {
+    void** buf = BufferNode::make_buffer_from_node(node);
+    size_t limit = _qset->buffer_size();
+    for (size_t i = node->index(); i < limit; ++i) {
+      CardTable::CardValue* card_ptr = static_cast<CardTable::CardValue*>(buf[i]);
+      bool result = cl->do_card_ptr(card_ptr, worker_id);
+      assert(result, "Closure should always return true");
+    }
+  }
+
+  void par_apply(G1CardTableEntryClosure* cl, uint worker_id) {
+    BufferNode* next = Atomic::load(&_nodes);
+    while (next != NULL) {
+      BufferNode* node = next;
+      next = Atomic::cmpxchg(node->next(), &_nodes, node);
+      if (next == node) {
+        apply(cl, node, worker_id);
+        next = node->next();
+      }
+    }
+  }
+
  public:
-  G1RedirtyLoggedCardsTask(G1DirtyCardQueueSet* queue, G1CollectedHeap* g1h) : AbstractGangTask("Redirty Cards"),
-    _queue(queue), _g1h(g1h) { }
+  G1RedirtyLoggedCardsTask(G1RedirtyCardsQueueSet* qset, G1CollectedHeap* g1h) :
+    AbstractGangTask("Redirty Cards"),
+    _qset(qset), _g1h(g1h), _nodes(qset->all_completed_buffers()) { }
 
   virtual void work(uint worker_id) {
     G1GCPhaseTimes* p = _g1h->phase_times();
     G1GCParPhaseTimesTracker x(p, G1GCPhaseTimes::RedirtyCards, worker_id);
 
     RedirtyLoggedCardTableEntryClosure cl(_g1h);
-    _queue->par_apply_closure_to_all_completed_buffers(&cl);
+    par_apply(&cl, worker_id);
 
     p->record_thread_work_item(G1GCPhaseTimes::RedirtyCards, worker_id, cl.num_dirtied());
   }
@@ -3233,13 +3262,12 @@
 void G1CollectedHeap::redirty_logged_cards() {
   double redirty_logged_cards_start = os::elapsedTime();
 
-  G1RedirtyLoggedCardsTask redirty_task(&dirty_card_queue_set(), this);
-  dirty_card_queue_set().reset_for_par_iteration();
+  G1RedirtyLoggedCardsTask redirty_task(&redirty_cards_queue_set(), this);
   workers()->run_task(&redirty_task);
 
   G1DirtyCardQueueSet& dcq = G1BarrierSet::dirty_card_queue_set();
-  dcq.merge_bufferlists(&dirty_card_queue_set());
-  assert(dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
+  dcq.merge_bufferlists(&redirty_cards_queue_set());
+  redirty_cards_queue_set().verify_empty();
 
   phase_times()->record_redirty_logged_cards_time_ms((os::elapsedTime() - redirty_logged_cards_start) * 1000.0);
 }
@@ -3571,7 +3599,7 @@
   // Should G1EvacuationFailureALot be in effect for this GC?
   NOT_PRODUCT(set_evacuation_failure_alot_for_current_gc();)
 
-  assert(dirty_card_queue_set().completed_buffers_num() == 0, "Should be empty");
+  redirty_cards_queue_set().verify_empty();
 }
 
 class G1EvacuateRegionsBaseTask : public AbstractGangTask {
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp	Fri Jul 19 12:39:31 2019 -0400
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp	Fri Jul 19 16:47:11 2019 -0400
@@ -31,7 +31,6 @@
 #include "gc/g1/g1CollectionSet.hpp"
 #include "gc/g1/g1CollectorState.hpp"
 #include "gc/g1/g1ConcurrentMark.hpp"
-#include "gc/g1/g1DirtyCardQueue.hpp"
 #include "gc/g1/g1EdenRegions.hpp"
 #include "gc/g1/g1EvacFailure.hpp"
 #include "gc/g1/g1EvacStats.hpp"
@@ -42,6 +41,7 @@
 #include "gc/g1/g1HRPrinter.hpp"
 #include "gc/g1/g1HeapRegionAttr.hpp"
 #include "gc/g1/g1MonitoringSupport.hpp"
+#include "gc/g1/g1RedirtyCardsQueue.hpp"
 #include "gc/g1/g1SurvivorRegions.hpp"
 #include "gc/g1/g1YCTypes.hpp"
 #include "gc/g1/heapRegionManager.hpp"
@@ -73,6 +73,7 @@
 class SpaceClosure;
 class CompactibleSpaceClosure;
 class Space;
+class G1CardTableEntryClosure;
 class G1CollectionSet;
 class G1Policy;
 class G1HotCardCache;
@@ -775,7 +776,7 @@
 
   // A set of cards that cover the objects for which the Rsets should be updated
   // concurrently after the collection.
-  G1DirtyCardQueueSet _dirty_card_queue_set;
+  G1RedirtyCardsQueueSet _redirty_cards_queue_set;
 
   // After a collection pause, convert the regions in the collection set into free
   // regions.
@@ -935,7 +936,9 @@
   uint num_task_queues() const;
 
   // A set of cards where updates happened during the GC
-  G1DirtyCardQueueSet& dirty_card_queue_set() { return _dirty_card_queue_set; }
+  G1RedirtyCardsQueueSet& redirty_cards_queue_set() {
+    return _redirty_cards_queue_set;
+  }
 
   // Create a G1CollectedHeap.
   // Must call the initialize method afterwards.
--- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp	Fri Jul 19 12:39:31 2019 -0400
+++ b/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp	Fri Jul 19 16:47:11 2019 -0400
@@ -23,9 +23,11 @@
  */
 
 #include "precompiled.hpp"
+#include "gc/g1/g1CardTableEntryClosure.hpp"
 #include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/g1DirtyCardQueue.hpp"
 #include "gc/g1/g1FreeIdSet.hpp"
+#include "gc/g1/g1RedirtyCardsQueue.hpp"
 #include "gc/g1/g1RemSet.hpp"
 #include "gc/g1/g1ThreadLocalData.hpp"
 #include "gc/g1/heapRegionRemSet.hpp"
@@ -90,8 +92,7 @@
   _completed_buffers_padding(0),
   _free_ids(NULL),
   _processed_buffers_mut(0),
-  _processed_buffers_rs_thread(0),
-  _cur_par_buffer_node(NULL)
+  _processed_buffers_rs_thread(0)
 {
   _all_active = true;
 }
@@ -211,26 +212,22 @@
 // Merge lists of buffers. Notify the processing threads.
 // The source queue is emptied as a result. The queues
 // must share the monitor.
-void G1DirtyCardQueueSet::merge_bufferlists(G1DirtyCardQueueSet *src) {
-  assert(_cbl_mon == src->_cbl_mon, "Should share the same lock");
+void G1DirtyCardQueueSet::merge_bufferlists(G1RedirtyCardsQueueSet* src) {
+  assert(allocator() == src->allocator(), "precondition");
+  const G1RedirtyCardsBufferList from = src->take_all_completed_buffers();
+  if (from._head == NULL) return;
+
   MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
   if (_completed_buffers_tail == NULL) {
     assert(_completed_buffers_head == NULL, "Well-formedness");
-    _completed_buffers_head = src->_completed_buffers_head;
-    _completed_buffers_tail = src->_completed_buffers_tail;
+    _completed_buffers_head = from._head;
+    _completed_buffers_tail = from._tail;
   } else {
     assert(_completed_buffers_head != NULL, "Well formedness");
-    if (src->_completed_buffers_head != NULL) {
-      _completed_buffers_tail->set_next(src->_completed_buffers_head);
-      _completed_buffers_tail = src->_completed_buffers_tail;
-    }
+    _completed_buffers_tail->set_next(from._head);
+    _completed_buffers_tail = from._tail;
   }
-  _n_completed_buffers += src->_n_completed_buffers;
-
-  src->_n_completed_buffers = 0;
-  src->_completed_buffers_head = NULL;
-  src->_completed_buffers_tail = NULL;
-  src->set_process_completed_buffers(false);
+  _n_completed_buffers += from._count;
 
   assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL ||
          _completed_buffers_head != NULL && _completed_buffers_tail != NULL,
@@ -240,7 +237,6 @@
 
 bool G1DirtyCardQueueSet::apply_closure_to_buffer(G1CardTableEntryClosure* cl,
                                                   BufferNode* node,
-                                                  bool consume,
                                                   uint worker_i) {
   if (cl == NULL) return true;
   bool result = true;
@@ -255,10 +251,8 @@
       break;
     }
   }
-  if (consume) {
-    assert(i <= buffer_size(), "invariant");
-    node->set_index(i);
-  }
+  assert(i <= buffer_size(), "invariant");
+  node->set_index(i);
   return result;
 }
 
@@ -299,7 +293,7 @@
 
   uint worker_i = _free_ids->claim_par_id(); // temporarily claim an id
   G1RefineCardConcurrentlyClosure cl;
-  bool result = apply_closure_to_buffer(&cl, node, true, worker_i);
+  bool result = apply_closure_to_buffer(&cl, node, worker_i);
   _free_ids->release_par_id(worker_i); // release the id
 
   if (result) {
@@ -328,7 +322,7 @@
   if (nd == NULL) {
     return false;
   } else {
-    if (apply_closure_to_buffer(cl, nd, true, worker_i)) {
+    if (apply_closure_to_buffer(cl, nd, worker_i)) {
       assert_fully_consumed(nd, buffer_size());
       // Done with fully processed buffer.
       deallocate_buffer(nd);
@@ -342,21 +336,6 @@
   }
 }
 
-void G1DirtyCardQueueSet::par_apply_closure_to_all_completed_buffers(G1CardTableEntryClosure* cl) {
-  BufferNode* nd = _cur_par_buffer_node;
-  while (nd != NULL) {
-    BufferNode* next = nd->next();
-    BufferNode* actual = Atomic::cmpxchg(next, &_cur_par_buffer_node, nd);
-    if (actual == nd) {
-      bool b = apply_closure_to_buffer(cl, nd, false);
-      guarantee(b, "Should not stop early.");
-      nd = next;
-    } else {
-      nd = actual;
-    }
-  }
-}
-
 void G1DirtyCardQueueSet::abandon_logs() {
   assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
   abandon_completed_buffers();
--- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp	Fri Jul 19 12:39:31 2019 -0400
+++ b/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp	Fri Jul 19 16:47:11 2019 -0400
@@ -25,26 +25,16 @@
 #ifndef SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP
 #define SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP
 
-#include "gc/shared/cardTable.hpp"
 #include "gc/shared/ptrQueue.hpp"
 #include "memory/allocation.hpp"
 
+class G1CardTableEntryClosure;
 class G1DirtyCardQueueSet;
 class G1FreeIdSet;
+class G1RedirtyCardsQueueSet;
 class Thread;
 class Monitor;
 
-// A closure class for processing card table entries.  Note that we don't
-// require these closure objects to be stack-allocated.
-class G1CardTableEntryClosure: public CHeapObj<mtGC> {
-public:
-  typedef CardTable::CardValue CardValue;
-
-  // Process the card whose card table entry is "card_ptr".  If returns
-  // "false", terminate the iteration early.
-  virtual bool do_card_ptr(CardValue* card_ptr, uint worker_i) = 0;
-};
-
 // A ptrQueue whose elements are "oops", pointers to object heads.
 class G1DirtyCardQueue: public PtrQueue {
 protected:
@@ -95,12 +85,12 @@
   // buffer_size.  If all closure applications return true, then
   // returns true.  Stops processing after the first closure
   // application that returns false, and returns false from this
-  // function.  If "consume" is true, the node's index is updated to
-  // exclude the processed elements, e.g. up to the element for which
-  // the closure returned false.
+  // function.  The node's index is updated to exclude the processed
+  // elements, e.g. up to the element for which the closure returned
+  // false, or one past the last element if the closure always
+  // returned true.
   bool apply_closure_to_buffer(G1CardTableEntryClosure* cl,
                                BufferNode* node,
-                               bool consume,
                                uint worker_i = 0);
 
   // If there are more than stop_at completed buffers, pop one, apply
@@ -135,9 +125,6 @@
   jint _processed_buffers_mut;
   jint _processed_buffers_rs_thread;
 
-  // Current buffer node used for parallel iteration.
-  BufferNode* volatile _cur_par_buffer_node;
-
 public:
   G1DirtyCardQueueSet(bool notify_when_complete = true);
   ~G1DirtyCardQueueSet();
@@ -183,7 +170,7 @@
   // Notify the consumer if the number of buffers crossed the threshold
   void notify_if_necessary();
 
-  void merge_bufferlists(G1DirtyCardQueueSet* src);
+  void merge_bufferlists(G1RedirtyCardsQueueSet* src);
 
   // Apply G1RefineCardConcurrentlyClosure to completed buffers until there are stop_at
   // completed buffers remaining.
@@ -193,12 +180,6 @@
   // must never return false. Must only be called during GC.
   bool apply_closure_during_gc(G1CardTableEntryClosure* cl, uint worker_i);
 
-  void reset_for_par_iteration() { _cur_par_buffer_node = _completed_buffers_head; }
-  // Applies the current closure to all completed buffers, non-consumptively.
-  // Can be used in parallel, all callers using the iteration state initialized
-  // by reset_for_par_iteration.
-  void par_apply_closure_to_all_completed_buffers(G1CardTableEntryClosure* cl);
-
   // If a full collection is happening, reset partial logs, and release
   // completed ones: the full collection will make them all irrelevant.
   void abandon_logs();
--- a/src/hotspot/share/gc/g1/g1EvacFailure.cpp	Fri Jul 19 12:39:31 2019 -0400
+++ b/src/hotspot/share/gc/g1/g1EvacFailure.cpp	Fri Jul 19 16:47:11 2019 -0400
@@ -26,10 +26,10 @@
 #include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/g1CollectorState.hpp"
 #include "gc/g1/g1ConcurrentMark.inline.hpp"
-#include "gc/g1/g1DirtyCardQueue.hpp"
 #include "gc/g1/g1EvacFailure.hpp"
 #include "gc/g1/g1HeapVerifier.hpp"
 #include "gc/g1/g1OopClosures.inline.hpp"
+#include "gc/g1/g1RedirtyCardsQueue.hpp"
 #include "gc/g1/heapRegion.hpp"
 #include "gc/g1/heapRegionRemSet.hpp"
 #include "gc/shared/preservedMarks.inline.hpp"
@@ -40,7 +40,7 @@
 class UpdateLogBuffersDeferred : public BasicOopIterateClosure {
 private:
   G1CollectedHeap* _g1h;
-  G1DirtyCardQueue* _dcq;
+  G1RedirtyCardsQueue* _rdcq;
   G1CardTable*    _ct;
 
   // Remember the last enqueued card to avoid enqueuing the same card over and over;
@@ -48,8 +48,8 @@
   size_t _last_enqueued_card;
 
 public:
-  UpdateLogBuffersDeferred(G1DirtyCardQueue* dcq) :
-    _g1h(G1CollectedHeap::heap()), _dcq(dcq), _ct(_g1h->card_table()), _last_enqueued_card(SIZE_MAX) {}
+  UpdateLogBuffersDeferred(G1RedirtyCardsQueue* rdcq) :
+    _g1h(G1CollectedHeap::heap()), _rdcq(rdcq), _ct(_g1h->card_table()), _last_enqueued_card(SIZE_MAX) {}
 
   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
   virtual void do_oop(      oop* p) { do_oop_work(p); }
@@ -67,7 +67,7 @@
     }
     size_t card_index = _ct->index_for(p);
     if (card_index != _last_enqueued_card) {
-      _dcq->enqueue(_ct->byte_for_index(card_index));
+      _rdcq->enqueue(_ct->byte_for_index(card_index));
       _last_enqueued_card = card_index;
     }
   }
@@ -199,15 +199,15 @@
   G1CollectedHeap* _g1h;
   uint _worker_id;
 
-  G1DirtyCardQueue _dcq;
+  G1RedirtyCardsQueue _rdcq;
   UpdateLogBuffersDeferred _log_buffer_cl;
 
 public:
   RemoveSelfForwardPtrHRClosure(uint worker_id) :
     _g1h(G1CollectedHeap::heap()),
     _worker_id(worker_id),
-    _dcq(&_g1h->dirty_card_queue_set()),
-    _log_buffer_cl(&_dcq) {
+    _rdcq(&_g1h->redirty_cards_queue_set()),
+    _log_buffer_cl(&_rdcq) {
   }
 
   size_t remove_self_forward_ptr_by_walking_hr(HeapRegion* hr,
--- a/src/hotspot/share/gc/g1/g1HotCardCache.cpp	Fri Jul 19 12:39:31 2019 -0400
+++ b/src/hotspot/share/gc/g1/g1HotCardCache.cpp	Fri Jul 19 16:47:11 2019 -0400
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "gc/g1/g1CardTableEntryClosure.hpp"
 #include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/g1DirtyCardQueue.hpp"
 #include "gc/g1/g1HotCardCache.hpp"
--- a/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp	Fri Jul 19 12:39:31 2019 -0400
+++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp	Fri Jul 19 16:47:11 2019 -0400
@@ -43,7 +43,7 @@
                                            size_t optional_cset_length)
   : _g1h(g1h),
     _refs(g1h->task_queue(worker_id)),
-    _dcq(&g1h->dirty_card_queue_set()),
+    _rdcq(&g1h->redirty_cards_queue_set()),
     _ct(g1h->card_table()),
     _closures(NULL),
     _plab_allocator(NULL),
@@ -88,7 +88,7 @@
 
 // Pass locally gathered statistics to global state.
 void G1ParScanThreadState::flush(size_t* surviving_young_words) {
-  _dcq.flush();
+  _rdcq.flush();
   // Update allocation statistics.
   _plab_allocator->flush_and_retire_stats();
   _g1h->policy()->record_age_table(&_age_table);
--- a/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp	Fri Jul 19 12:39:31 2019 -0400
+++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp	Fri Jul 19 16:47:11 2019 -0400
@@ -27,7 +27,7 @@
 
 #include "gc/g1/g1CardTable.hpp"
 #include "gc/g1/g1CollectedHeap.hpp"
-#include "gc/g1/g1DirtyCardQueue.hpp"
+#include "gc/g1/g1RedirtyCardsQueue.hpp"
 #include "gc/g1/g1OopClosures.hpp"
 #include "gc/g1/g1Policy.hpp"
 #include "gc/g1/g1RemSet.hpp"
@@ -46,7 +46,7 @@
 class G1ParScanThreadState : public CHeapObj<mtGC> {
   G1CollectedHeap* _g1h;
   RefToScanQueue* _refs;
-  G1DirtyCardQueue _dcq;
+  G1RedirtyCardsQueue _rdcq;
   G1CardTable* _ct;
   G1EvacuationRootClosures* _closures;
 
@@ -81,7 +81,7 @@
 
 #define PADDING_ELEM_NUM (DEFAULT_CACHE_LINE_SIZE / sizeof(size_t))
 
-  G1DirtyCardQueue& dirty_card_queue()           { return _dcq; }
+  G1RedirtyCardsQueue& redirty_cards_queue()     { return _rdcq; }
   G1CardTable* ct()                              { return _ct; }
 
   G1HeapRegionAttr dest(G1HeapRegionAttr original) const {
@@ -133,7 +133,7 @@
     size_t card_index = ct()->index_for(p);
     // If the card hasn't been added to the buffer, do it.
     if (_last_enqueued_card != card_index) {
-      dirty_card_queue().enqueue(ct()->byte_for_index(card_index));
+      redirty_cards_queue().enqueue(ct()->byte_for_index(card_index));
       _last_enqueued_card = card_index;
     }
   }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/gc/g1/g1RedirtyCardsQueue.cpp	Fri Jul 19 16:47:11 2019 -0400
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc/g1/g1RedirtyCardsQueue.hpp"
+#include "runtime/atomic.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/macros.hpp"
+
+// G1RedirtyCardsBufferList
+
+G1RedirtyCardsBufferList::G1RedirtyCardsBufferList() :
+  _head(NULL), _tail(NULL), _count(0) {}
+
+G1RedirtyCardsBufferList::G1RedirtyCardsBufferList(BufferNode* head,
+                                                   BufferNode* tail,
+                                                   size_t count) :
+  _head(head), _tail(tail), _count(count)
+{
+  assert((_head == NULL) == (_tail == NULL), "invariant");
+  assert((_head == NULL) == (_count == 0), "invariant");
+}
+
+// G1RedirtyCardsQueueBase::LocalQSet
+
+G1RedirtyCardsQueueBase::LocalQSet::LocalQSet(G1RedirtyCardsQueueSet* shared_qset) :
+  PtrQueueSet(),
+  _shared_qset(shared_qset),
+  _buffers()
+{
+  PtrQueueSet::initialize(_shared_qset->allocator());
+}
+
+G1RedirtyCardsQueueBase::LocalQSet::~LocalQSet() {
+  assert(_buffers._head == NULL, "unflushed qset");
+  assert(_buffers._tail == NULL, "invariant");
+  assert(_buffers._count == 0, "invariant");
+}
+
+void G1RedirtyCardsQueueBase::LocalQSet::enqueue_completed_buffer(BufferNode* node) {
+  ++_buffers._count;
+  node->set_next(_buffers._head);
+  _buffers._head = node;
+  if (_buffers._tail == NULL) {
+    _buffers._tail = node;
+  }
+}
+
+G1RedirtyCardsBufferList
+G1RedirtyCardsQueueBase::LocalQSet::take_all_completed_buffers() {
+  G1RedirtyCardsBufferList result = _buffers;
+  _buffers = G1RedirtyCardsBufferList();
+  return result;
+}
+
+void G1RedirtyCardsQueueBase::LocalQSet::flush() {
+  _shared_qset->merge_bufferlist(this);
+}
+
+// G1RedirtyCardsQueue
+
+G1RedirtyCardsQueue::G1RedirtyCardsQueue(G1RedirtyCardsQueueSet* qset) :
+  G1RedirtyCardsQueueBase(qset), // Init _local_qset before passing to PtrQueue.
+  PtrQueue(&_local_qset, true /* active (always) */)
+{}
+
+G1RedirtyCardsQueue::~G1RedirtyCardsQueue() {
+  flush();
+}
+
+void G1RedirtyCardsQueue::handle_completed_buffer() {
+  enqueue_completed_buffer();
+}
+
+void G1RedirtyCardsQueue::flush() {
+  flush_impl();
+  _local_qset.flush();
+}
+
+// G1RedirtyCardsQueueSet
+
+G1RedirtyCardsQueueSet::G1RedirtyCardsQueueSet() :
+  PtrQueueSet(),
+  _list(),
+  _count(0),
+  _tail(NULL)
+  DEBUG_ONLY(COMMA _collecting(true))
+{}
+
+G1RedirtyCardsQueueSet::~G1RedirtyCardsQueueSet() {
+  verify_empty();
+}
+
+#ifdef ASSERT
+void G1RedirtyCardsQueueSet::verify_empty() const {
+  assert(_list.empty(), "precondition");
+  assert(_tail == NULL, "invariant");
+  assert(_count == 0, "invariant");
+}
+#endif // ASSERT
+
+BufferNode* G1RedirtyCardsQueueSet::all_completed_buffers() const {
+  DEBUG_ONLY(_collecting = false;)
+  return _list.top();
+}
+
+G1RedirtyCardsBufferList G1RedirtyCardsQueueSet::take_all_completed_buffers() {
+  DEBUG_ONLY(_collecting = false;)
+  G1RedirtyCardsBufferList result(_list.pop_all(), _tail, _count);
+  _tail = NULL;
+  _count = 0;
+  DEBUG_ONLY(_collecting = true;)
+  return result;
+}
+
+void G1RedirtyCardsQueueSet::update_tail(BufferNode* node) {
+  // Node is the tail of a (possibly single element) list just prepended to
+  // _list.  If, after that prepend, node's follower is NULL, then node is
+  // also the tail of _list, so record it as such.
+  if (node->next() == NULL) {
+    assert(_tail == NULL, "invariant");
+    _tail = node;
+  }
+}
+
+void G1RedirtyCardsQueueSet::enqueue_completed_buffer(BufferNode* node) {
+  assert(_collecting, "precondition");
+  Atomic::inc(&_count);
+  _list.push(*node);
+  update_tail(node);
+}
+
+void G1RedirtyCardsQueueSet::merge_bufferlist(LocalQSet* src) {
+  assert(_collecting, "precondition");
+  const G1RedirtyCardsBufferList from = src->take_all_completed_buffers();
+  if (from._head != NULL) {
+    assert(from._tail != NULL, "invariant");
+    Atomic::add(from._count, &_count);
+    _list.prepend(*from._head, *from._tail);
+    update_tail(from._tail);
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/gc/g1/g1RedirtyCardsQueue.hpp	Fri Jul 19 16:47:11 2019 -0400
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_GC_G1_G1REDIRTYCARDSQUEUE_HPP
+#define SHARE_GC_G1_G1REDIRTYCARDSQUEUE_HPP
+
+#include "gc/shared/ptrQueue.hpp"
+#include "memory/allocation.hpp"
+#include "memory/padded.hpp"
+
+class G1CardTableEntryClosure;
+class G1RedirtyCardsQueue;
+class G1RedirtyCardsQueueSet;
+
+struct G1RedirtyCardsBufferList {
+  BufferNode* _head;
+  BufferNode* _tail;
+  size_t _count;
+
+  G1RedirtyCardsBufferList();
+  G1RedirtyCardsBufferList(BufferNode* head, BufferNode* tail, size_t count);
+};
+
+// Provide G1RedirtyCardsQueue with a thread-local qset.  It provides an
+// uncontended staging area for completed buffers, to be flushed to the
+// shared qset en masse.  Using the "base from member" idiom so the local
+// qset is constructed before being passed to the PtrQueue constructor.
+class G1RedirtyCardsQueueBase {
+  friend class G1RedirtyCardsQueue;
+  friend class G1RedirtyCardsQueueSet;
+
+  class LocalQSet : public PtrQueueSet {
+    G1RedirtyCardsQueueSet* _shared_qset;
+    G1RedirtyCardsBufferList _buffers;
+
+  public:
+    LocalQSet(G1RedirtyCardsQueueSet* shared_qset);
+    ~LocalQSet();
+
+    // Add the buffer to the local list.
+    virtual void enqueue_completed_buffer(BufferNode* node);
+
+    // Transfer all completed buffers to the shared qset.
+    void flush();
+
+    G1RedirtyCardsBufferList take_all_completed_buffers();
+  };
+
+  G1RedirtyCardsQueueBase(G1RedirtyCardsQueueSet* shared_qset) :
+    _local_qset(shared_qset) {}
+
+  ~G1RedirtyCardsQueueBase() {}
+
+  LocalQSet _local_qset;
+};
+
+// Worker-local queues of card table entries.
+class G1RedirtyCardsQueue : private G1RedirtyCardsQueueBase, public PtrQueue {
+protected:
+  virtual void handle_completed_buffer();
+
+public:
+  G1RedirtyCardsQueue(G1RedirtyCardsQueueSet* qset);
+
+  // Flushes the queue.
+  ~G1RedirtyCardsQueue();
+
+  // Flushes all enqueued cards to qset.
+  void flush();
+};
+
+// Card table entries to be redirtied and the cards reprocessed later.
+// Has two phases, collecting and processing.  During the collecting
+// phase buffers are added to the set.  Once collecting is complete and
+// processing starts, buffers can no longer be added.  Taking all the
+// collected (and processed) buffers reverts back to collecting, allowing
+// the set to be reused for another round of redirtying.
+class G1RedirtyCardsQueueSet : public PtrQueueSet {
+  DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, 0);
+  BufferNode::Stack _list;
+  DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(size_t));
+  volatile size_t _count;
+  DEFINE_PAD_MINUS_SIZE(3, DEFAULT_CACHE_LINE_SIZE, sizeof(BufferNode*));
+  BufferNode* _tail;
+  DEBUG_ONLY(mutable bool _collecting;)
+
+  typedef G1RedirtyCardsQueueBase::LocalQSet LocalQSet;
+
+  void update_tail(BufferNode* node);
+
+public:
+  G1RedirtyCardsQueueSet();
+  ~G1RedirtyCardsQueueSet();
+
+  using PtrQueueSet::initialize;
+
+  void verify_empty() const NOT_DEBUG_RETURN;
+
+  // Collect buffers.  These functions are thread-safe.
+  // precondition: Must not be concurrent with buffer processing.
+  virtual void enqueue_completed_buffer(BufferNode* node);
+  void merge_bufferlist(LocalQSet* src);
+
+  // Processing phase operations.
+  // precondition: Must not be concurrent with buffer collection.
+  BufferNode* all_completed_buffers() const;
+  G1RedirtyCardsBufferList take_all_completed_buffers();
+};
+
+#endif // SHARE_GC_G1_G1REDIRTYCARDSQUEUE_HPP
--- a/src/hotspot/share/gc/g1/g1RemSet.cpp	Fri Jul 19 12:39:31 2019 -0400
+++ b/src/hotspot/share/gc/g1/g1RemSet.cpp	Fri Jul 19 16:47:11 2019 -0400
@@ -26,6 +26,7 @@
 #include "gc/g1/g1BarrierSet.hpp"
 #include "gc/g1/g1BlockOffsetTable.inline.hpp"
 #include "gc/g1/g1CardTable.inline.hpp"
+#include "gc/g1/g1CardTableEntryClosure.hpp"
 #include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/g1ConcurrentRefine.hpp"
 #include "gc/g1/g1DirtyCardQueue.hpp"
--- a/src/hotspot/share/gc/shared/ptrQueue.hpp	Fri Jul 19 12:39:31 2019 -0400
+++ b/src/hotspot/share/gc/shared/ptrQueue.hpp	Fri Jul 19 16:47:11 2019 -0400
@@ -296,6 +296,10 @@
 class PtrQueueSet {
   BufferNode::Allocator* _allocator;
 
+  // Noncopyable - not defined.
+  PtrQueueSet(const PtrQueueSet&);
+  PtrQueueSet& operator=(const PtrQueueSet&);
+
 protected:
   bool _all_active;
 
@@ -309,6 +313,9 @@
 
 public:
 
+  // Return the associated BufferNode allocator.
+  BufferNode::Allocator* allocator() const { return _allocator; }
+
   // Return the buffer for a BufferNode of size buffer_size().
   void** allocate_buffer();