8153507: Improve Card Table Clear Task
author tschatzl
Mon, 02 May 2016 10:24:41 +0200
changeset 38154 4c30333c304e
parent 38153 4f13f0b690c3
child 38155 a4501a2965dc
8153507: Improve Card Table Clear Task
Summary: Move card table clear code into remembered set related files. Improve work distribution of this task, and tune thread usage.
Reviewed-by: jmasa, mgerdin
hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp
hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp
hotspot/src/share/vm/gc/g1/g1RemSet.cpp
hotspot/src/share/vm/gc/g1/heapRegion.cpp
hotspot/src/share/vm/gc/g1/heapRegion.hpp
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Mon May 02 10:24:41 2016 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Mon May 02 10:24:41 2016 +0200
@@ -163,59 +163,6 @@
   reset_from_card_cache(start_idx, num_regions);
 }
 
-void G1CollectedHeap::push_dirty_cards_region(HeapRegion* hr)
-{
-  // Claim the right to put the region on the dirty cards region list
-  // by installing a self pointer.
-  HeapRegion* next = hr->get_next_dirty_cards_region();
-  if (next == NULL) {
-    HeapRegion* res = (HeapRegion*)
-      Atomic::cmpxchg_ptr(hr, hr->next_dirty_cards_region_addr(),
-                          NULL);
-    if (res == NULL) {
-      HeapRegion* head;
-      do {
-        // Put the region to the dirty cards region list.
-        head = _dirty_cards_region_list;
-        next = (HeapRegion*)
-          Atomic::cmpxchg_ptr(hr, &_dirty_cards_region_list, head);
-        if (next == head) {
-          assert(hr->get_next_dirty_cards_region() == hr,
-                 "hr->get_next_dirty_cards_region() != hr");
-          if (next == NULL) {
-            // The last region in the list points to itself.
-            hr->set_next_dirty_cards_region(hr);
-          } else {
-            hr->set_next_dirty_cards_region(next);
-          }
-        }
-      } while (next != head);
-    }
-  }
-}
-
-HeapRegion* G1CollectedHeap::pop_dirty_cards_region()
-{
-  HeapRegion* head;
-  HeapRegion* hr;
-  do {
-    head = _dirty_cards_region_list;
-    if (head == NULL) {
-      return NULL;
-    }
-    HeapRegion* new_head = head->get_next_dirty_cards_region();
-    if (head == new_head) {
-      // The last region.
-      new_head = NULL;
-    }
-    hr = (HeapRegion*)Atomic::cmpxchg_ptr(new_head, &_dirty_cards_region_list,
-                                          head);
-  } while (hr != head);
-  assert(hr != NULL, "invariant");
-  hr->set_next_dirty_cards_region(NULL);
-  return hr;
-}
-
 // Returns true if the reference points to an object that
 // can move in an incremental collection.
 bool G1CollectedHeap::is_scavengable(const void* p) {
@@ -1777,7 +1724,6 @@
   _old_marking_cycles_started(0),
   _old_marking_cycles_completed(0),
   _in_cset_fast_test(),
-  _dirty_cards_region_list(NULL),
   _worker_cset_start_region(NULL),
   _worker_cset_start_region_time_stamp(NULL),
   _gc_timer_stw(new (ResourceObj::C_HEAP, mtGC) STWGCTimer()),
@@ -4743,31 +4689,6 @@
   decrease_used(bytes);
 }
 
-class G1ParCleanupCTTask : public AbstractGangTask {
-  G1SATBCardTableModRefBS* _ct_bs;
-  G1CollectedHeap* _g1h;
-  HeapRegion* volatile _su_head;
-public:
-  G1ParCleanupCTTask(G1SATBCardTableModRefBS* ct_bs,
-                     G1CollectedHeap* g1h) :
-    AbstractGangTask("G1 Par Cleanup CT Task"),
-    _ct_bs(ct_bs), _g1h(g1h) { }
-
-  void work(uint worker_id) {
-    HeapRegion* r;
-    while (r = _g1h->pop_dirty_cards_region()) {
-      clear_cards(r);
-    }
-  }
-
-  void clear_cards(HeapRegion* r) {
-    // Cards of the survivors should have already been dirtied.
-    if (!r->is_survivor()) {
-      _ct_bs->clear(MemRegion(r->bottom(), r->end()));
-    }
-  }
-};
-
 class G1ParScrubRemSetTask: public AbstractGangTask {
 protected:
   G1RemSet* _g1rs;
@@ -4791,27 +4712,6 @@
   workers()->run_task(&g1_par_scrub_rs_task);
 }
 
-void G1CollectedHeap::cleanUpCardTable() {
-  G1SATBCardTableModRefBS* ct_bs = g1_barrier_set();
-  double start = os::elapsedTime();
-
-  {
-    // Iterate over the dirty cards region list.
-    G1ParCleanupCTTask cleanup_task(ct_bs, this);
-
-    workers()->run_task(&cleanup_task);
-#ifndef PRODUCT
-    // Need to synchronize with concurrent cleanup since it needs to
-    // finish its card table clearing before we can verify.
-    wait_while_free_regions_coming();
-    _verifier->verify_card_table_cleanup();
-#endif
-  }
-
-  double elapsed = os::elapsedTime() - start;
-  g1_policy()->phase_times()->record_clear_ct_time(elapsed * 1000.0);
-}
-
 void G1CollectedHeap::free_collection_set(HeapRegion* cs_head, EvacuationInfo& evacuation_info, const size_t* surviving_young_words) {
   size_t pre_used = 0;
   FreeRegionList local_free_list("Local List for CSet Freeing");
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Mon May 02 10:24:41 2016 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Mon May 02 10:24:41 2016 +0200
@@ -36,6 +36,7 @@
 #include "gc/g1/g1MonitoringSupport.hpp"
 #include "gc/g1/g1EvacFailure.hpp"
 #include "gc/g1/g1EvacStats.hpp"
+#include "gc/g1/g1HeapVerifier.hpp"
 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
 #include "gc/g1/g1YCTypes.hpp"
 #include "gc/g1/hSpaceCounters.hpp"
@@ -1165,10 +1166,6 @@
     return barrier_set_cast<G1SATBCardTableLoggingModRefBS>(barrier_set());
   }
 
-  // This resets the card table to all zeros.  It is used after
-  // a collection pause which used the card table to claim cards.
-  void cleanUpCardTable();
-
   // Iteration functions.
 
   // Iterate over all objects, calling "cl.do_object" on each.
@@ -1394,16 +1391,6 @@
 
   ConcurrentG1Refine* concurrent_g1_refine() const { return _cg1r; }
 
-  // The dirty cards region list is used to record a subset of regions
-  // whose cards need clearing. The list if populated during the
-  // remembered set scanning and drained during the card table
-  // cleanup. Although the methods are reentrant, population/draining
-  // phases must not overlap. For synchronization purposes the last
-  // element on the list points to itself.
-  HeapRegion* _dirty_cards_region_list;
-  void push_dirty_cards_region(HeapRegion* hr);
-  HeapRegion* pop_dirty_cards_region();
-
   // Optimized nmethod scanning support routines
 
   // Register the given nmethod with the G1 heap.
--- a/hotspot/src/share/vm/gc/g1/g1RemSet.cpp	Mon May 02 10:24:41 2016 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1RemSet.cpp	Mon May 02 10:24:41 2016 +0200
@@ -48,6 +48,47 @@
 // Collects information about the overall remembered set scan progress during an evacuation.
 class G1RemSetScanState : public CHeapObj<mtGC> {
 private:
+  class G1ClearCardTableTask : public AbstractGangTask {
+    G1CollectedHeap* _g1h;
+    uint* _dirty_region_list;
+    size_t _num_dirty_regions;
+    size_t _chunk_length;
+
+    size_t volatile _cur_dirty_regions;
+  public:
+    G1ClearCardTableTask(G1CollectedHeap* g1h,
+                         uint* dirty_region_list,
+                         size_t num_dirty_regions,
+                         size_t chunk_length) :
+      AbstractGangTask("G1 Clear Card Table Task"),
+      _g1h(g1h),
+      _dirty_region_list(dirty_region_list),
+      _num_dirty_regions(num_dirty_regions),
+      _chunk_length(chunk_length),
+      _cur_dirty_regions(0) {
+
+      assert(chunk_length > 0, "must be");
+    }
+
+    static size_t chunk_size() { return M; }
+
+    void work(uint worker_id) {
+      G1SATBCardTableModRefBS* ct_bs = _g1h->g1_barrier_set();
+
+      while (_cur_dirty_regions < _num_dirty_regions) {
+        size_t next = Atomic::add(_chunk_length, &_cur_dirty_regions) - _chunk_length;
+        size_t max = MIN2(next + _chunk_length, _num_dirty_regions);
+
+        for (size_t i = next; i < max; i++) {
+          HeapRegion* r = _g1h->region_at(_dirty_region_list[i]);
+          if (!r->is_survivor()) {
+            ct_bs->clear(MemRegion(r->bottom(), r->end()));
+          }
+        }
+      }
+    }
+  };
+
   size_t _max_regions;
 
   // Scan progress for the remembered set of a single region. Transitions from
@@ -65,11 +106,25 @@
   // remembered set.
   size_t volatile* _iter_claims;
 
+  // Temporary buffer holding the regions we used to store remembered set scan duplicate
+  // information. These are also called "dirty". Valid entries are from [0.._cur_dirty_region)
+  uint* _dirty_region_buffer;
+
+  typedef jbyte IsDirtyRegionState;
+  static const IsDirtyRegionState Clean = 0;
+  static const IsDirtyRegionState Dirty = 1;
+  // Holds a flag for every region whether it is in the _dirty_region_buffer already
+  // to avoid duplicates. Uses jbyte since there are no atomic instructions for bools.
+  IsDirtyRegionState* _in_dirty_region_buffer;
+  size_t _cur_dirty_region;
 public:
   G1RemSetScanState() :
     _max_regions(0),
     _iter_states(NULL),
-    _iter_claims(NULL) {
+    _iter_claims(NULL),
+    _dirty_region_buffer(NULL),
+    _in_dirty_region_buffer(NULL),
+    _cur_dirty_region(0) {
 
   }
 
@@ -80,6 +135,12 @@
     if (_iter_claims != NULL) {
       FREE_C_HEAP_ARRAY(size_t, _iter_claims);
     }
+    if (_dirty_region_buffer != NULL) {
+      FREE_C_HEAP_ARRAY(uint, _dirty_region_buffer);
+    }
+    if (_in_dirty_region_buffer != NULL) {
+      FREE_C_HEAP_ARRAY(IsDirtyRegionState, _in_dirty_region_buffer);
+    }
   }
 
   void initialize(uint max_regions) {
@@ -88,6 +149,8 @@
     _max_regions = max_regions;
     _iter_states = NEW_C_HEAP_ARRAY(G1RemsetIterState, max_regions, mtGC);
     _iter_claims = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
+    _dirty_region_buffer = NEW_C_HEAP_ARRAY(uint, max_regions, mtGC);
+    _in_dirty_region_buffer = NEW_C_HEAP_ARRAY(IsDirtyRegionState, max_regions, mtGC);
   }
 
   void reset() {
@@ -95,6 +158,8 @@
       _iter_states[i] = Unclaimed;
     }
     memset((void*)_iter_claims, 0, _max_regions * sizeof(size_t));
+    memset(_in_dirty_region_buffer, Clean, _max_regions * sizeof(IsDirtyRegionState));
+    _cur_dirty_region = 0;
   }
 
   // Attempt to claim the remembered set of the region for iteration. Returns true
@@ -135,6 +200,44 @@
   inline size_t iter_claimed_next(uint region, size_t step) {
     return Atomic::add(step, &_iter_claims[region]) - step;
   }
+
+  void add_dirty_region(uint region) {
+    if (_in_dirty_region_buffer[region] == Dirty) {
+      return;
+    }
+
+    bool marked_as_dirty = Atomic::cmpxchg(Dirty, &_in_dirty_region_buffer[region], Clean) == Clean;
+    if (marked_as_dirty) {
+      size_t allocated = Atomic::add(1, &_cur_dirty_region) - 1;
+      _dirty_region_buffer[allocated] = region;
+    }
+  }
+
+  // Clear the card table of "dirty" regions.
+  void clear_card_table(WorkGang* workers) {
+   if (_cur_dirty_region == 0) {
+     return;
+   }
+
+   size_t const num_chunks = align_size_up(_cur_dirty_region * HeapRegion::CardsPerRegion, G1ClearCardTableTask::chunk_size()) / G1ClearCardTableTask::chunk_size();
+   uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
+   size_t const chunk_length = G1ClearCardTableTask::chunk_size() / HeapRegion::CardsPerRegion;
+
+   // Iterate over the dirty cards region list.
+   G1ClearCardTableTask cl(G1CollectedHeap::heap(), _dirty_region_buffer, _cur_dirty_region, chunk_length);
+
+   log_debug(gc, ergo)("Running %s using %u workers for " SIZE_FORMAT " "
+                       "units of work for " SIZE_FORMAT " regions.",
+                       cl.name(), num_workers, num_chunks, _cur_dirty_region);
+   workers->run_task(&cl, num_workers);
+
+#ifndef PRODUCT
+   // Need to synchronize with concurrent cleanup since it needs to
+   // finish its card table clearing before we can verify.
+   G1CollectedHeap::heap()->wait_while_free_regions_coming();
+   G1CollectedHeap::heap()->verifier()->verify_card_table_cleanup();
+#endif
+  }
 };
 
 G1RemSet::G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs) :
@@ -237,7 +340,7 @@
     // If we ever free the collection set concurrently, we should also
     // clear the card table concurrently therefore we won't need to
     // add regions of the collection set to the dirty cards region.
-    _g1h->push_dirty_cards_region(r);
+    _scan_state->add_dirty_region(region_idx);
   }
 
   HeapRegionRemSetIterator iter(r->rem_set());
@@ -258,9 +361,7 @@
     HeapRegion* card_region = _g1h->heap_region_containing(card_start);
     _cards++;
 
-    if (!card_region->is_on_dirty_cards_region_list()) {
-      _g1h->push_dirty_cards_region(card_region);
-    }
+    _scan_state->add_dirty_region(card_region->hrm_index());
 
     // If the card is dirty, then we will scan it during updateRS.
     if (!card_region->in_collection_set() &&
@@ -376,10 +477,14 @@
 }
 
 void G1RemSet::cleanup_after_oops_into_collection_set_do() {
+  G1GCPhaseTimes* phase_times = _g1->g1_policy()->phase_times();
   // Cleanup after copy
   _g1->set_refine_cte_cl_concurrency(true);
+
   // Set all cards back to clean.
-  _g1->cleanUpCardTable();
+  double start = os::elapsedTime();
+  _scan_state->clear_card_table(_g1->workers());
+  phase_times->record_clear_ct_time((os::elapsedTime() - start) * 1000.0);
 
   DirtyCardQueueSet& into_cset_dcqs = _into_cset_dirty_card_queue_set;
 
@@ -391,7 +496,7 @@
     // used to hold cards that contain references that point into the collection set
     // to the DCQS used to hold the deferred RS updates.
     _g1->dirty_card_queue_set().merge_bufferlists(&into_cset_dcqs);
-    _g1->g1_policy()->phase_times()->record_evac_fail_restore_remsets((os::elapsedTime() - restore_remembered_set_start) * 1000.0);
+    phase_times->record_evac_fail_restore_remsets((os::elapsedTime() - restore_remembered_set_start) * 1000.0);
   }
 
   // Free any completed buffers in the DirtyCardQueueSet used to hold cards
--- a/hotspot/src/share/vm/gc/g1/heapRegion.cpp	Mon May 02 10:24:41 2016 +0200
+++ b/hotspot/src/share/vm/gc/g1/heapRegion.cpp	Mon May 02 10:24:41 2016 +0200
@@ -288,7 +288,7 @@
     _evacuation_failed(false),
     _prev_marked_bytes(0), _next_marked_bytes(0), _gc_efficiency(0.0),
     _next_young_region(NULL),
-    _next_dirty_cards_region(NULL), _next(NULL), _prev(NULL),
+    _next(NULL), _prev(NULL),
 #ifdef ASSERT
     _containing_set(NULL),
 #endif // ASSERT
--- a/hotspot/src/share/vm/gc/g1/heapRegion.hpp	Mon May 02 10:24:41 2016 +0200
+++ b/hotspot/src/share/vm/gc/g1/heapRegion.hpp	Mon May 02 10:24:41 2016 +0200
@@ -270,9 +270,6 @@
   // next region in the young "generation" region set
   HeapRegion* _next_young_region;
 
-  // Next region whose cards need cleaning
-  HeapRegion* _next_dirty_cards_region;
-
   // Fields used by the HeapRegionSetBase class and subclasses.
   HeapRegion* _next;
   HeapRegion* _prev;
@@ -531,11 +528,6 @@
     _next_young_region = hr;
   }
 
-  HeapRegion* get_next_dirty_cards_region() const { return _next_dirty_cards_region; }
-  HeapRegion** next_dirty_cards_region_addr() { return &_next_dirty_cards_region; }
-  void set_next_dirty_cards_region(HeapRegion* hr) { _next_dirty_cards_region = hr; }
-  bool is_on_dirty_cards_region_list() const { return get_next_dirty_cards_region() != NULL; }
-
   // Reset HR stuff to default values.
   void hr_clear(bool par, bool clear_space, bool locked = false);
   void par_clear();