8135154: Move cards scanned and surviving young words aggregation to G1ParScanThreadStateSet
author mgerdin
Wed, 09 Sep 2015 14:22:45 +0200
changeset 32737 f02118695c2f
parent 32736 755024a84282
child 32738 a1adf25202fd
child 32742 c47e215ba55b
8135154: Move cards scanned and surviving young words aggregation to G1ParScanThreadStateSet
Reviewed-by: tschatzl, ehelin
hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp
hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp
hotspot/src/share/vm/gc/g1/g1CollectorPolicy.cpp
hotspot/src/share/vm/gc/g1/g1CollectorPolicy.hpp
hotspot/src/share/vm/gc/g1/g1ParScanThreadState.cpp
hotspot/src/share/vm/gc/g1/g1ParScanThreadState.hpp
hotspot/src/share/vm/gc/g1/g1ParScanThreadState_ext.cpp
hotspot/src/share/vm/gc/g1/g1RemSet.cpp
hotspot/src/share/vm/gc/g1/g1RemSet.hpp
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Wed Sep 09 10:34:22 2015 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Wed Sep 09 14:22:45 2015 +0200
@@ -2025,7 +2025,6 @@
   _survivor_evac_stats(YoungPLABSize, PLABWeight),
   _old_evac_stats(OldPLABSize, PLABWeight),
   _expand_heap_after_alloc_failure(true),
-  _surviving_young_words(NULL),
   _old_marking_cycles_started(0),
   _old_marking_cycles_completed(0),
   _heap_summary_sent(false),
@@ -3698,10 +3697,6 @@
   return (buffer_size * buffer_num + extra_cards) / oopSize;
 }
 
-size_t G1CollectedHeap::cards_scanned() {
-  return g1_rem_set()->cardsScanned();
-}
-
 class RegisterHumongousWithInCSetFastTestClosure : public HeapRegionClosure {
  private:
   size_t _total_humongous;
@@ -3842,36 +3837,6 @@
   cl.flush_rem_set_entries();
 }
 
-void G1CollectedHeap::setup_surviving_young_words() {
-  assert(_surviving_young_words == NULL, "pre-condition");
-  uint array_length = g1_policy()->young_cset_region_length();
-  _surviving_young_words = NEW_C_HEAP_ARRAY(size_t, (size_t) array_length, mtGC);
-  if (_surviving_young_words == NULL) {
-    vm_exit_out_of_memory(sizeof(size_t) * array_length, OOM_MALLOC_ERROR,
-                          "Not enough space for young surv words summary.");
-  }
-  memset(_surviving_young_words, 0, (size_t) array_length * sizeof(size_t));
-#ifdef ASSERT
-  for (uint i = 0;  i < array_length; ++i) {
-    assert( _surviving_young_words[i] == 0, "memset above" );
-  }
-#endif // !ASSERT
-}
-
-void G1CollectedHeap::update_surviving_young_words(size_t* surv_young_words) {
-  assert_at_safepoint(true);
-  uint array_length = g1_policy()->young_cset_region_length();
-  for (uint i = 0; i < array_length; ++i) {
-    _surviving_young_words[i] += surv_young_words[i];
-  }
-}
-
-void G1CollectedHeap::cleanup_surviving_young_words() {
-  guarantee( _surviving_young_words != NULL, "pre-condition" );
-  FREE_C_HEAP_ARRAY(size_t, _surviving_young_words);
-  _surviving_young_words = NULL;
-}
-
 #ifdef ASSERT
 class VerifyCSetClosure: public HeapRegionClosure {
 public:
@@ -4159,23 +4124,20 @@
         collection_set_iterate(&cl);
 #endif // ASSERT
 
-        setup_surviving_young_words();
-
         // Initialize the GC alloc regions.
         _allocator->init_gc_alloc_regions(evacuation_info);
 
-        G1ParScanThreadStateSet per_thread_states(this, workers()->active_workers());
+        G1ParScanThreadStateSet per_thread_states(this, workers()->active_workers(), g1_policy()->young_cset_region_length());
         // Actually do the work...
         evacuate_collection_set(evacuation_info, &per_thread_states);
 
-        free_collection_set(g1_policy()->collection_set(), evacuation_info);
+        const size_t* surviving_young_words = per_thread_states.surviving_young_words();
+        free_collection_set(g1_policy()->collection_set(), evacuation_info, surviving_young_words);
 
         eagerly_reclaim_humongous_regions();
 
         g1_policy()->clear_collection_set();
 
-        cleanup_surviving_young_words();
-
         // Start a new incremental collection set for the next pause.
         g1_policy()->start_incremental_cset_building();
 
@@ -4260,7 +4222,8 @@
         // investigate this in CR 7178365.
         double sample_end_time_sec = os::elapsedTime();
         double pause_time_ms = (sample_end_time_sec - sample_start_time_sec) * MILLIUNITS;
-        g1_policy()->record_collection_pause_end(pause_time_ms);
+        size_t total_cards_scanned = per_thread_states.total_cards_scanned();
+        g1_policy()->record_collection_pause_end(pause_time_ms, total_cards_scanned);
 
         evacuation_info.set_collectionset_used_before(g1_policy()->collection_set_bytes_used_before());
         evacuation_info.set_bytes_copied(g1_policy()->bytes_copied_during_gc());
@@ -4669,9 +4632,12 @@
                                       worker_id);
 
       G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, pss);
-      _g1h->g1_rem_set()->oops_into_collection_set_do(&push_heap_rs_cl,
-                                                      weak_root_cl,
-                                                      worker_id);
+      size_t cards_scanned = _g1h->g1_rem_set()->oops_into_collection_set_do(&push_heap_rs_cl,
+                                                                             weak_root_cl,
+                                                                             worker_id);
+
+      _pss->add_cards_scanned(worker_id, cards_scanned);
+
       double strong_roots_sec = os::elapsedTime() - start_strong_roots_sec;
 
       double term_sec = 0.0;
@@ -6050,7 +6016,7 @@
   g1_policy()->phase_times()->record_clear_ct_time(elapsed * 1000.0);
 }
 
-void G1CollectedHeap::free_collection_set(HeapRegion* cs_head, EvacuationInfo& evacuation_info) {
+void G1CollectedHeap::free_collection_set(HeapRegion* cs_head, EvacuationInfo& evacuation_info, const size_t* surviving_young_words) {
   size_t pre_used = 0;
   FreeRegionList local_free_list("Local List for CSet Freeing");
 
@@ -6104,7 +6070,7 @@
       int index = cur->young_index_in_cset();
       assert(index != -1, "invariant");
       assert((uint) index < policy->young_cset_region_length(), "invariant");
-      size_t words_survived = _surviving_young_words[index];
+      size_t words_survived = surviving_young_words[index];
       cur->record_surv_words_in_group(words_survived);
 
       // At this point the we have 'popped' cur from the collection set
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Wed Sep 09 10:34:22 2015 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Wed Sep 09 14:22:45 2015 +0200
@@ -311,14 +311,8 @@
 
   volatile unsigned _gc_time_stamp;
 
-  size_t* _surviving_young_words;
-
   G1HRPrinter _hr_printer;
 
-  void setup_surviving_young_words();
-  void update_surviving_young_words(size_t* surv_young_words);
-  void cleanup_surviving_young_words();
-
   // It decides whether an explicit GC should start a concurrent cycle
   // instead of doing a STW GC. Currently, a concurrent cycle is
   // explicitly started if:
@@ -832,7 +826,7 @@
 
   // After a collection pause, make the regions in the CS into free
   // regions.
-  void free_collection_set(HeapRegion* cs_head, EvacuationInfo& evacuation_info);
+  void free_collection_set(HeapRegion* cs_head, EvacuationInfo& evacuation_info, const size_t* surviving_young_words);
 
   // Abandon the current collection set without recording policy
   // statistics or updating free lists.
@@ -1609,7 +1603,6 @@
 
 public:
   size_t pending_card_num();
-  size_t cards_scanned();
 
 protected:
   size_t _max_heap_capacity;
--- a/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.cpp	Wed Sep 09 10:34:22 2015 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.cpp	Wed Sep 09 14:22:45 2015 +0200
@@ -923,7 +923,7 @@
 // Anything below that is considered to be zero
 #define MIN_TIMER_GRANULARITY 0.0000001
 
-void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms) {
+void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, size_t cards_scanned) {
   double end_time_sec = os::elapsedTime();
   assert(_cur_collection_pause_used_regions_at_start >= cset_region_length(),
          "otherwise, the subtraction below does not make sense");
@@ -1052,8 +1052,6 @@
       _cost_per_card_ms_seq->add(cost_per_card_ms);
     }
 
-    size_t cards_scanned = _g1->cards_scanned();
-
     double cost_per_entry_ms = 0.0;
     if (cards_scanned > 10) {
       cost_per_entry_ms = phase_times()->average_time_ms(G1GCPhaseTimes::ScanRS) / (double) cards_scanned;
--- a/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.hpp	Wed Sep 09 10:34:22 2015 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.hpp	Wed Sep 09 14:22:45 2015 +0200
@@ -634,7 +634,7 @@
 
   // Record the start and end of an evacuation pause.
   void record_collection_pause_start(double start_time_sec);
-  void record_collection_pause_end(double pause_time_ms);
+  void record_collection_pause_end(double pause_time_ms, size_t cards_scanned);
 
   // Record the start and end of a full collection.
   void record_full_collection_start();
--- a/hotspot/src/share/vm/gc/g1/g1ParScanThreadState.cpp	Wed Sep 09 10:34:22 2015 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1ParScanThreadState.cpp	Wed Sep 09 14:22:45 2015 +0200
@@ -32,7 +32,7 @@
 #include "oops/oop.inline.hpp"
 #include "runtime/prefetch.inline.hpp"
 
-G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint worker_id)
+G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint worker_id, size_t young_cset_length)
   : _g1h(g1h),
     _refs(g1h->task_queue(worker_id)),
     _dcq(&g1h->dirty_card_queue_set()),
@@ -51,8 +51,8 @@
   // non-young regions (where the age is -1)
   // We also add a few elements at the beginning and at the end in
   // an attempt to eliminate cache contention
-  uint real_length = 1 + _g1h->g1_policy()->young_cset_region_length();
-  uint array_length = PADDING_ELEM_NUM +
+  size_t real_length = 1 + young_cset_length;
+  size_t array_length = PADDING_ELEM_NUM +
                       real_length +
                       PADDING_ELEM_NUM;
   _surviving_young_words_base = NEW_C_HEAP_ARRAY(size_t, array_length, mtGC);
@@ -60,7 +60,7 @@
     vm_exit_out_of_memory(array_length * sizeof(size_t), OOM_MALLOC_ERROR,
                           "Not enough space for young surv histo.");
   _surviving_young_words = _surviving_young_words_base + PADDING_ELEM_NUM;
-  memset(_surviving_young_words, 0, (size_t) real_length * sizeof(size_t));
+  memset(_surviving_young_words, 0, real_length * sizeof(size_t));
 
   _plab_allocator = G1PLABAllocator::create_allocator(_g1h->allocator());
 
@@ -72,17 +72,20 @@
 }
 
 // Pass locally gathered statistics to global state.
-void G1ParScanThreadState::flush() {
+void G1ParScanThreadState::flush(size_t* surviving_young_words) {
   _dcq.flush();
   // Update allocation statistics.
   _plab_allocator->flush_and_retire_stats();
   _g1h->g1_policy()->record_age_table(&_age_table);
+
+  uint length = _g1h->g1_policy()->young_cset_region_length();
+  for (uint region_index = 0; region_index < length; region_index++) {
+    surviving_young_words[region_index] += _surviving_young_words[region_index];
+  }
 }
 
 G1ParScanThreadState::~G1ParScanThreadState() {
   delete _plab_allocator;
-  // Update heap statistics.
-  _g1h->update_surviving_young_words(_surviving_young_words);
   FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base);
 }
 
@@ -324,14 +327,31 @@
   return _states[worker_id];
 }
 
+void G1ParScanThreadStateSet::add_cards_scanned(uint worker_id, size_t cards_scanned) {
+  assert(worker_id < _n_workers, "out of bounds access");
+  _cards_scanned[worker_id] += cards_scanned;
+}
+
+size_t G1ParScanThreadStateSet::total_cards_scanned() const {
+  assert(_flushed, "thread local state from the per thread states should have been flushed");
+  return _total_cards_scanned;
+}
+
+const size_t* G1ParScanThreadStateSet::surviving_young_words() const {
+  assert(_flushed, "thread local state from the per thread states should have been flushed");
+  return _surviving_young_words_total;
+}
+
 void G1ParScanThreadStateSet::flush() {
   assert(!_flushed, "thread local state from the per thread states should be flushed once");
+  assert(_total_cards_scanned == 0, "should have been cleared");
 
   for (uint worker_index = 0; worker_index < _n_workers; ++worker_index) {
     G1ParScanThreadState* pss = _states[worker_index];
 
-    pss->flush();
+    _total_cards_scanned += _cards_scanned[worker_index];
 
+    pss->flush(_surviving_young_words_total);
     delete pss;
     _states[worker_index] = NULL;
   }
--- a/hotspot/src/share/vm/gc/g1/g1ParScanThreadState.hpp	Wed Sep 09 10:34:22 2015 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1ParScanThreadState.hpp	Wed Sep 09 14:22:45 2015 +0200
@@ -82,7 +82,7 @@
   }
 
  public:
-  G1ParScanThreadState(G1CollectedHeap* g1h, uint worker_id);
+  G1ParScanThreadState(G1CollectedHeap* g1h, uint worker_id, size_t young_cset_length);
   ~G1ParScanThreadState();
 
   void set_ref_processor(ReferenceProcessor* rp) { _scanner.set_ref_processor(rp); }
@@ -121,7 +121,7 @@
     return _surviving_young_words + 1;
   }
 
-  void flush();
+  void flush(size_t* surviving_young_words);
 
  private:
   #define G1_PARTIAL_ARRAY_MASK 0x2
@@ -194,31 +194,45 @@
 class G1ParScanThreadStateSet : public StackObj {
   G1CollectedHeap* _g1h;
   G1ParScanThreadState** _states;
+  size_t* _surviving_young_words_total;
+  size_t* _cards_scanned;
+  size_t _total_cards_scanned;
   uint _n_workers;
   bool _flushed;
 
  public:
-  G1ParScanThreadStateSet(G1CollectedHeap* g1h, uint n_workers) :
+  G1ParScanThreadStateSet(G1CollectedHeap* g1h, uint n_workers, size_t young_cset_length) :
       _g1h(g1h),
       _states(NEW_C_HEAP_ARRAY(G1ParScanThreadState*, n_workers, mtGC)),
+      _surviving_young_words_total(NEW_C_HEAP_ARRAY(size_t, young_cset_length, mtGC)),
+      _cards_scanned(NEW_C_HEAP_ARRAY(size_t, n_workers, mtGC)),
+      _total_cards_scanned(0),
       _n_workers(n_workers),
       _flushed(false) {
     for (uint i = 0; i < n_workers; ++i) {
-      _states[i] = new_par_scan_state(i);
+      _states[i] = new_par_scan_state(i, young_cset_length);
     }
+    memset(_surviving_young_words_total, 0, young_cset_length * sizeof(size_t));
+    memset(_cards_scanned, 0, n_workers * sizeof(size_t));
   }
 
   ~G1ParScanThreadStateSet() {
     assert(_flushed, "thread local state from the per thread states should have been flushed");
     FREE_C_HEAP_ARRAY(G1ParScanThreadState*, _states);
+    FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_total);
+    FREE_C_HEAP_ARRAY(size_t, _cards_scanned);
   }
 
   void flush();
 
   G1ParScanThreadState* state_for_worker(uint worker_id);
 
+  void add_cards_scanned(uint worker_id, size_t cards_scanned);
+  size_t total_cards_scanned() const;
+  const size_t* surviving_young_words() const;
+
  private:
-  G1ParScanThreadState* new_par_scan_state(uint worker_id);
+  G1ParScanThreadState* new_par_scan_state(uint worker_id, size_t young_cset_length);
 };
 
 #endif // SHARE_VM_GC_G1_G1PARSCANTHREADSTATE_HPP
--- a/hotspot/src/share/vm/gc/g1/g1ParScanThreadState_ext.cpp	Wed Sep 09 10:34:22 2015 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1ParScanThreadState_ext.cpp	Wed Sep 09 14:22:45 2015 +0200
@@ -26,6 +26,6 @@
 
 #include "gc/g1/g1ParScanThreadState.hpp"
 
-G1ParScanThreadState* G1ParScanThreadStateSet::new_par_scan_state(uint worker_id) {
-  return new G1ParScanThreadState(_g1h, worker_id);
+G1ParScanThreadState* G1ParScanThreadStateSet::new_par_scan_state(uint worker_id, size_t young_cset_length) {
+  return new G1ParScanThreadState(_g1h, worker_id, young_cset_length);
 }
--- a/hotspot/src/share/vm/gc/g1/g1RemSet.cpp	Wed Sep 09 10:34:22 2015 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1RemSet.cpp	Wed Sep 09 14:22:45 2015 +0200
@@ -76,7 +76,6 @@
     _ct_bs(ct_bs), _g1p(_g1->g1_policy()),
     _cg1r(g1->concurrent_g1_refine()),
     _cset_rs_update_cl(NULL),
-    _cards_scanned(NULL), _total_cards_scanned(0),
     _prev_period_summary()
 {
   _cset_rs_update_cl = NEW_C_HEAP_ARRAY(G1ParPushHeapRSClosure*, n_workers(), mtGC);
@@ -228,9 +227,9 @@
   size_t cards_looked_up() { return _cards;}
 };
 
-void G1RemSet::scanRS(G1ParPushHeapRSClosure* oc,
-                      OopClosure* non_heap_roots,
-                      uint worker_i) {
+size_t G1RemSet::scanRS(G1ParPushHeapRSClosure* oc,
+                        OopClosure* non_heap_roots,
+                        uint worker_i) {
   double rs_time_start = os::elapsedTime();
 
   G1CodeBlobClosure code_root_cl(non_heap_roots);
@@ -246,11 +245,10 @@
   double scan_rs_time_sec = (os::elapsedTime() - rs_time_start)
                             - scanRScl.strong_code_root_scan_time_sec();
 
-  assert(_cards_scanned != NULL, "invariant");
-  _cards_scanned[worker_i] = scanRScl.cards_done();
-
   _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, scan_rs_time_sec);
   _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, scanRScl.strong_code_root_scan_time_sec());
+
+  return scanRScl.cards_done();
 }
 
 // Closure used for updating RSets and recording references that
@@ -298,9 +296,9 @@
   HeapRegionRemSet::cleanup();
 }
 
-void G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* oc,
-                                           OopClosure* non_heap_roots,
-                                           uint worker_i) {
+size_t G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* oc,
+                                             OopClosure* non_heap_roots,
+                                             uint worker_i) {
 #if CARD_REPEAT_HISTO
   ct_freq_update_histo_and_reset();
 #endif
@@ -322,10 +320,11 @@
   DirtyCardQueue into_cset_dcq(&_g1->into_cset_dirty_card_queue_set());
 
   updateRS(&into_cset_dcq, worker_i);
-  scanRS(oc, non_heap_roots, worker_i);
+  size_t cards_scanned = scanRS(oc, non_heap_roots, worker_i);
 
   // We now clear the cached values of _cset_rs_update_cl for this worker
   _cset_rs_update_cl[worker_i] = NULL;
+  return cards_scanned;
 }
 
 void G1RemSet::prepare_for_oops_into_collection_set_do() {
@@ -333,23 +332,9 @@
   _g1->set_refine_cte_cl_concurrency(false);
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   dcqs.concatenate_logs();
-
-  guarantee( _cards_scanned == NULL, "invariant" );
-  _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers(), mtGC);
-  for (uint i = 0; i < n_workers(); ++i) {
-    _cards_scanned[i] = 0;
-  }
-  _total_cards_scanned = 0;
 }
 
 void G1RemSet::cleanup_after_oops_into_collection_set_do() {
-  guarantee( _cards_scanned != NULL, "invariant" );
-  _total_cards_scanned = 0;
-  for (uint i = 0; i < n_workers(); ++i) {
-    _total_cards_scanned += _cards_scanned[i];
-  }
-  FREE_C_HEAP_ARRAY(size_t, _cards_scanned);
-  _cards_scanned = NULL;
   // Cleanup after copy
   _g1->set_refine_cte_cl_concurrency(true);
   // Set all cards back to clean.
--- a/hotspot/src/share/vm/gc/g1/g1RemSet.hpp	Wed Sep 09 10:34:22 2015 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1RemSet.hpp	Wed Sep 09 14:22:45 2015 +0200
@@ -62,9 +62,6 @@
 
   ConcurrentG1Refine*    _cg1r;
 
-  size_t*                _cards_scanned;
-  size_t                 _total_cards_scanned;
-
   // Used for caching the closure that is responsible for scanning
   // references into the collection set.
   G1ParPushHeapRSClosure** _cset_rs_update_cl;
@@ -94,9 +91,12 @@
   // partitioning the work to be done. It should be the same as
   // the "i" passed to the calling thread's work(i) function.
   // In the sequential case this param will be ignored.
-  void oops_into_collection_set_do(G1ParPushHeapRSClosure* blk,
-                                   OopClosure* non_heap_roots,
-                                   uint worker_i);
+  //
+  // Returns the number of cards scanned while looking for pointers
+  // into the collection set.
+  size_t oops_into_collection_set_do(G1ParPushHeapRSClosure* blk,
+                                     OopClosure* non_heap_roots,
+                                     uint worker_i);
 
   // Prepare for and cleanup after an oops_into_collection_set_do
   // call.  Must call each of these once before and after (in sequential
@@ -106,14 +106,13 @@
   void prepare_for_oops_into_collection_set_do();
   void cleanup_after_oops_into_collection_set_do();
 
-  void scanRS(G1ParPushHeapRSClosure* oc,
-              OopClosure* non_heap_roots,
-              uint worker_i);
+  size_t scanRS(G1ParPushHeapRSClosure* oc,
+                OopClosure* non_heap_roots,
+                uint worker_i);
 
   void updateRS(DirtyCardQueue* into_cset_dcq, uint worker_i);
 
   CardTableModRefBS* ct_bs() { return _ct_bs; }
-  size_t cardsScanned() { return _total_cards_scanned; }
 
   // Record, if necessary, the fact that *p (where "p" is in region "from",
   // which is required to be non-NULL) has changed to a new non-NULL value.