8069330: Adjustment of concurrent refinement thresholds does not take hot card cache into account
author: tschatzl
Tue, 13 Oct 2015 14:49:13 +0200
changeset 33204 b8a3901ac5b3
parent 33203 e1034e5d33eb
child 33206 da8c3575c076
8069330: Adjustment of concurrent refinement thresholds does not take hot card cache into account Summary: Measure HCC processing time separately and remove that time from the calculation of the refinement thresholds. HCC processing time is still taken into account for general pause time predictions. Reviewed-by: tbenson, jmasa
hotspot/src/share/vm/gc/g1/concurrentG1Refine.hpp
hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp
hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp
hotspot/src/share/vm/gc/g1/g1CollectorPolicy.cpp
hotspot/src/share/vm/gc/g1/g1CollectorPolicy.hpp
hotspot/src/share/vm/gc/g1/g1ErgoVerbose.cpp
hotspot/src/share/vm/gc/g1/g1ErgoVerbose.hpp
hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.cpp
hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.hpp
hotspot/src/share/vm/gc/g1/g1HotCardCache.cpp
hotspot/src/share/vm/gc/g1/g1HotCardCache.hpp
hotspot/src/share/vm/gc/g1/g1RemSet.cpp
hotspot/test/gc/g1/TestGCLogMessages.java
--- a/hotspot/src/share/vm/gc/g1/concurrentG1Refine.hpp	Tue Oct 13 08:37:44 2015 +0200
+++ b/hotspot/src/share/vm/gc/g1/concurrentG1Refine.hpp	Tue Oct 13 14:49:13 2015 +0200
@@ -112,6 +112,8 @@
   int thread_threshold_step() const { return _thread_threshold_step; }
 
   G1HotCardCache* hot_card_cache() { return &_hot_card_cache; }
+
+  static bool hot_card_cache_enabled() { return G1HotCardCache::default_use_cache(); }
 };
 
 #endif // SHARE_VM_GC_G1_CONCURRENTG1REFINE_HPP
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Tue Oct 13 08:37:44 2015 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Tue Oct 13 14:49:13 2015 +0200
@@ -2258,14 +2258,11 @@
 }
 #endif // PRODUCT
 
-void G1CollectedHeap::iterate_dirty_card_closure(CardTableEntryClosure* cl,
-                                                 DirtyCardQueue* into_cset_dcq,
-                                                 bool concurrent,
-                                                 uint worker_i) {
-  // Clean cards in the hot card cache
-  G1HotCardCache* hot_card_cache = _cg1r->hot_card_cache();
-  hot_card_cache->drain(worker_i, g1_rem_set(), into_cset_dcq);
-
+void G1CollectedHeap::iterate_hcc_closure(CardTableEntryClosure* cl, uint worker_i) {
+  _cg1r->hot_card_cache()->drain(cl, worker_i);
+}
+
+void G1CollectedHeap::iterate_dirty_card_closure(CardTableEntryClosure* cl, uint worker_i) {
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   size_t n_completed_buffers = 0;
   while (dcqs.apply_closure_to_completed_buffer(cl, worker_i, 0, true)) {
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Tue Oct 13 08:37:44 2015 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Tue Oct 13 14:49:13 2015 +0200
@@ -1011,9 +1011,11 @@
   // continues humongous regions too.
   void reset_gc_time_stamps(HeapRegion* hr);
 
-  void iterate_dirty_card_closure(CardTableEntryClosure* cl,
-                                  DirtyCardQueue* into_cset_dcq,
-                                  bool concurrent, uint worker_i);
+  // Apply the given closure on all cards in the Hot Card Cache, emptying it.
+  void iterate_hcc_closure(CardTableEntryClosure* cl, uint worker_i);
+
+  // Apply the given closure on all cards in the Dirty Card Queue Set, emptying it.
+  void iterate_dirty_card_closure(CardTableEntryClosure* cl, uint worker_i);
 
   // The shared block offset table array.
   G1BlockOffsetSharedArray* bot_shared() const { return _bot_shared; }
--- a/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.cpp	Tue Oct 13 08:37:44 2015 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.cpp	Tue Oct 13 14:49:13 2015 +0200
@@ -92,6 +92,7 @@
   _prev_collection_pause_end_ms(0.0),
   _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
   _cost_per_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+  _cost_scan_hcc_seq(new TruncatedSeq(TruncatedSeqLength)),
   _young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
   _mixed_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
   _cost_per_entry_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
@@ -192,6 +193,7 @@
 
   _rs_length_diff_seq->add(rs_length_diff_defaults[index]);
   _cost_per_card_ms_seq->add(cost_per_card_ms_defaults[index]);
+  _cost_scan_hcc_seq->add(0.0);
   _young_cards_per_entry_ratio_seq->add(
                                   young_cards_per_entry_ratio_defaults[index]);
   _cost_per_entry_ms_seq->add(cost_per_entry_ms_defaults[index]);
@@ -1046,10 +1048,12 @@
 
   if (update_stats) {
     double cost_per_card_ms = 0.0;
+    double cost_scan_hcc = phase_times()->average_time_ms(G1GCPhaseTimes::ScanHCC);
     if (_pending_cards > 0) {
-      cost_per_card_ms = phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS) / (double) _pending_cards;
+      cost_per_card_ms = (phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS) - cost_scan_hcc) / (double) _pending_cards;
       _cost_per_card_ms_seq->add(cost_per_card_ms);
     }
+    _cost_scan_hcc_seq->add(cost_scan_hcc);
 
     double cost_per_entry_ms = 0.0;
     if (cards_scanned > 10) {
@@ -1146,8 +1150,25 @@
 
   // Note that _mmu_tracker->max_gc_time() returns the time in seconds.
   double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
-  adjust_concurrent_refinement(phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS),
-                               phase_times()->sum_thread_work_items(G1GCPhaseTimes::UpdateRS), update_rs_time_goal_ms);
+
+  double scan_hcc_time_ms = phase_times()->average_time_ms(G1GCPhaseTimes::ScanHCC);
+
+  if (update_rs_time_goal_ms < scan_hcc_time_ms) {
+    ergo_verbose2(ErgoTiming,
+                  "adjust concurrent refinement thresholds",
+                  ergo_format_reason("Scanning the HCC expected to take longer than Update RS time goal")
+                  ergo_format_ms("Update RS time goal")
+                  ergo_format_ms("Scan HCC time"),
+                  update_rs_time_goal_ms,
+                  scan_hcc_time_ms);
+
+    update_rs_time_goal_ms = 0;
+  } else {
+    update_rs_time_goal_ms -= scan_hcc_time_ms;
+  }
+  adjust_concurrent_refinement(phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS) - scan_hcc_time_ms,
+                               phase_times()->sum_thread_work_items(G1GCPhaseTimes::UpdateRS),
+                               update_rs_time_goal_ms);
 
   _collectionSetChooser->verify();
 }
--- a/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.hpp	Tue Oct 13 08:37:44 2015 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1CollectorPolicy.hpp	Tue Oct 13 14:49:13 2015 +0200
@@ -219,6 +219,7 @@
 
   TruncatedSeq* _rs_length_diff_seq;
   TruncatedSeq* _cost_per_card_ms_seq;
+  TruncatedSeq* _cost_scan_hcc_seq;
   TruncatedSeq* _young_cards_per_entry_ratio_seq;
   TruncatedSeq* _mixed_cards_per_entry_ratio_seq;
   TruncatedSeq* _cost_per_entry_ms_seq;
@@ -324,8 +325,12 @@
     return get_new_prediction(_cost_per_card_ms_seq);
   }
 
+  double predict_scan_hcc_ms() const {
+    return get_new_prediction(_cost_scan_hcc_seq);
+  }
+
   double predict_rs_update_time_ms(size_t pending_cards) const {
-    return (double) pending_cards * predict_cost_per_card_ms();
+    return (double) pending_cards * predict_cost_per_card_ms() + predict_scan_hcc_ms();
   }
 
   double predict_young_cards_per_entry_ratio() const {
--- a/hotspot/src/share/vm/gc/g1/g1ErgoVerbose.cpp	Tue Oct 13 08:37:44 2015 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1ErgoVerbose.cpp	Tue Oct 13 14:49:13 2015 +0200
@@ -56,6 +56,7 @@
   case ErgoCSetConstruction:  return "CSet Construction";
   case ErgoConcCycles:        return "Concurrent Cycles";
   case ErgoMixedGCs:          return "Mixed GCs";
+  case ErgoTiming:            return "Timing";
   default:
     ShouldNotReachHere();
     // Keep the Windows compiler happy
--- a/hotspot/src/share/vm/gc/g1/g1ErgoVerbose.hpp	Tue Oct 13 08:37:44 2015 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1ErgoVerbose.hpp	Tue Oct 13 14:49:13 2015 +0200
@@ -70,6 +70,7 @@
   ErgoCSetConstruction,
   ErgoConcCycles,
   ErgoMixedGCs,
+  ErgoTiming,
 
   ErgoHeuristicNum
 } ErgoHeuristic;
--- a/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.cpp	Tue Oct 13 08:37:44 2015 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.cpp	Tue Oct 13 14:49:13 2015 +0200
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "gc/g1/concurrentG1Refine.hpp"
 #include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/g1GCPhaseTimes.hpp"
 #include "gc/g1/g1Log.hpp"
@@ -269,6 +270,8 @@
   _gc_par_phases[SATBFiltering] = new WorkerDataArray<double>(max_gc_threads, "SATB Filtering (ms)", true, G1Log::LevelFinest, 3);
 
   _gc_par_phases[UpdateRS] = new WorkerDataArray<double>(max_gc_threads, "Update RS (ms)", true, G1Log::LevelFiner, 2);
+  _gc_par_phases[ScanHCC] = new WorkerDataArray<double>(max_gc_threads, "Scan HCC (ms)", true, G1Log::LevelFiner, 3);
+  _gc_par_phases[ScanHCC]->set_enabled(ConcurrentG1Refine::hot_card_cache_enabled());
   _gc_par_phases[ScanRS] = new WorkerDataArray<double>(max_gc_threads, "Scan RS (ms)", true, G1Log::LevelFiner, 2);
   _gc_par_phases[CodeRoots] = new WorkerDataArray<double>(max_gc_threads, "Code Root Scanning (ms)", true, G1Log::LevelFiner, 2);
   _gc_par_phases[ObjCopy] = new WorkerDataArray<double>(max_gc_threads, "Object Copy (ms)", true, G1Log::LevelFiner, 2);
--- a/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.hpp	Tue Oct 13 08:37:44 2015 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.hpp	Tue Oct 13 14:49:13 2015 +0200
@@ -56,6 +56,7 @@
     WeakCLDRoots,
     SATBFiltering,
     UpdateRS,
+    ScanHCC,
     ScanRS,
     CodeRoots,
     ObjCopy,
--- a/hotspot/src/share/vm/gc/g1/g1HotCardCache.cpp	Tue Oct 13 08:37:44 2015 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1HotCardCache.cpp	Tue Oct 13 14:49:13 2015 +0200
@@ -26,7 +26,6 @@
 #include "gc/g1/dirtyCardQueue.hpp"
 #include "gc/g1/g1CollectedHeap.inline.hpp"
 #include "gc/g1/g1HotCardCache.hpp"
-#include "gc/g1/g1RemSet.hpp"
 #include "runtime/atomic.inline.hpp"
 
 G1HotCardCache::G1HotCardCache(G1CollectedHeap *g1h):
@@ -81,9 +80,7 @@
   return (previous_ptr == current_ptr) ? previous_ptr : card_ptr;
 }
 
-void G1HotCardCache::drain(uint worker_i,
-                           G1RemSet* g1rs,
-                           DirtyCardQueue* into_cset_dcq) {
+void G1HotCardCache::drain(CardTableEntryClosure* cl, uint worker_i) {
   if (!default_use_cache()) {
     assert(_hot_cache == NULL, "Logic");
     return;
@@ -101,21 +98,8 @@
     for (size_t i = start_idx; i < end_idx; i++) {
       jbyte* card_ptr = _hot_cache[i];
       if (card_ptr != NULL) {
-        if (g1rs->refine_card(card_ptr, worker_i, true)) {
-          // The part of the heap spanned by the card contains references
-          // that point into the current collection set.
-          // We need to record the card pointer in the DirtyCardQueueSet
-          // that we use for such cards.
-          //
-          // The only time we care about recording cards that contain
-          // references that point into the collection set is during
-          // RSet updating while within an evacuation pause.
-          // In this case worker_i should be the id of a GC worker thread
-          assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
-          assert(worker_i < ParallelGCThreads, "incorrect worker id: %u", worker_i);
-
-          into_cset_dcq->enqueue(card_ptr);
-        }
+        bool result = cl->do_card_ptr(card_ptr, worker_i);
+        assert(result, "Closure should always return true");
       } else {
         break;
       }
--- a/hotspot/src/share/vm/gc/g1/g1HotCardCache.hpp	Tue Oct 13 08:37:44 2015 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1HotCardCache.hpp	Tue Oct 13 14:49:13 2015 +0200
@@ -32,9 +32,9 @@
 #include "runtime/thread.hpp"
 #include "utilities/globalDefinitions.hpp"
 
+class CardTableEntryClosure;
 class DirtyCardQueue;
 class G1CollectedHeap;
-class G1RemSet;
 class HeapRegion;
 
 // An evicting cache of cards that have been logged by the G1 post
@@ -84,11 +84,11 @@
   // The number of cached cards a thread claims when flushing the cache
   static const int ClaimChunkSize = 32;
 
-  bool default_use_cache() const {
+ public:
+  static bool default_use_cache() {
     return (G1ConcRSLogCacheSize > 0);
   }
 
- public:
   G1HotCardCache(G1CollectedHeap* g1h);
   ~G1HotCardCache();
 
@@ -113,7 +113,7 @@
 
   // Refine the cards that have delayed as a result of
   // being in the cache.
-  void drain(uint worker_i, G1RemSet* g1rs, DirtyCardQueue* into_cset_dcq);
+  void drain(CardTableEntryClosure* cl, uint worker_i);
 
   // Set up for parallel processing of the cards in the hot cache
   void reset_hot_cache_claimed_index() {
--- a/hotspot/src/share/vm/gc/g1/g1RemSet.cpp	Tue Oct 13 08:37:44 2015 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1RemSet.cpp	Tue Oct 13 14:49:13 2015 +0200
@@ -263,6 +263,7 @@
                                               DirtyCardQueue* into_cset_dcq) :
     _g1rs(g1h->g1_rem_set()), _into_cset_dcq(into_cset_dcq)
   {}
+
   bool do_card_ptr(jbyte* card_ptr, uint worker_i) {
     // The only time we care about recording cards that
     // contain references that point into the collection set
@@ -285,11 +286,16 @@
 };
 
 void G1RemSet::updateRS(DirtyCardQueue* into_cset_dcq, uint worker_i) {
-  G1GCParPhaseTimesTracker x(_g1p->phase_times(), G1GCPhaseTimes::UpdateRS, worker_i);
-  // Apply the given closure to all remaining log entries.
   RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq);
 
-  _g1->iterate_dirty_card_closure(&into_cset_update_rs_cl, into_cset_dcq, false, worker_i);
+  G1GCParPhaseTimesTracker x(_g1p->phase_times(), G1GCPhaseTimes::UpdateRS, worker_i);
+  {
+    // Apply the closure to the entries of the hot card cache.
+    G1GCParPhaseTimesTracker y(_g1p->phase_times(), G1GCPhaseTimes::ScanHCC, worker_i);
+    _g1->iterate_hcc_closure(&into_cset_update_rs_cl, worker_i);
+  }
+  // Apply the closure to all remaining log entries.
+  _g1->iterate_dirty_card_closure(&into_cset_update_rs_cl, worker_i);
 }
 
 void G1RemSet::cleanupHRRS() {
--- a/hotspot/test/gc/g1/TestGCLogMessages.java	Tue Oct 13 08:37:44 2015 +0200
+++ b/hotspot/test/gc/g1/TestGCLogMessages.java	Tue Oct 13 14:49:13 2015 +0200
@@ -23,7 +23,7 @@
 
 /*
  * @test TestGCLogMessages
- * @bug 8035406 8027295 8035398 8019342 8027959 8048179 8027962
+ * @bug 8035406 8027295 8035398 8019342 8027959 8048179 8027962 8069330
  * @summary Ensure that the PrintGCDetails output for a minor GC with G1
  * includes the expected necessary messages.
  * @key gc
@@ -55,6 +55,8 @@
     };
 
     private LogMessageWithLevel allLogMessages[] = new LogMessageWithLevel[] {
+        // Update RS
+        new LogMessageWithLevel("Scan HCC (ms)", Level.FINER),
         // Ext Root Scan
         new LogMessageWithLevel("Thread Roots (ms)", Level.FINEST),
         new LogMessageWithLevel("StringTable Roots (ms)", Level.FINEST),