8227739: Merge cost predictions for scanning cards and log buffer entries
author: tschatzl
date: Fri, 29 Nov 2019 10:20:14 +0100
changeset 59319 9ee940f1de90
parent 59318 70021dbed82b
child 59320 11ff4e485670
8227739: Merge cost predictions for scanning cards and log buffer entries
Summary: Revamp the cost predictions for the changes in JDK-8200545 and JDK-8213108.
Reviewed-by: sjohanss, kbarrett
src/hotspot/share/gc/g1/g1Analytics.cpp
src/hotspot/share/gc/g1/g1Analytics.hpp
src/hotspot/share/gc/g1/g1CardTable.hpp
src/hotspot/share/gc/g1/g1CardTable.inline.hpp
src/hotspot/share/gc/g1/g1CollectionSet.cpp
src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp
src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp
src/hotspot/share/gc/g1/g1HotCardCache.cpp
src/hotspot/share/gc/g1/g1HotCardCache.hpp
src/hotspot/share/gc/g1/g1Policy.cpp
src/hotspot/share/gc/g1/g1Policy.hpp
src/hotspot/share/gc/g1/g1RemSet.cpp
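The old model kept separate predictions for the cost per logged card, the HCC scan time, and the cost per remembered-set card; the revamped model predicts a cost per merged card, a cost per scanned card, and a merge-to-scan ratio that estimates how many cards actually need scanning per remembered-set entry. A minimal standalone sketch of how these inputs combine, loosely mirroring the new G1Policy::predict_base_elapsed_time_ms in the diff below (CostModel and its members are hypothetical illustration names, not JDK code):

#include <cstddef>

// Minimal sketch of the revised cost model: every pending card and
// remembered-set entry is merged into the card table, and an estimated
// fraction of the remembered-set entries, given by the merge-to-scan
// ratio, is subsequently scanned.
struct CostModel {
  double cost_per_card_merge_ms;   // sampled from the Merge* phases
  double cost_per_card_scan_ms;    // sampled from the Scan HR phases
  double merge_to_scan_ratio;      // scanned cards per remembered-set entry
  double constant_other_time_ms;

  double predict_base_time_ms(size_t pending_cards, size_t rs_length) const {
    size_t cards_to_merge = pending_cards + rs_length;
    size_t cards_to_scan  = static_cast<size_t>(rs_length * merge_to_scan_ratio);
    return cards_to_merge * cost_per_card_merge_ms +
           cards_to_scan  * cost_per_card_scan_ms +
           constant_other_time_ms;
  }
};

For example, with 20000 remembered-set entries, 5000 pending cards and a ratio of 0.4, the model merges 25000 cards and scans an estimated 8000.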
--- a/src/hotspot/share/gc/g1/g1Analytics.cpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1Analytics.cpp	Fri Nov 29 10:20:14 2019 +0100
@@ -45,11 +45,11 @@
 };
 
 // all the same
-static double young_cards_per_entry_ratio_defaults[] = {
+static double young_card_merge_to_scan_ratio_defaults[] = {
   1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
 };
 
-static double young_only_cost_per_remset_card_ms_defaults[] = {
+static double young_only_cost_per_card_scan_ms_defaults[] = {
   0.015, 0.01, 0.01, 0.008, 0.008, 0.0055, 0.0055, 0.005
 };
 
@@ -62,7 +62,6 @@
   5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0
 };
 
-
 static double young_other_cost_per_region_ms_defaults[] = {
   0.3, 0.2, 0.2, 0.15, 0.15, 0.12, 0.12, 0.1
 };
@@ -81,13 +80,13 @@
     _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
     _concurrent_refine_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _logged_cards_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _cost_per_logged_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _cost_scan_hcc_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _mixed_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _young_only_cost_per_remset_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _mixed_cost_per_remset_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
-    _cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _young_card_merge_to_scan_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _mixed_card_merge_to_scan_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _young_cost_per_card_scan_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _mixed_cost_per_card_scan_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _young_cost_per_card_merge_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _mixed_cost_per_card_merge_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _copy_cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _constant_other_time_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _non_young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
@@ -109,11 +108,10 @@
   _concurrent_refine_rate_ms_seq->add(1/cost_per_logged_card_ms_defaults[0]);
   // Some applications have very low rates for logging cards.
   _logged_cards_rate_ms_seq->add(0.0);
-  _cost_per_logged_card_ms_seq->add(cost_per_logged_card_ms_defaults[index]);
-  _cost_scan_hcc_seq->add(0.0);
-  _young_cards_per_entry_ratio_seq->add(young_cards_per_entry_ratio_defaults[index]);
-  _young_only_cost_per_remset_card_ms_seq->add(young_only_cost_per_remset_card_ms_defaults[index]);
-  _cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]);
+  _young_card_merge_to_scan_ratio_seq->add(young_card_merge_to_scan_ratio_defaults[index]);
+  _young_cost_per_card_scan_ms_seq->add(young_only_cost_per_card_scan_ms_defaults[index]);
+
+  _copy_cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]);
   _constant_other_time_ms_seq->add(constant_other_time_ms_defaults[index]);
   _young_other_cost_per_region_ms_seq->add(young_other_cost_per_region_ms_defaults[index]);
   _non_young_other_cost_per_region_ms_seq->add(non_young_other_cost_per_region_ms_defaults[index]);
@@ -123,6 +121,10 @@
   _concurrent_mark_cleanup_times_ms->add(0.20);
 }
 
+bool G1Analytics::enough_samples_available(TruncatedSeq const* seq) const {
+  return seq->num() >= 3;
+}
+
 double G1Analytics::get_new_prediction(TruncatedSeq const* seq) const {
   return _predictor->get_new_prediction(seq);
 }
@@ -166,27 +168,27 @@
   _logged_cards_rate_ms_seq->add(cards_per_ms);
 }
 
-void G1Analytics::report_cost_per_logged_card_ms(double cost_per_logged_card_ms) {
-  _cost_per_logged_card_ms_seq->add(cost_per_logged_card_ms);
-}
-
-void G1Analytics::report_cost_scan_hcc(double cost_scan_hcc) {
-  _cost_scan_hcc_seq->add(cost_scan_hcc);
-}
-
-void G1Analytics::report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc) {
+void G1Analytics::report_cost_per_card_scan_ms(double cost_per_card_ms, bool for_young_gc) {
   if (for_young_gc) {
-    _young_only_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms);
+    _young_cost_per_card_scan_ms_seq->add(cost_per_card_ms);
   } else {
-    _mixed_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms);
+    _mixed_cost_per_card_scan_ms_seq->add(cost_per_card_ms);
   }
 }
 
-void G1Analytics::report_cards_per_entry_ratio(double cards_per_entry_ratio, bool for_young_gc) {
+void G1Analytics::report_cost_per_card_merge_ms(double cost_per_card_ms, bool for_young_gc) {
   if (for_young_gc) {
-    _young_cards_per_entry_ratio_seq->add(cards_per_entry_ratio);
+    _young_cost_per_card_merge_ms_seq->add(cost_per_card_ms);
   } else {
-    _mixed_cards_per_entry_ratio_seq->add(cards_per_entry_ratio);
+    _mixed_cost_per_card_merge_ms_seq->add(cost_per_card_ms);
+  }
+}
+
+void G1Analytics::report_card_merge_to_scan_ratio(double merge_to_scan_ratio, bool for_young_gc) {
+  if (for_young_gc) {
+    _young_card_merge_to_scan_ratio_seq->add(merge_to_scan_ratio);
+  } else {
+    _mixed_card_merge_to_scan_ratio_seq->add(merge_to_scan_ratio);
   }
 }
 
@@ -198,7 +200,7 @@
   if (mark_or_rebuild_in_progress) {
     _cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms);
   } else {
-    _cost_per_byte_ms_seq->add(cost_per_byte_ms);
+    _copy_cost_per_byte_ms_seq->add(cost_per_byte_ms);
   }
 }
 
@@ -234,57 +236,37 @@
   return get_new_prediction(_logged_cards_rate_ms_seq);
 }
 
-double G1Analytics::predict_cost_per_logged_card_ms() const {
-  return get_new_prediction(_cost_per_logged_card_ms_seq);
-}
-
-double G1Analytics::predict_scan_hcc_ms() const {
-  return get_new_prediction(_cost_scan_hcc_seq);
+double G1Analytics::predict_young_card_merge_to_scan_ratio() const {
+  return get_new_prediction(_young_card_merge_to_scan_ratio_seq);
 }
 
-double G1Analytics::predict_rs_update_time_ms(size_t pending_cards) const {
-  return pending_cards * predict_cost_per_logged_card_ms() + predict_scan_hcc_ms();
-}
-
-double G1Analytics::predict_young_cards_per_entry_ratio() const {
-  return get_new_prediction(_young_cards_per_entry_ratio_seq);
-}
-
-double G1Analytics::predict_mixed_cards_per_entry_ratio() const {
-  if (_mixed_cards_per_entry_ratio_seq->num() < 2) {
-    return predict_young_cards_per_entry_ratio();
+size_t G1Analytics::predict_scan_card_num(size_t rs_length, bool for_young_gc) const {
+  if (for_young_gc || !enough_samples_available(_mixed_card_merge_to_scan_ratio_seq)) {
+    return (size_t) (rs_length * predict_young_card_merge_to_scan_ratio());
   } else {
-    return get_new_prediction(_mixed_cards_per_entry_ratio_seq);
+    return (size_t) (rs_length * get_new_prediction(_mixed_card_merge_to_scan_ratio_seq));
   }
 }
 
-size_t G1Analytics::predict_card_num(size_t rs_length, bool for_young_gc) const {
-  if (for_young_gc) {
-    return (size_t) (rs_length * predict_young_cards_per_entry_ratio());
+double G1Analytics::predict_card_merge_time_ms(size_t card_num, bool for_young_gc) const {
+  if (for_young_gc || !enough_samples_available(_mixed_cost_per_card_merge_ms_seq)) {
+    return card_num * get_new_prediction(_young_cost_per_card_merge_ms_seq);
   } else {
-    return (size_t) (rs_length * predict_mixed_cards_per_entry_ratio());
+    return card_num * get_new_prediction(_mixed_cost_per_card_merge_ms_seq);
   }
 }
 
-double G1Analytics::predict_rs_scan_time_ms(size_t card_num, bool for_young_gc) const {
-  if (for_young_gc) {
-    return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq);
+double G1Analytics::predict_card_scan_time_ms(size_t card_num, bool for_young_gc) const {
+  if (for_young_gc || !enough_samples_available(_mixed_cost_per_card_scan_ms_seq)) {
+    return card_num * get_new_prediction(_young_cost_per_card_scan_ms_seq);
   } else {
-    return predict_mixed_rs_scan_time_ms(card_num);
-  }
-}
-
-double G1Analytics::predict_mixed_rs_scan_time_ms(size_t card_num) const {
-  if (_mixed_cost_per_remset_card_ms_seq->num() < 3) {
-    return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq);
-  } else {
-    return card_num * get_new_prediction(_mixed_cost_per_remset_card_ms_seq);
+    return card_num * get_new_prediction(_mixed_cost_per_card_scan_ms_seq);
   }
 }
 
 double G1Analytics::predict_object_copy_time_ms_during_cm(size_t bytes_to_copy) const {
-  if (_cost_per_byte_ms_during_cm_seq->num() < 3) {
-    return (1.1 * bytes_to_copy) * get_new_prediction(_cost_per_byte_ms_seq);
+  if (!enough_samples_available(_cost_per_byte_ms_during_cm_seq)) {
+    return (1.1 * bytes_to_copy) * get_new_prediction(_copy_cost_per_byte_ms_seq);
   } else {
     return bytes_to_copy * get_new_prediction(_cost_per_byte_ms_during_cm_seq);
   }
@@ -294,14 +276,10 @@
   if (during_concurrent_mark) {
     return predict_object_copy_time_ms_during_cm(bytes_to_copy);
   } else {
-    return bytes_to_copy * get_new_prediction(_cost_per_byte_ms_seq);
+    return bytes_to_copy * get_new_prediction(_copy_cost_per_byte_ms_seq);
   }
 }
 
-double G1Analytics::predict_cost_per_byte_ms() const {
-  return get_new_prediction(_cost_per_byte_ms_seq);
-}
-
 double G1Analytics::predict_constant_other_time_ms() const {
   return get_new_prediction(_constant_other_time_ms_seq);
 }
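Several of the predict_* helpers above share one pattern: a mixed-GC prediction falls back to the young-only sequence until enough_samples_available() reports at least three samples for the mixed sequence. A compact standalone restatement of that pattern (Seq is an illustrative stand-in for TruncatedSeq plus its predictor, not JDK code):

#include <cstddef>

// Illustrative stand-in for a TruncatedSeq together with its prediction.
struct Seq {
  size_t num;          // number of samples recorded so far
  double prediction;   // what the predictor would return for this sequence
};

// Mirrors G1Analytics::enough_samples_available(): three samples are
// treated as the minimum for a usable prediction.
inline bool enough_samples(const Seq& s) { return s.num >= 3; }

// Mixed-GC predictions reuse the young-only sequence until the mixed
// sequence has gathered enough samples of its own.
double predict_cost_per_card_scan_ms(const Seq& young, const Seq& mixed,
                                     bool for_young_gc) {
  if (for_young_gc || !enough_samples(mixed)) {
    return young.prediction;
  }
  return mixed.prediction;
}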
--- a/src/hotspot/share/gc/g1/g1Analytics.hpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1Analytics.hpp	Fri Nov 29 10:20:14 2019 +0100
@@ -48,13 +48,21 @@
   TruncatedSeq* _rs_length_diff_seq;
   TruncatedSeq* _concurrent_refine_rate_ms_seq;
   TruncatedSeq* _logged_cards_rate_ms_seq;
-  TruncatedSeq* _cost_per_logged_card_ms_seq;
-  TruncatedSeq* _cost_scan_hcc_seq;
-  TruncatedSeq* _young_cards_per_entry_ratio_seq;
-  TruncatedSeq* _mixed_cards_per_entry_ratio_seq;
-  TruncatedSeq* _young_only_cost_per_remset_card_ms_seq;
-  TruncatedSeq* _mixed_cost_per_remset_card_ms_seq;
-  TruncatedSeq* _cost_per_byte_ms_seq;
+  // The ratio between the number of merged cards and actually scanned cards, for
+  // young-only and mixed gcs.
+  TruncatedSeq* _young_card_merge_to_scan_ratio_seq;
+  TruncatedSeq* _mixed_card_merge_to_scan_ratio_seq;
+
+  // The cost to scan a card during young-only and mixed gcs in ms.
+  TruncatedSeq* _young_cost_per_card_scan_ms_seq;
+  TruncatedSeq* _mixed_cost_per_card_scan_ms_seq;
+
+  // The cost to merge a card during young-only and mixed gcs in ms.
+  TruncatedSeq* _young_cost_per_card_merge_ms_seq;
+  TruncatedSeq* _mixed_cost_per_card_merge_ms_seq;
+
+  // The cost to copy a byte in ms.
+  TruncatedSeq* _copy_cost_per_byte_ms_seq;
   TruncatedSeq* _constant_other_time_ms_seq;
   TruncatedSeq* _young_other_cost_per_region_ms_seq;
   TruncatedSeq* _non_young_other_cost_per_region_ms_seq;
@@ -72,6 +80,10 @@
   double _recent_avg_pause_time_ratio;
   double _last_pause_time_ratio;
 
+  // Returns whether the sequence has enough samples to get a "good" prediction.
+  // The constant used is arbitrary but "small".
+  bool enough_samples_available(TruncatedSeq const* seq) const;
+
   double get_new_prediction(TruncatedSeq const* seq) const;
   size_t get_new_size_prediction(TruncatedSeq const* seq) const;
 
@@ -103,10 +115,9 @@
   void report_alloc_rate_ms(double alloc_rate);
   void report_concurrent_refine_rate_ms(double cards_per_ms);
   void report_logged_cards_rate_ms(double cards_per_ms);
-  void report_cost_per_logged_card_ms(double cost_per_logged_card_ms);
-  void report_cost_scan_hcc(double cost_scan_hcc);
-  void report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc);
-  void report_cards_per_entry_ratio(double cards_per_entry_ratio, bool for_young_gc);
+  void report_cost_per_card_scan_ms(double cost_per_remset_card_ms, bool for_young_gc);
+  void report_cost_per_card_merge_ms(double cost_per_card_ms, bool for_young_gc);
+  void report_card_merge_to_scan_ratio(double cards_per_entry_ratio, bool for_young_gc);
   void report_rs_length_diff(double rs_length_diff);
   void report_cost_per_byte_ms(double cost_per_byte_ms, bool mark_or_rebuild_in_progress);
   void report_young_other_cost_per_region_ms(double other_cost_per_region_ms);
@@ -120,21 +131,14 @@
 
   double predict_concurrent_refine_rate_ms() const;
   double predict_logged_cards_rate_ms() const;
-  double predict_cost_per_logged_card_ms() const;
-
-  double predict_scan_hcc_ms() const;
+  double predict_young_card_merge_to_scan_ratio() const;
 
-  double predict_rs_update_time_ms(size_t pending_cards) const;
-
-  double predict_young_cards_per_entry_ratio() const;
+  double predict_mixed_card_merge_to_scan_ratio() const;
 
-  double predict_mixed_cards_per_entry_ratio() const;
-
-  size_t predict_card_num(size_t rs_length, bool for_young_gc) const;
+  size_t predict_scan_card_num(size_t rs_length, bool for_young_gc) const;
 
-  double predict_rs_scan_time_ms(size_t card_num, bool for_young_gc) const;
-
-  double predict_mixed_rs_scan_time_ms(size_t card_num) const;
+  double predict_card_merge_time_ms(size_t card_num, bool for_young_gc) const;
+  double predict_card_scan_time_ms(size_t card_num, bool for_young_gc) const;
 
   double predict_object_copy_time_ms_during_cm(size_t bytes_to_copy) const;
 
@@ -153,8 +157,6 @@
   size_t predict_rs_length() const;
   size_t predict_pending_cards() const;
 
-  double predict_cost_per_byte_ms() const;
-
   // Add a new GC of the given duration and end time to the record.
   void update_recent_gc_times(double end_time_sec, double elapsed_ms);
   void compute_pause_time_ratio(double interval_ms, double pause_time_ms);
--- a/src/hotspot/share/gc/g1/g1CardTable.hpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1CardTable.hpp	Fri Nov 29 10:20:14 2019 +0100
@@ -92,12 +92,16 @@
     return pointer_delta(p, _byte_map, sizeof(CardValue));
   }
 
-  // Mark the given card as Dirty if it is Clean.
-  inline void mark_clean_as_dirty(size_t card_index);
+  // Mark the given card as Dirty if it is Clean. Returns the number of cards
+  // newly dirtied by this call (0 or 1). This result may be inaccurate because
+  // the dirtying is not performed atomically.
+  inline size_t mark_clean_as_dirty(size_t card_index);
 
   // Change Clean cards in a (large) area on the card table as Dirty, preserving
   // already scanned cards. Assumes that most cards in that area are Clean.
-  inline void mark_region_dirty(size_t start_card_index, size_t num_cards);
+  // Returns the number of cards newly dirtied by this call. This result may be
+  // inaccurate because the dirtying is not performed atomically.
+  inline size_t mark_region_dirty(size_t start_card_index, size_t num_cards);
 
   // Mark the given range of cards as Scanned. All of these cards must be Dirty.
   inline void mark_as_scanned(size_t start_card_index, size_t num_cards);
--- a/src/hotspot/share/gc/g1/g1CardTable.inline.hpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1CardTable.inline.hpp	Fri Nov 29 10:20:14 2019 +0100
@@ -33,17 +33,21 @@
   return (uint)(card_idx >> (HeapRegion::LogOfHRGrainBytes - card_shift));
 }
 
-inline void G1CardTable::mark_clean_as_dirty(size_t card_index) {
+inline size_t G1CardTable::mark_clean_as_dirty(size_t card_index) {
   CardValue value = _byte_map[card_index];
   if (value == clean_card_val()) {
     _byte_map[card_index] = dirty_card_val();
+    return 1;
   }
+  return 0;
 }
 
-inline void G1CardTable::mark_region_dirty(size_t start_card_index, size_t num_cards) {
+inline size_t G1CardTable::mark_region_dirty(size_t start_card_index, size_t num_cards) {
   assert(is_aligned(start_card_index, sizeof(size_t)), "Start card index must be aligned.");
   assert(is_aligned(num_cards, sizeof(size_t)), "Number of cards to change must be evenly divisible.");
 
+  size_t result = 0;
+
   size_t const num_chunks = num_cards / sizeof(size_t);
 
   size_t* cur_word = (size_t*)&_byte_map[start_card_index];
@@ -52,6 +56,7 @@
     size_t value = *cur_word;
     if (value == WordAllClean) {
       *cur_word = WordAllDirty;
+      result += sizeof(value);
     } else if (value == WordAllDirty) {
       // do nothing.
     } else {
@@ -61,12 +66,15 @@
         CardValue value = *cur;
         if (value == clean_card_val()) {
           *cur = dirty_card_val();
+          result++;
         }
         cur++;
       }
     }
     cur_word++;
   }
+
+  return result;
 }
 
 inline void G1CardTable::mark_as_scanned(size_t start_card_index, size_t num_cards) {
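The dirty-card counts returned above are deliberately gathered without atomics, as the comments in g1CardTable.hpp note. A standalone sketch (plain types, not the actual G1 card table) of the check-then-set involved and why summed per-worker counts can slightly over-count when two workers race on the same card:

#include <cstddef>
#include <cstdint>

// Standalone sketch of the non-atomic check-then-set above and why its
// return value is only approximate.
using CardValue = uint8_t;
static const CardValue kClean = 0xff;  // assumed clean/dirty encodings
static const CardValue kDirty = 0x00;

// Two workers racing on the same card can both observe kClean, both store
// kDirty and both return 1, so summed per-worker counts may exceed the
// number of cards that actually changed state.
inline size_t mark_clean_as_dirty(CardValue* card) {
  if (*card == kClean) {
    *card = kDirty;
    return 1;
  }
  return 0;
}

The imprecision is harmless here because the counts only feed the new MergeRSDirtyCards work item and the merge-cost prediction.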
--- a/src/hotspot/share/gc/g1/g1CollectionSet.cpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1CollectionSet.cpp	Fri Nov 29 10:20:14 2019 +0100
@@ -27,6 +27,7 @@
 #include "gc/g1/g1CollectionSet.hpp"
 #include "gc/g1/g1CollectionSetCandidates.hpp"
 #include "gc/g1/g1CollectorState.hpp"
+#include "gc/g1/g1HotCardCache.hpp"
 #include "gc/g1/g1ParScanThreadState.hpp"
 #include "gc/g1/g1Policy.hpp"
 #include "gc/g1/heapRegion.inline.hpp"
@@ -410,7 +411,7 @@
   guarantee(target_pause_time_ms > 0.0,
             "target_pause_time_ms = %1.6lf should be positive", target_pause_time_ms);
 
-  size_t pending_cards = _policy->pending_cards_at_gc_start();
+  size_t pending_cards = _policy->pending_cards_at_gc_start() + _g1h->hot_card_cache()->num_entries();
   double base_time_ms = _policy->predict_base_elapsed_time_ms(pending_cards);
   double time_remaining_ms = MAX2(target_pause_time_ms - base_time_ms, 0.0);
 
--- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp	Fri Nov 29 10:20:14 2019 +0100
@@ -72,6 +72,8 @@
   _gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_merged_fine, MergeRSMergedFine);
   _merge_rs_merged_coarse = new WorkerDataArray<size_t>(max_gc_threads, "Merged Coarse:");
   _gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_merged_coarse, MergeRSMergedCoarse);
+  _merge_rs_dirty_cards = new WorkerDataArray<size_t>(max_gc_threads, "Dirty Cards:");
+  _gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_dirty_cards, MergeRSDirtyCards);
 
   _gc_par_phases[OptMergeRS] = new WorkerDataArray<double>(max_gc_threads, "Optional Remembered Sets (ms):");
   _opt_merge_rs_merged_sparse = new WorkerDataArray<size_t>(max_gc_threads, "Merged Sparse:");
@@ -80,6 +82,8 @@
   _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_merged_fine, MergeRSMergedFine);
   _opt_merge_rs_merged_coarse = new WorkerDataArray<size_t>(max_gc_threads, "Merged Coarse:");
   _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_merged_coarse, MergeRSMergedCoarse);
+  _opt_merge_rs_dirty_cards = new WorkerDataArray<size_t>(max_gc_threads, "Dirty Cards:");
+  _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_dirty_cards, MergeRSDirtyCards);
 
   _gc_par_phases[MergeLB] = new WorkerDataArray<double>(max_gc_threads, "Log Buffers (ms):");
   if (G1HotCardCache::default_use_cache()) {
@@ -304,10 +308,16 @@
 
 // return the average time for a phase in milliseconds
 double G1GCPhaseTimes::average_time_ms(GCParPhases phase) {
+  if (_gc_par_phases[phase] == NULL) {
+    return 0.0;
+  }
   return _gc_par_phases[phase]->average() * 1000.0;
 }
 
 size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase, uint index) {
+  if (_gc_par_phases[phase] == NULL) {
+    return 0;
+  }
   assert(_gc_par_phases[phase]->thread_work_items(index) != NULL, "No sub count");
   return _gc_par_phases[phase]->thread_work_items(index)->sum();
 }
--- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp	Fri Nov 29 10:20:14 2019 +0100
@@ -88,7 +88,8 @@
   enum GCMergeRSWorkTimes {
     MergeRSMergedSparse,
     MergeRSMergedFine,
-    MergeRSMergedCoarse
+    MergeRSMergedCoarse,
+    MergeRSDirtyCards
   };
 
   enum GCScanHRWorkItems {
@@ -124,6 +125,7 @@
   WorkerDataArray<size_t>* _merge_rs_merged_sparse;
   WorkerDataArray<size_t>* _merge_rs_merged_fine;
   WorkerDataArray<size_t>* _merge_rs_merged_coarse;
+  WorkerDataArray<size_t>* _merge_rs_dirty_cards;
 
   WorkerDataArray<size_t>* _merge_hcc_dirty_cards;
   WorkerDataArray<size_t>* _merge_hcc_skipped_cards;
@@ -138,6 +140,7 @@
   WorkerDataArray<size_t>* _opt_merge_rs_merged_sparse;
   WorkerDataArray<size_t>* _opt_merge_rs_merged_fine;
   WorkerDataArray<size_t>* _opt_merge_rs_merged_coarse;
+  WorkerDataArray<size_t>* _opt_merge_rs_dirty_cards;
 
   WorkerDataArray<size_t>* _opt_scan_hr_scanned_cards;
   WorkerDataArray<size_t>* _opt_scan_hr_scanned_blocks;
--- a/src/hotspot/share/gc/g1/g1HotCardCache.cpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1HotCardCache.cpp	Fri Nov 29 10:20:14 2019 +0100
@@ -32,7 +32,7 @@
 G1HotCardCache::G1HotCardCache(G1CollectedHeap *g1h):
   _g1h(g1h), _use_cache(false), _card_counts(g1h),
   _hot_cache(NULL), _hot_cache_size(0), _hot_cache_par_chunk_size(0),
-  _hot_cache_idx(0), _hot_cache_par_claimed_idx(0)
+  _hot_cache_idx(0), _hot_cache_par_claimed_idx(0), _cache_wrapped_around(false)
 {}
 
 void G1HotCardCache::initialize(G1RegionToSpaceMapper* card_counts_storage) {
@@ -48,6 +48,8 @@
     _hot_cache_par_chunk_size = ClaimChunkSize;
     _hot_cache_par_claimed_idx = 0;
 
+    _cache_wrapped_around = false;
+
     _card_counts.initialize(card_counts_storage);
   }
 }
@@ -69,6 +71,11 @@
   }
   // Otherwise, the card is hot.
   size_t index = Atomic::add(&_hot_cache_idx, 1u) - 1;
+  if (index == _hot_cache_size) {
+    // Can use relaxed store because all racing threads are writing the same
+    // value and there aren't any concurrent readers.
+    Atomic::store(&_cache_wrapped_around, true);
+  }
   size_t masked_index = index & (_hot_cache_size - 1);
   CardValue* current_ptr = _hot_cache[masked_index];
 
--- a/src/hotspot/share/gc/g1/g1HotCardCache.hpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1HotCardCache.hpp	Fri Nov 29 10:20:14 2019 +0100
@@ -81,6 +81,11 @@
 
   char _pad_after[DEFAULT_CACHE_LINE_SIZE];
 
+  // Records whether insertion overflowed the hot card cache at least once. This
+  // avoids the need for a separate atomic counter of how many valid entries are
+  // in the HCC.
+  volatile bool _cache_wrapped_around;
+
   // The number of cached cards a thread claims when flushing the cache
   static const int ClaimChunkSize = 32;
 
@@ -125,13 +130,17 @@
     assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
     assert(Thread::current()->is_VM_thread(), "Current thread should be the VMthread");
     if (default_use_cache()) {
-        reset_hot_cache_internal();
+      reset_hot_cache_internal();
     }
   }
 
   // Zeros the values in the card counts table for the given region
   void reset_card_counts(HeapRegion* hr);
 
+  // Number of entries in the HCC.
+  size_t num_entries() const {
+    return _cache_wrapped_around ? _hot_cache_size : _hot_cache_idx + 1;
+  }
  private:
   void reset_hot_cache_internal() {
     assert(_hot_cache != NULL, "Logic");
@@ -139,6 +148,7 @@
     for (size_t i = 0; i < _hot_cache_size; i++) {
       _hot_cache[i] = NULL;
     }
+    _cache_wrapped_around = false;
   }
 };
 
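The _cache_wrapped_around flag set in g1HotCardCache.cpp above, combined with num_entries(), lets the g1CollectionSet.cpp change earlier in this diff add the hot card cache contents to the pending-card count without a dedicated atomic entry counter. A standalone sketch of the mechanism, with std::atomic standing in for HotSpot's Atomic:: helpers and all names hypothetical:

#include <atomic>
#include <cstddef>

// Sketch of the counter-free HCC sizing: the insertion index grows
// monotonically, and once it reaches the capacity the ring buffer has
// wrapped and every slot holds a valid entry, so an approximate entry
// count needs no dedicated atomic counter.
class HotCacheSize {
  size_t _capacity;
  std::atomic<size_t> _idx{0};
  std::atomic<bool> _wrapped{false};

 public:
  explicit HotCacheSize(size_t capacity) : _capacity(capacity) {}

  void note_insert() {
    size_t index = _idx.fetch_add(1);  // index claimed by this insertion
    if (index == _capacity) {
      // Relaxed store suffices: racing writers store the same value and
      // the flag is only read at a safepoint.
      _wrapped.store(true, std::memory_order_relaxed);
    }
  }

  // Approximate number of valid entries; it only feeds the pending-card
  // count used for pause-time prediction, so exactness is not required.
  size_t num_entries() const {
    return _wrapped.load(std::memory_order_relaxed) ? _capacity
                                                    : _idx.load() + 1;
  }
};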
--- a/src/hotspot/share/gc/g1/g1Policy.cpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1Policy.cpp	Fri Nov 29 10:20:14 2019 +0100
@@ -329,9 +329,8 @@
   const double target_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0;
   const double survivor_regions_evac_time = predict_survivor_regions_evac_time();
   const size_t pending_cards = _analytics->predict_pending_cards();
-  const size_t scanned_cards = _analytics->predict_card_num(rs_length, true /* for_young_gc */);
   const double base_time_ms =
-    predict_base_elapsed_time_ms(pending_cards, scanned_cards) +
+    predict_base_elapsed_time_ms(pending_cards, rs_length) +
     survivor_regions_evac_time;
   const uint available_free_regions = _free_regions_at_end_of_collection;
   const uint base_free_regions =
@@ -713,67 +712,58 @@
   }
 
   _short_lived_surv_rate_group->start_adding_regions();
-  // Do that for any other surv rate groups
 
-  double scan_hcc_time_ms = G1HotCardCache::default_use_cache() ? average_time_ms(G1GCPhaseTimes::MergeHCC) : 0.0;
-
+  double merge_hcc_time_ms = average_time_ms(G1GCPhaseTimes::MergeHCC);
   if (update_stats) {
-    double cost_per_logged_card = 0.0;
-    size_t const pending_logged_cards = p->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards);
-    if (pending_logged_cards > 0) {
-      cost_per_logged_card = logged_cards_processing_time() / pending_logged_cards;
-      _analytics->report_cost_per_logged_card_ms(cost_per_logged_card);
-    }
-    _analytics->report_cost_scan_hcc(scan_hcc_time_ms);
+    size_t const total_log_buffer_cards = p->sum_thread_work_items(G1GCPhaseTimes::MergeHCC, G1GCPhaseTimes::MergeHCCDirtyCards) +
+                                          p->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards);
+    // Update prediction for card merge; MergeRSDirtyCards includes the cards from the Eager Reclaim phase.
+    size_t const total_cards_merged = p->sum_thread_work_items(G1GCPhaseTimes::MergeRS, G1GCPhaseTimes::MergeRSDirtyCards) +
+                                      p->sum_thread_work_items(G1GCPhaseTimes::OptMergeRS, G1GCPhaseTimes::MergeRSDirtyCards) +
+                                      total_log_buffer_cards;
 
-    size_t const total_cards_scanned = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) +
-                                       p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards);
-    size_t remset_cards_scanned = 0;
-    // There might have been duplicate log buffer entries in the queues which could
-    // increase this value beyond the cards scanned. In this case attribute all cards
-    // to the log buffers.
-    if (pending_logged_cards <= total_cards_scanned) {
-      remset_cards_scanned = total_cards_scanned - pending_logged_cards;
+    // The threshold for the number of cards in a given sampling which we consider
+    // large enough so that the impact from setup and other costs is negligible.
+    size_t const CardsNumSamplingThreshold = 10;
+
+    if (total_cards_merged > CardsNumSamplingThreshold) {
+      double avg_time_merge_cards = average_time_ms(G1GCPhaseTimes::MergeER) +
+                                    average_time_ms(G1GCPhaseTimes::MergeRS) +
+                                    average_time_ms(G1GCPhaseTimes::MergeHCC) +
+                                    average_time_ms(G1GCPhaseTimes::MergeLB) +
+                                    average_time_ms(G1GCPhaseTimes::OptMergeRS);
+      _analytics->report_cost_per_card_merge_ms(avg_time_merge_cards / total_cards_merged, this_pause_was_young_only);
     }
 
-    double cost_per_remset_card_ms = 0.0;
-    if (remset_cards_scanned > 10) {
-      double avg_time_remset_scan = ((average_time_ms(G1GCPhaseTimes::ScanHR) + average_time_ms(G1GCPhaseTimes::OptScanHR)) *
-                                     remset_cards_scanned / total_cards_scanned) +
-                                     average_time_ms(G1GCPhaseTimes::MergeER) +
-                                     average_time_ms(G1GCPhaseTimes::MergeRS) +
-                                     average_time_ms(G1GCPhaseTimes::OptMergeRS);
+    // Update prediction for card scan
+    size_t const total_cards_scanned = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) +
+                                       p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards);
 
-      cost_per_remset_card_ms = avg_time_remset_scan / remset_cards_scanned;
-      _analytics->report_cost_per_remset_card_ms(cost_per_remset_card_ms, this_pause_was_young_only);
-    }
+    if (total_cards_scanned > CardsNumSamplingThreshold) {
+      double avg_time_dirty_card_scan = average_time_ms(G1GCPhaseTimes::ScanHR) +
+                                        average_time_ms(G1GCPhaseTimes::OptScanHR);
 
-    if (_rs_length > 0) {
-      double cards_per_entry_ratio =
-        (double) remset_cards_scanned / (double) _rs_length;
-      _analytics->report_cards_per_entry_ratio(cards_per_entry_ratio, this_pause_was_young_only);
+      _analytics->report_cost_per_card_scan_ms(avg_time_dirty_card_scan / total_cards_scanned, this_pause_was_young_only);
     }
 
-    // This is defensive. For a while _rs_length could get
-    // smaller than _recorded_rs_length which was causing
-    // rs_length_diff to get very large and mess up the RSet length
-    // predictions. The reason was unsafe concurrent updates to the
-    // _inc_cset_recorded_rs_length field which the code below guards
-    // against (see CR 7118202). This bug has now been fixed (see CR
-    // 7119027). However, I'm still worried that
-    // _inc_cset_recorded_rs_length might still end up somewhat
-    // inaccurate. The concurrent refinement thread calculates an
-    // RSet's length concurrently with other CR threads updating it
-    // which might cause it to calculate the length incorrectly (if,
-    // say, it's in mid-coarsening). So I'll leave in the defensive
-    // conditional below just in case.
-    size_t rs_length_diff = 0;
-    size_t recorded_rs_length = _collection_set->recorded_rs_length();
-    if (_rs_length > recorded_rs_length) {
-      rs_length_diff = _rs_length - recorded_rs_length;
+    // Update prediction for the ratio between cards from the remembered
+    // sets and actually scanned cards from the remembered sets.
+    // Cards from the remembered sets are all cards not duplicated by cards from
+    // the logs.
+    // Due to duplicates in the log buffers, the number of actually scanned cards
+    // can be smaller than the cards in the log buffers.
+    const size_t from_rs_length_cards = (total_cards_scanned > total_log_buffer_cards) ? total_cards_scanned - total_log_buffer_cards : 0;
+    double merge_to_scan_ratio = 0.0;
+    if (total_cards_scanned > 0) {
+      merge_to_scan_ratio = (double) from_rs_length_cards / total_cards_scanned;
     }
-    _analytics->report_rs_length_diff((double) rs_length_diff);
+    _analytics->report_card_merge_to_scan_ratio(merge_to_scan_ratio, this_pause_was_young_only);
 
+    const size_t recorded_rs_length = _collection_set->recorded_rs_length();
+    const size_t rs_length_diff = _rs_length > recorded_rs_length ? _rs_length - recorded_rs_length : 0;
+    _analytics->report_rs_length_diff(rs_length_diff);
+
+    // Update prediction for copy cost per byte
     size_t copied_bytes = p->sum_thread_work_items(G1GCPhaseTimes::MergePSS, G1GCPhaseTimes::MergePSSCopiedBytes);
 
     if (copied_bytes > 0) {
@@ -842,21 +832,21 @@
   // Note that _mmu_tracker->max_gc_time() returns the time in seconds.
   double scan_logged_cards_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
 
-  if (scan_logged_cards_time_goal_ms < scan_hcc_time_ms) {
+  if (scan_logged_cards_time_goal_ms < merge_hcc_time_ms) {
     log_debug(gc, ergo, refine)("Adjust concurrent refinement thresholds (scanning the HCC expected to take longer than Update RS time goal)."
                                 "Logged Cards Scan time goal: %1.2fms Scan HCC time: %1.2fms",
-                                scan_logged_cards_time_goal_ms, scan_hcc_time_ms);
+                                scan_logged_cards_time_goal_ms, merge_hcc_time_ms);
 
     scan_logged_cards_time_goal_ms = 0;
   } else {
-    scan_logged_cards_time_goal_ms -= scan_hcc_time_ms;
+    scan_logged_cards_time_goal_ms -= merge_hcc_time_ms;
   }
 
   _pending_cards_at_prev_gc_end = _g1h->pending_card_num();
   double const logged_cards_time = logged_cards_processing_time();
 
   log_debug(gc, ergo, refine)("Concurrent refinement times: Logged Cards Scan time goal: %1.2fms Logged Cards Scan time: %1.2fms HCC time: %1.2fms",
-                              scan_logged_cards_time_goal_ms, logged_cards_time, scan_hcc_time_ms);
+                              scan_logged_cards_time_goal_ms, logged_cards_time, merge_hcc_time_ms);
 
   _g1h->concurrent_refine()->adjust(logged_cards_time,
                                     phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards),
@@ -936,17 +926,17 @@
 }
 
 double G1Policy::predict_base_elapsed_time_ms(size_t pending_cards,
-                                              size_t scanned_cards) const {
+                                              size_t rs_length) const {
+  size_t effective_scanned_cards = _analytics->predict_scan_card_num(rs_length, collector_state()->in_young_only_phase());
   return
-    _analytics->predict_rs_update_time_ms(pending_cards) +
-    _analytics->predict_rs_scan_time_ms(scanned_cards, collector_state()->in_young_only_phase()) +
+    _analytics->predict_card_merge_time_ms(pending_cards + rs_length, collector_state()->in_young_only_phase()) +
+    _analytics->predict_card_scan_time_ms(effective_scanned_cards, collector_state()->in_young_only_phase()) +
     _analytics->predict_constant_other_time_ms();
 }
 
 double G1Policy::predict_base_elapsed_time_ms(size_t pending_cards) const {
   size_t rs_length = _analytics->predict_rs_length();
-  size_t card_num = _analytics->predict_card_num(rs_length, collector_state()->in_young_only_phase());
-  return predict_base_elapsed_time_ms(pending_cards, card_num);
+  return predict_base_elapsed_time_ms(pending_cards, rs_length);
 }
 
 size_t G1Policy::predict_bytes_to_copy(HeapRegion* hr) const {
@@ -965,13 +955,13 @@
 double G1Policy::predict_region_elapsed_time_ms(HeapRegion* hr,
                                                 bool for_young_gc) const {
   size_t rs_length = hr->rem_set()->occupied();
-  // Predicting the number of cards is based on which type of GC
-  // we're predicting for.
-  size_t card_num = _analytics->predict_card_num(rs_length, for_young_gc);
+  size_t scan_card_num = _analytics->predict_scan_card_num(rs_length, for_young_gc);
+
   size_t bytes_to_copy = predict_bytes_to_copy(hr);
 
   double region_elapsed_time_ms =
-    _analytics->predict_rs_scan_time_ms(card_num, collector_state()->in_young_only_phase()) +
+    _analytics->predict_card_merge_time_ms(rs_length, collector_state()->in_young_only_phase()) +
+    _analytics->predict_card_scan_time_ms(scan_card_num, collector_state()->in_young_only_phase()) +
     _analytics->predict_object_copy_time_ms(bytes_to_copy, collector_state()->mark_or_rebuild_in_progress());
 
   // The prediction of the "other" time for this region is based
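The record_collection_pause_end changes above derive three samples from each pause: the cost per merged card, the cost per scanned card, and the merge-to-scan ratio. A condensed standalone sketch of that derivation, with plain structs standing in for G1GCPhaseTimes and G1Analytics (all names illustrative; in the real code, samples below the threshold are simply not reported rather than set to zero):

#include <cstddef>

// Plain stand-ins for the phase-time and work-item sums read from
// G1GCPhaseTimes in the hunk above.
struct PauseMeasurements {
  double merge_phases_ms;    // MergeER + MergeRS + MergeHCC + MergeLB + OptMergeRS
  double scan_phases_ms;     // ScanHR + OptScanHR
  size_t cards_merged;       // remset dirty cards + HCC cards + log buffer cards
  size_t cards_scanned;      // ScanHRScannedCards for ScanHR and OptScanHR
  size_t log_buffer_cards;   // MergeHCCDirtyCards + MergeLBDirtyCards
};

struct Samples {
  double cost_per_card_merge_ms;
  double cost_per_card_scan_ms;
  double merge_to_scan_ratio;
};

// Mirrors CardsNumSamplingThreshold: too few cards make a sample too noisy.
static const size_t kCardsNumSamplingThreshold = 10;

Samples derive_samples(const PauseMeasurements& m) {
  Samples s = {0.0, 0.0, 0.0};
  if (m.cards_merged > kCardsNumSamplingThreshold) {
    s.cost_per_card_merge_ms = m.merge_phases_ms / m.cards_merged;
  }
  if (m.cards_scanned > kCardsNumSamplingThreshold) {
    s.cost_per_card_scan_ms = m.scan_phases_ms / m.cards_scanned;
  }
  // Scanned cards attributable to remembered sets: those not already covered
  // by log buffer entries, clamped at zero because of duplicate entries.
  size_t from_rs = (m.cards_scanned > m.log_buffer_cards)
                       ? m.cards_scanned - m.log_buffer_cards
                       : 0;
  if (m.cards_scanned > 0) {
    s.merge_to_scan_ratio = static_cast<double>(from_rs) / m.cards_scanned;
  }
  return s;
}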
--- a/src/hotspot/share/gc/g1/g1Policy.hpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1Policy.hpp	Fri Nov 29 10:20:14 2019 +0100
@@ -140,9 +140,9 @@
     _rs_length = rs_length;
   }
 
-  double predict_base_elapsed_time_ms(size_t pending_cards) const;
-  double predict_base_elapsed_time_ms(size_t pending_cards,
-                                      size_t scanned_cards) const;
+  double predict_base_elapsed_time_ms(size_t num_pending_cards) const;
+  double predict_base_elapsed_time_ms(size_t num_pending_cards,
+                                      size_t rs_length) const;
   size_t predict_bytes_to_copy(HeapRegion* hr) const;
   double predict_region_elapsed_time_ms(HeapRegion* hr, bool for_young_gc) const;
 
--- a/src/hotspot/share/gc/g1/g1RemSet.cpp	Fri Nov 29 11:28:39 2019 +0300
+++ b/src/hotspot/share/gc/g1/g1RemSet.cpp	Fri Nov 29 10:20:14 2019 +0100
@@ -920,6 +920,8 @@
     uint _merged_fine;
     uint _merged_coarse;
 
+    size_t _cards_dirty;
+
     // Returns if the region contains cards we need to scan. If so, remember that
     // region in the current set of dirty regions.
     bool remember_if_interesting(uint const region_idx) {
@@ -935,7 +937,8 @@
       _ct(G1CollectedHeap::heap()->card_table()),
       _merged_sparse(0),
       _merged_fine(0),
-      _merged_coarse(0) { }
+      _merged_coarse(0),
+      _cards_dirty(0) { }
 
     void next_coarse_prt(uint const region_idx) {
       if (!remember_if_interesting(region_idx)) {
@@ -945,7 +948,7 @@
       _merged_coarse++;
 
       size_t region_base_idx = (size_t)region_idx << HeapRegion::LogCardsPerRegion;
-      _ct->mark_region_dirty(region_base_idx, HeapRegion::CardsPerRegion);
+      _cards_dirty += _ct->mark_region_dirty(region_base_idx, HeapRegion::CardsPerRegion);
       _scan_state->set_chunk_region_dirty(region_base_idx);
     }
 
@@ -959,7 +962,7 @@
       size_t const region_base_idx = (size_t)region_idx << HeapRegion::LogCardsPerRegion;
       BitMap::idx_t cur = bm->get_next_one_offset(0);
       while (cur != bm->size()) {
-        _ct->mark_clean_as_dirty(region_base_idx + cur);
+        _cards_dirty += _ct->mark_clean_as_dirty(region_base_idx + cur);
         _scan_state->set_chunk_dirty(region_base_idx + cur);
         cur = bm->get_next_one_offset(cur + 1);
       }
@@ -975,7 +978,7 @@
       size_t const region_base_idx = (size_t)region_idx << HeapRegion::LogCardsPerRegion;
       for (uint i = 0; i < num_cards; i++) {
         size_t card_idx = region_base_idx + cards[i];
-        _ct->mark_clean_as_dirty(card_idx);
+        _cards_dirty += _ct->mark_clean_as_dirty(card_idx);
         _scan_state->set_chunk_dirty(card_idx);
       }
     }
@@ -994,6 +997,8 @@
     size_t merged_sparse() const { return _merged_sparse; }
     size_t merged_fine() const { return _merged_fine; }
     size_t merged_coarse() const { return _merged_coarse; }
+
+    size_t cards_dirty() const { return _cards_dirty; }
   };
 
   // Visitor for the remembered sets of humongous candidate regions to merge their
@@ -1039,6 +1044,8 @@
     size_t merged_sparse() const { return _cl.merged_sparse(); }
     size_t merged_fine() const { return _cl.merged_fine(); }
     size_t merged_coarse() const { return _cl.merged_coarse(); }
+
+    size_t cards_dirty() const { return _cl.cards_dirty(); }
   };
 
   // Visitor for the log buffer entries to merge them into the card table.
@@ -1140,6 +1147,7 @@
       p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_sparse(), G1GCPhaseTimes::MergeRSMergedSparse);
       p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_fine(), G1GCPhaseTimes::MergeRSMergedFine);
       p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_coarse(), G1GCPhaseTimes::MergeRSMergedCoarse);
+      p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.cards_dirty(), G1GCPhaseTimes::MergeRSDirtyCards);
     }
 
     // Merge remembered sets of current candidates.
@@ -1151,6 +1159,7 @@
       p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_sparse(), G1GCPhaseTimes::MergeRSMergedSparse);
       p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_fine(), G1GCPhaseTimes::MergeRSMergedFine);
       p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_coarse(), G1GCPhaseTimes::MergeRSMergedCoarse);
+      p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.cards_dirty(), G1GCPhaseTimes::MergeRSDirtyCards);
     }
 
     // Apply closure to log entries in the HCC.