src/hotspot/share/gc/g1/g1Policy.cpp
branch:    datagramsocketimpl-branch
changeset: 58678:9cf78a70fa4f
parent:    54726:671e35cf611b
child:     58679:9c3209ff7550
--- a/src/hotspot/share/gc/g1/g1Policy.cpp	Thu Oct 17 20:27:44 2019 +0100
+++ b/src/hotspot/share/gc/g1/g1Policy.cpp	Thu Oct 17 20:53:35 2019 +0100
@@ -68,9 +68,13 @@
   _reserve_regions(0),
   _young_gen_sizer(G1YoungGenSizer::create_gen_sizer()),
   _free_regions_at_end_of_collection(0),
-  _max_rs_lengths(0),
-  _rs_lengths_prediction(0),
-  _pending_cards(0),
+  _max_rs_length(0),
+  _rs_length_prediction(0),
+  _pending_cards_at_gc_start(0),
+  _pending_cards_at_prev_gc_end(0),
+  _total_mutator_refined_cards(0),
+  _total_concurrent_refined_cards(0),
+  _total_concurrent_refinement_time(),
   _bytes_allocated_in_old_since_last_gc(0),
   _initial_mark_to_mixed(),
   _collection_set(NULL),
@@ -219,23 +223,23 @@
 }
 
 uint G1Policy::update_young_list_max_and_target_length() {
-  return update_young_list_max_and_target_length(_analytics->predict_rs_lengths());
+  return update_young_list_max_and_target_length(_analytics->predict_rs_length());
 }
 
-uint G1Policy::update_young_list_max_and_target_length(size_t rs_lengths) {
-  uint unbounded_target_length = update_young_list_target_length(rs_lengths);
+uint G1Policy::update_young_list_max_and_target_length(size_t rs_length) {
+  uint unbounded_target_length = update_young_list_target_length(rs_length);
   update_max_gc_locker_expansion();
   return unbounded_target_length;
 }
 
-uint G1Policy::update_young_list_target_length(size_t rs_lengths) {
-  YoungTargetLengths young_lengths = young_list_target_lengths(rs_lengths);
+uint G1Policy::update_young_list_target_length(size_t rs_length) {
+  YoungTargetLengths young_lengths = young_list_target_lengths(rs_length);
   _young_list_target_length = young_lengths.first;
 
   return young_lengths.second;
 }
 
-G1Policy::YoungTargetLengths G1Policy::young_list_target_lengths(size_t rs_lengths) const {
+G1Policy::YoungTargetLengths G1Policy::young_list_target_lengths(size_t rs_length) const {
   YoungTargetLengths result;
 
   // Calculate the absolute and desired min bounds first.
@@ -256,7 +260,7 @@
   if (use_adaptive_young_list_length()) {
     if (collector_state()->in_young_only_phase()) {
       young_list_target_length =
-                        calculate_young_list_target_length(rs_lengths,
+                        calculate_young_list_target_length(rs_length,
                                                            base_min_length,
                                                            desired_min_length,
                                                            desired_max_length);
@@ -301,7 +305,7 @@
 }
 
 uint
-G1Policy::calculate_young_list_target_length(size_t rs_lengths,
+G1Policy::calculate_young_list_target_length(size_t rs_length,
                                                     uint base_min_length,
                                                     uint desired_min_length,
                                                     uint desired_max_length) const {
@@ -326,8 +330,8 @@
   const double target_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0;
   const double survivor_regions_evac_time = predict_survivor_regions_evac_time();
   const size_t pending_cards = _analytics->predict_pending_cards();
-  const size_t adj_rs_lengths = rs_lengths + _analytics->predict_rs_length_diff();
-  const size_t scanned_cards = _analytics->predict_card_num(adj_rs_lengths, true /* for_young_gc */);
+  const size_t adj_rs_length = rs_length + _analytics->predict_rs_length_diff();
+  const size_t scanned_cards = _analytics->predict_card_num(adj_rs_length, true /* for_young_gc */);
   const double base_time_ms =
     predict_base_elapsed_time_ms(pending_cards, scanned_cards) +
     survivor_regions_evac_time;
@@ -414,25 +418,25 @@
   return survivor_regions_evac_time;
 }
 
-void G1Policy::revise_young_list_target_length_if_necessary(size_t rs_lengths) {
+void G1Policy::revise_young_list_target_length_if_necessary(size_t rs_length) {
   guarantee(use_adaptive_young_list_length(), "should not call this otherwise" );
 
-  if (rs_lengths > _rs_lengths_prediction) {
+  if (rs_length > _rs_length_prediction) {
     // add 10% to avoid having to recalculate often
-    size_t rs_lengths_prediction = rs_lengths * 1100 / 1000;
-    update_rs_lengths_prediction(rs_lengths_prediction);
+    size_t rs_length_prediction = rs_length * 1100 / 1000;
+    update_rs_length_prediction(rs_length_prediction);
 
-    update_young_list_max_and_target_length(rs_lengths_prediction);
+    update_young_list_max_and_target_length(rs_length_prediction);
   }
 }
 
-void G1Policy::update_rs_lengths_prediction() {
-  update_rs_lengths_prediction(_analytics->predict_rs_lengths());
+void G1Policy::update_rs_length_prediction() {
+  update_rs_length_prediction(_analytics->predict_rs_length());
 }
 
-void G1Policy::update_rs_lengths_prediction(size_t prediction) {
+void G1Policy::update_rs_length_prediction(size_t prediction) {
   if (collector_state()->in_young_only_phase() && use_adaptive_young_list_length()) {
-    _rs_lengths_prediction = prediction;
+    _rs_length_prediction = prediction;
   }
 }
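
The 10% pad in revise_young_list_target_length_if_necessary() is plain integer arithmetic; a minimal standalone sketch of the same computation (the helper name is illustrative, not part of the patch):

    #include <stddef.h>

    // +10% without floating point: 12000 remembered set entries become
    // 12000 * 1100 / 1000 = 13200, so observed lengths below the padded
    // prediction do not force another target-length recalculation.
    static size_t padded_rs_length_prediction(size_t rs_length) {
      return rs_length * 1100 / 1000;
    }
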
 
@@ -442,6 +446,7 @@
   collector_state()->set_in_young_only_phase(false);
   collector_state()->set_in_full_gc(true);
   _collection_set->clear_candidates();
+  record_concurrent_refinement_data(true /* is_full_collection */);
 }
 
 void G1Policy::record_full_collection_end() {
@@ -471,13 +476,68 @@
   // Reset survivors SurvRateGroup.
   _survivor_surv_rate_group->reset();
   update_young_list_max_and_target_length();
-  update_rs_lengths_prediction();
+  update_rs_length_prediction();
+  _pending_cards_at_prev_gc_end = _g1h->pending_card_num();
 
   _bytes_allocated_in_old_since_last_gc = 0;
 
   record_pause(FullGC, _full_collection_start_sec, end_sec);
 }
 
+void G1Policy::record_concurrent_refinement_data(bool is_full_collection) {
+  _pending_cards_at_gc_start = _g1h->pending_card_num();
+
+  // Record info about concurrent refinement thread processing.
+  G1ConcurrentRefine* cr = _g1h->concurrent_refine();
+  G1ConcurrentRefine::RefinementStats cr_stats = cr->total_refinement_stats();
+
+  Tickspan cr_time = cr_stats._time - _total_concurrent_refinement_time;
+  _total_concurrent_refinement_time = cr_stats._time;
+
+  size_t cr_cards = cr_stats._cards - _total_concurrent_refined_cards;
+  _total_concurrent_refined_cards = cr_stats._cards;
+
+  // Don't update rate if full collection.  We could be in an implicit full
+  // collection after a non-full collection failure, in which case there
+  // wasn't any mutator/cr-thread activity since last recording.  And if
+  // we're in an explicit full collection, the time since the last GC can
+  // be arbitrarily short, so not a very good sample.  Similarly, don't
+  // update the rate if the current sample is empty or time is zero.
+  if (!is_full_collection && (cr_cards > 0) && (cr_time > Tickspan())) {
+    double rate = cr_cards / (cr_time.seconds() * MILLIUNITS);
+    _analytics->report_concurrent_refine_rate_ms(rate);
+  }
+
+  // Record info about mutator thread processing.
+  G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
+  size_t mut_total_cards = dcqs.total_mutator_refined_cards();
+  size_t mut_cards = mut_total_cards - _total_mutator_refined_cards;
+  _total_mutator_refined_cards = mut_total_cards;
+
+  // Record mutator's card logging rate.
+  // Don't update if full collection; see above.
+  if (!is_full_collection) {
+    size_t total_cards = _pending_cards_at_gc_start + cr_cards + mut_cards;
+    assert(_pending_cards_at_prev_gc_end <= total_cards,
+           "untracked cards: last pending: " SIZE_FORMAT
+           ", pending: " SIZE_FORMAT ", conc refine: " SIZE_FORMAT
+           ", mut refine:" SIZE_FORMAT,
+           _pending_cards_at_prev_gc_end, _pending_cards_at_gc_start,
+           cr_cards, mut_cards);
+    size_t logged_cards = total_cards - _pending_cards_at_prev_gc_end;
+    double logging_start_time = _analytics->prev_collection_pause_end_ms();
+    double logging_end_time = Ticks::now().seconds() * MILLIUNITS;
+    double logging_time = logging_end_time - logging_start_time;
+    // Unlike above for conc-refine rate, here we should not require a
+    // non-empty sample, since an application could go some time with only
+    // young-gen or filtered out writes.  But we'll ignore unusually short
+    // sample periods, as they may just pollute the predictions.
+    if (logging_time > 1.0) {   // Require > 1ms sample time.
+      _analytics->report_logged_cards_rate_ms(logged_cards / logging_time);
+    }
+  }
+}
+
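
Both rates reported by record_concurrent_refinement_data() are in cards per millisecond. A self-contained sketch of the same arithmetic, with the deltas already taken from the cumulative counters (helper names and parameters are illustrative, not HotSpot APIs):

    #include <stddef.h>

    // Concurrent refinement rate: cards refined since the last sample over the
    // elapsed refinement time in ms (MILLIUNITS is 1000 in HotSpot).
    static double refine_rate_cards_per_ms(size_t refined_cards, double refine_time_sec) {
      return refined_cards / (refine_time_sec * 1000.0);  // guarded by cards > 0 and time > 0
    }

    // Logged-cards (mutator dirtying) rate: everything that became dirty since the
    // previous pause ended, over the wall time since that pause, in ms.
    static double logged_cards_rate_cards_per_ms(size_t pending_at_gc_start,
                                                 size_t conc_refined_cards,
                                                 size_t mut_refined_cards,
                                                 size_t pending_at_prev_gc_end,
                                                 double logging_time_ms) {
      size_t total_cards  = pending_at_gc_start + conc_refined_cards + mut_refined_cards;
      // The patch asserts pending_at_prev_gc_end <= total_cards before this subtraction.
      size_t logged_cards = total_cards - pending_at_prev_gc_end;
      return logged_cards / logging_time_ms;  // only reported when logging_time_ms > 1.0
    }
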
 void G1Policy::record_collection_pause_start(double start_time_sec) {
   // We only need to do this here as the policy will only be applied
   // to the GC we're about to start. so, no point is calculating this
@@ -490,7 +550,8 @@
   assert_used_and_recalculate_used_equal(_g1h);
 
   phase_times()->record_cur_collection_start_sec(start_time_sec);
-  _pending_cards = _g1h->pending_card_num();
+
+  record_concurrent_refinement_data(false /* is_full_collection */);
 
   _collection_set->reset_bytes_used_before();
   _bytes_copied_during_gc = 0;
@@ -572,10 +633,24 @@
   return result;
 }
 
+double G1Policy::logged_cards_processing_time() const {
+  double all_cards_processing_time = average_time_ms(G1GCPhaseTimes::ScanHR) + average_time_ms(G1GCPhaseTimes::OptScanHR);
+  size_t logged_dirty_cards = phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards);
+  size_t scan_heap_roots_cards = phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) +
+                                 phase_times()->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards);
+  // This may happen if there are duplicate cards in different log buffers.
+  if (logged_dirty_cards > scan_heap_roots_cards) {
+    return all_cards_processing_time + average_time_ms(G1GCPhaseTimes::MergeLB);
+  }
+  return (all_cards_processing_time * logged_dirty_cards / scan_heap_roots_cards) + average_time_ms(G1GCPhaseTimes::MergeLB);
+}
+
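
logged_cards_processing_time() attributes a share of the heap-root scan time to cards that came from the log buffers, proportional to their fraction of all scanned cards, and adds the whole merge-log-buffers time. A standalone sketch of that attribution (names are illustrative; the zero-cards guard is added for the sketch):

    #include <stddef.h>

    static double logged_cards_time_ms(double scan_time_ms,      // ScanHR + OptScanHR
                                       double merge_lb_time_ms,  // MergeLB
                                       size_t logged_dirty_cards,
                                       size_t scanned_cards) {
      if (scanned_cards == 0 || logged_dirty_cards > scanned_cards) {
        // Duplicate log entries can push the logged count past the scanned count;
        // in that case all scan time is attributed to the log buffers.
        return scan_time_ms + merge_lb_time_ms;
      }
      return scan_time_ms * logged_dirty_cards / scanned_cards + merge_lb_time_ms;
    }

record_collection_pause_end() below divides this time by the number of logged dirty cards to obtain the cost_per_logged_card fed to the analytics.
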
 // Anything below that is considered to be zero
 #define MIN_TIMER_GRANULARITY 0.0000001
 
-void G1Policy::record_collection_pause_end(double pause_time_ms, size_t cards_scanned, size_t heap_used_bytes_before_gc) {
+void G1Policy::record_collection_pause_end(double pause_time_ms, size_t heap_used_bytes_before_gc) {
+  G1GCPhaseTimes* p = phase_times();
+
   double end_time_sec = os::elapsedTime();
 
   assert_used_and_recalculate_used_equal(_g1h);
@@ -645,49 +720,62 @@
   _short_lived_surv_rate_group->start_adding_regions();
   // Do that for any other surv rate groups
 
-  double scan_hcc_time_ms = G1HotCardCache::default_use_cache() ? average_time_ms(G1GCPhaseTimes::ScanHCC) : 0.0;
+  double scan_hcc_time_ms = G1HotCardCache::default_use_cache() ? average_time_ms(G1GCPhaseTimes::MergeHCC) : 0.0;
 
   if (update_stats) {
-    double cost_per_card_ms = 0.0;
-    if (_pending_cards > 0) {
-      cost_per_card_ms = (average_time_ms(G1GCPhaseTimes::UpdateRS)) / (double) _pending_cards;
-      _analytics->report_cost_per_card_ms(cost_per_card_ms);
+    double cost_per_logged_card = 0.0;
+    size_t const pending_logged_cards = p->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards);
+    if (pending_logged_cards > 0) {
+      cost_per_logged_card = logged_cards_processing_time() / pending_logged_cards;
+      _analytics->report_cost_per_logged_card_ms(cost_per_logged_card);
     }
     _analytics->report_cost_scan_hcc(scan_hcc_time_ms);
 
-    double cost_per_entry_ms = 0.0;
-    if (cards_scanned > 10) {
-      double avg_time_scan_rs = average_time_ms(G1GCPhaseTimes::ScanRS);
-      if (this_pause_was_young_only) {
-        avg_time_scan_rs += average_time_ms(G1GCPhaseTimes::OptScanRS);
-      }
-      cost_per_entry_ms = avg_time_scan_rs / cards_scanned;
-      _analytics->report_cost_per_entry_ms(cost_per_entry_ms, this_pause_was_young_only);
+    size_t const total_cards_scanned = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) +
+                                       p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards);
+    size_t remset_cards_scanned = 0;
+    // There might have been duplicate log buffer entries in the queues which could
+    // increase this value beyond the cards scanned. In this case attribute all cards
+    // to the log buffers.
+    if (pending_logged_cards <= total_cards_scanned) {
+      remset_cards_scanned = total_cards_scanned - pending_logged_cards;
     }
 
-    if (_max_rs_lengths > 0) {
+    double cost_per_remset_card_ms = 0.0;
+    if (remset_cards_scanned > 10) {
+      double avg_time_remset_scan = ((average_time_ms(G1GCPhaseTimes::ScanHR) + average_time_ms(G1GCPhaseTimes::OptScanHR)) *
+                                     remset_cards_scanned / total_cards_scanned) +
+                                     average_time_ms(G1GCPhaseTimes::MergeER) +
+                                     average_time_ms(G1GCPhaseTimes::MergeRS) +
+                                     average_time_ms(G1GCPhaseTimes::OptMergeRS);
+
+      cost_per_remset_card_ms = avg_time_remset_scan / remset_cards_scanned;
+      _analytics->report_cost_per_remset_card_ms(cost_per_remset_card_ms, this_pause_was_young_only);
+    }
+
+    if (_max_rs_length > 0) {
       double cards_per_entry_ratio =
-        (double) cards_scanned / (double) _max_rs_lengths;
+        (double) remset_cards_scanned / (double) _max_rs_length;
       _analytics->report_cards_per_entry_ratio(cards_per_entry_ratio, this_pause_was_young_only);
     }
 
-    // This is defensive. For a while _max_rs_lengths could get
-    // smaller than _recorded_rs_lengths which was causing
+    // This is defensive. For a while _max_rs_length could get
+    // smaller than _recorded_rs_length which was causing
     // rs_length_diff to get very large and mess up the RSet length
     // predictions. The reason was unsafe concurrent updates to the
-    // _inc_cset_recorded_rs_lengths field which the code below guards
+    // _inc_cset_recorded_rs_length field which the code below guards
     // against (see CR 7118202). This bug has now been fixed (see CR
     // 7119027). However, I'm still worried that
-    // _inc_cset_recorded_rs_lengths might still end up somewhat
+    // _inc_cset_recorded_rs_length might still end up somewhat
     // inaccurate. The concurrent refinement thread calculates an
     // RSet's length concurrently with other CR threads updating it
     // which might cause it to calculate the length incorrectly (if,
     // say, it's in mid-coarsening). So I'll leave in the defensive
     // conditional below just in case.
     size_t rs_length_diff = 0;
-    size_t recorded_rs_lengths = _collection_set->recorded_rs_lengths();
-    if (_max_rs_lengths > recorded_rs_lengths) {
-      rs_length_diff = _max_rs_lengths - recorded_rs_lengths;
+    size_t recorded_rs_length = _collection_set->recorded_rs_length();
+    if (_max_rs_length > recorded_rs_length) {
+      rs_length_diff = _max_rs_length - recorded_rs_length;
     }
     _analytics->report_rs_length_diff((double) rs_length_diff);
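
The remembered-set cost in the block above is the complement of the logged-cards attribution: the remaining share of the scan time plus the eager/remset merge phases, divided by the remset cards actually scanned. A standalone sketch (names are illustrative):

    #include <stddef.h>

    static double cost_per_remset_card_ms(double scan_time_ms,   // ScanHR + OptScanHR
                                          double merge_time_ms,  // MergeER + MergeRS + OptMergeRS
                                          size_t remset_cards_scanned,
                                          size_t total_cards_scanned) {
      double remset_scan_time_ms =
          scan_time_ms * remset_cards_scanned / total_cards_scanned + merge_time_ms;
      return remset_scan_time_ms / remset_cards_scanned;  // only reported when remset_cards_scanned > 10
    }
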
 
@@ -717,8 +805,8 @@
     // after the mixed gc phase.
     // During mixed gc we do not use them for young gen sizing.
     if (this_pause_was_young_only) {
-      _analytics->report_pending_cards((double) _pending_cards);
-      _analytics->report_rs_lengths((double) _max_rs_lengths);
+      _analytics->report_pending_cards((double) _pending_cards_at_gc_start);
+      _analytics->report_rs_length((double) _max_rs_length);
     }
   }
 
@@ -730,7 +818,7 @@
 
   _free_regions_at_end_of_collection = _g1h->num_free_regions();
 
-  update_rs_lengths_prediction();
+  update_rs_length_prediction();
 
   // Do not update dynamic IHOP due to G1 periodic collection as it is highly likely
   // that in this case we are not running in a "normal" operating mode.
@@ -759,20 +847,27 @@
   }
 
   // Note that _mmu_tracker->max_gc_time() returns the time in seconds.
-  double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
+  double scan_logged_cards_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
 
-  if (update_rs_time_goal_ms < scan_hcc_time_ms) {
+  if (scan_logged_cards_time_goal_ms < scan_hcc_time_ms) {
     log_debug(gc, ergo, refine)("Adjust concurrent refinement thresholds (scanning the HCC expected to take longer than Update RS time goal)."
-                                "Update RS time goal: %1.2fms Scan HCC time: %1.2fms",
-                                update_rs_time_goal_ms, scan_hcc_time_ms);
+                                "Logged Cards Scan time goal: %1.2fms Scan HCC time: %1.2fms",
+                                scan_logged_cards_time_goal_ms, scan_hcc_time_ms);
 
-    update_rs_time_goal_ms = 0;
+    scan_logged_cards_time_goal_ms = 0;
   } else {
-    update_rs_time_goal_ms -= scan_hcc_time_ms;
+    scan_logged_cards_time_goal_ms -= scan_hcc_time_ms;
   }
-  _g1h->concurrent_refine()->adjust(average_time_ms(G1GCPhaseTimes::UpdateRS),
-                                    phase_times()->sum_thread_work_items(G1GCPhaseTimes::UpdateRS),
-                                    update_rs_time_goal_ms);
+
+  _pending_cards_at_prev_gc_end = _g1h->pending_card_num();
+  double const logged_cards_time = logged_cards_processing_time();
+
+  log_debug(gc, ergo, refine)("Concurrent refinement times: Logged Cards Scan time goal: %1.2fms Logged Cards Scan time: %1.2fms HCC time: %1.2fms",
+                              scan_logged_cards_time_goal_ms, logged_cards_time, scan_hcc_time_ms);
+
+  _g1h->concurrent_refine()->adjust(logged_cards_time,
+                                    phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards),
+                                    scan_logged_cards_time_goal_ms);
 }
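
The time goal handed to G1ConcurrentRefine::adjust() is the G1RSetUpdatingPauseTimePercent share of the pause-time goal, less whatever the hot card cache scan already consumed. A sketch of the budget (parameter names are illustrative):

    static double scan_logged_cards_goal_ms(double max_gc_time_sec,        // MMU pause time goal, in seconds
                                            double rset_updating_percent,  // G1RSetUpdatingPauseTimePercent
                                            double scan_hcc_time_ms) {
      double goal_ms = max_gc_time_sec * 1000.0 * rset_updating_percent / 100.0;
      // If scanning the hot card cache alone exceeds the goal, nothing is left
      // for logged cards and the thresholds are adjusted with a zero budget.
      return goal_ms < scan_hcc_time_ms ? 0.0 : goal_ms - scan_hcc_time_ms;
    }

For example, a 200ms pause goal with the default G1RSetUpdatingPauseTimePercent of 10 gives a 20ms budget; 2ms of HCC scanning leaves 18ms as the target.
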
 
 G1IHOPControl* G1Policy::create_ihop_control(const G1Predictions* predictor){
@@ -856,7 +951,7 @@
 }
 
 double G1Policy::predict_base_elapsed_time_ms(size_t pending_cards) const {
-  size_t rs_length = _analytics->predict_rs_lengths() + _analytics->predict_rs_length_diff();
+  size_t rs_length = _analytics->predict_rs_length() + _analytics->predict_rs_length_diff();
   size_t card_num = _analytics->predict_card_num(rs_length, collector_state()->in_young_only_phase());
   return predict_base_elapsed_time_ms(pending_cards, card_num);
 }
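
predict_base_elapsed_time_ms() turns the predicted remembered set length (plus the recorded diff) into a predicted number of scanned cards. Assuming predict_card_num() scales that length by the cards-per-entry ratio reported during young-only pauses, the chain reduces to roughly:

    #include <stddef.h>

    // Rough sketch of the prediction chain; the actual predictors live in G1Analytics.
    static size_t predicted_scanned_cards(size_t predicted_rs_length,
                                          size_t predicted_rs_length_diff,
                                          double cards_per_entry_ratio) {
      return (size_t)((predicted_rs_length + predicted_rs_length_diff) * cards_per_entry_ratio);
    }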