8231153: Improve concurrent refinement statistics
author kbarrett
date Tue, 08 Oct 2019 15:15:50 -0400
changeset 58508 d6058bd73982
parent 58507 7c1d0616828c
child 58509 7b41c88f8432
8231153: Improve concurrent refinement statistics
8043505: G1RemSet::_conc_refine_cards is incremented unsynchronized
Summary: Added refinement rates and counts, removed _conc_refine_cards
Reviewed-by: tschatzl, sjohanss
src/hotspot/share/gc/g1/g1Analytics.cpp
src/hotspot/share/gc/g1/g1Analytics.hpp
src/hotspot/share/gc/g1/g1CollectionSet.cpp
src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp
src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp
src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp
src/hotspot/share/gc/g1/g1ConcurrentRefineThread.hpp
src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp
src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp
src/hotspot/share/gc/g1/g1Policy.cpp
src/hotspot/share/gc/g1/g1Policy.hpp
src/hotspot/share/gc/g1/g1RemSet.cpp
src/hotspot/share/gc/g1/g1RemSet.hpp
src/hotspot/share/gc/g1/g1RemSetSummary.cpp
src/hotspot/share/gc/g1/g1RemSetSummary.hpp
test/hotspot/jtreg/gc/g1/TestRemsetLoggingThreads.java
test/hotspot/jtreg/gc/g1/TestRemsetLoggingTools.java
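
The patch replaces the old per-buffer counters with cumulative per-thread refinement time and refined-card counts, and derives cards/ms rates from the deltas sampled at GC boundaries. A minimal standalone sketch of that bookkeeping idea (editorial illustration, not HotSpot code; names and numbers are made up):

#include <cstddef>
#include <cstdio>

struct RefinementSample {
  double time_ms;   // cumulative refinement time
  size_t cards;     // cumulative refined cards
};

// Rate between two samples; returns 0.0 if the interval is empty.
static double rate_cards_per_ms(const RefinementSample& prev,
                                const RefinementSample& curr) {
  double dt = curr.time_ms - prev.time_ms;
  size_t dc = curr.cards - prev.cards;
  return (dt > 0.0 && dc > 0) ? (double)dc / dt : 0.0;
}

int main() {
  RefinementSample at_prev_gc = {120.0, 50000};
  RefinementSample at_this_gc = {180.0, 95000};
  std::printf("concurrent refine rate: %.1f cards/ms\n",
              rate_cards_per_ms(at_prev_gc, at_this_gc));
  return 0;
}
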
--- a/src/hotspot/share/gc/g1/g1Analytics.cpp	Tue Oct 08 20:47:46 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1Analytics.cpp	Tue Oct 08 15:15:50 2019 -0400
@@ -78,6 +78,8 @@
     _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _prev_collection_pause_end_ms(0.0),
     _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _concurrent_refine_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
+    _logged_cards_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _cost_per_logged_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
     _cost_scan_hcc_seq(new TruncatedSeq(TruncatedSeqLength)),
     _young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
@@ -102,6 +104,10 @@
   int index = MIN2(ParallelGCThreads - 1, 7u);
 
   _rs_length_diff_seq->add(rs_length_diff_defaults[index]);
+  // Start with inverse of maximum STW cost.
+  _concurrent_refine_rate_ms_seq->add(1/cost_per_logged_card_ms_defaults[0]);
+  // Some applications have very low rates for logging cards.
+  _logged_cards_rate_ms_seq->add(0.0);
   _cost_per_logged_card_ms_seq->add(cost_per_logged_card_ms_defaults[index]);
   _cost_scan_hcc_seq->add(0.0);
   _young_cards_per_entry_ratio_seq->add(young_cards_per_entry_ratio_defaults[index]);
@@ -159,6 +165,14 @@
     (pause_time_ms * _recent_prev_end_times_for_all_gcs_sec->num()) / interval_ms;
 }
 
+void G1Analytics::report_concurrent_refine_rate_ms(double cards_per_ms) {
+  _concurrent_refine_rate_ms_seq->add(cards_per_ms);
+}
+
+void G1Analytics::report_logged_cards_rate_ms(double cards_per_ms) {
+  _logged_cards_rate_ms_seq->add(cards_per_ms);
+}
+
 void G1Analytics::report_cost_per_logged_card_ms(double cost_per_logged_card_ms) {
   _cost_per_logged_card_ms_seq->add(cost_per_logged_card_ms);
 }
@@ -223,6 +237,14 @@
   return get_new_prediction(_alloc_rate_ms_seq);
 }
 
+double G1Analytics::predict_concurrent_refine_rate_ms() const {
+  return get_new_prediction(_concurrent_refine_rate_ms_seq);
+}
+
+double G1Analytics::predict_logged_cards_rate_ms() const {
+  return get_new_prediction(_logged_cards_rate_ms_seq);
+}
+
 double G1Analytics::predict_cost_per_logged_card_ms() const {
   return get_new_prediction(_cost_per_logged_card_ms_seq);
 }
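
The new sequences are seeded with defaults (inverse of the maximum STW cost, and 0.0 for the logging rate) so that callers of predict_concurrent_refine_rate_ms() and predict_logged_cards_rate_ms() get a usable value before the first measured rate arrives. A simplified stand-in below (assumption: a plain bounded average, not the real TruncatedSeq estimator) shows the seed-then-predict pattern:

#include <cstddef>
#include <cstdio>
#include <deque>
#include <numeric>

class BoundedSeq {
  std::deque<double> _samples;
  size_t _max;
public:
  explicit BoundedSeq(size_t max_len) : _max(max_len) {}
  void add(double v) {
    _samples.push_back(v);
    if (_samples.size() > _max) _samples.pop_front();
  }
  // Plain average here; G1 uses a more elaborate estimator.
  double predict() const {
    return std::accumulate(_samples.begin(), _samples.end(), 0.0) / _samples.size();
  }
};

int main() {
  BoundedSeq refine_rate_ms(10);
  refine_rate_ms.add(1.0 / 0.0025);   // seed: inverse of a per-card cost default
  refine_rate_ms.add(350.0);          // later: a measured cards/ms sample
  std::printf("predicted rate: %.1f cards/ms\n", refine_rate_ms.predict());
  return 0;
}
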
--- a/src/hotspot/share/gc/g1/g1Analytics.hpp	Tue Oct 08 20:47:46 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1Analytics.hpp	Tue Oct 08 15:15:50 2019 -0400
@@ -46,6 +46,8 @@
   double        _prev_collection_pause_end_ms;
 
   TruncatedSeq* _rs_length_diff_seq;
+  TruncatedSeq* _concurrent_refine_rate_ms_seq;
+  TruncatedSeq* _logged_cards_rate_ms_seq;
   TruncatedSeq* _cost_per_logged_card_ms_seq;
   TruncatedSeq* _cost_scan_hcc_seq;
   TruncatedSeq* _young_cards_per_entry_ratio_seq;
@@ -99,6 +101,8 @@
   void report_concurrent_mark_remark_times_ms(double ms);
   void report_concurrent_mark_cleanup_times_ms(double ms);
   void report_alloc_rate_ms(double alloc_rate);
+  void report_concurrent_refine_rate_ms(double cards_per_ms);
+  void report_logged_cards_rate_ms(double cards_per_ms);
   void report_cost_per_logged_card_ms(double cost_per_logged_card_ms);
   void report_cost_scan_hcc(double cost_scan_hcc);
   void report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc);
@@ -116,6 +120,8 @@
   double predict_alloc_rate_ms() const;
   int num_alloc_rate_ms() const;
 
+  double predict_concurrent_refine_rate_ms() const;
+  double predict_logged_cards_rate_ms() const;
   double predict_cost_per_logged_card_ms() const;
 
   double predict_scan_hcc_ms() const;
--- a/src/hotspot/share/gc/g1/g1CollectionSet.cpp	Tue Oct 08 20:47:46 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1CollectionSet.cpp	Tue Oct 08 15:15:50 2019 -0400
@@ -409,7 +409,7 @@
   guarantee(target_pause_time_ms > 0.0,
             "target_pause_time_ms = %1.6lf should be positive", target_pause_time_ms);
 
-  size_t pending_cards = _policy->pending_cards();
+  size_t pending_cards = _policy->pending_cards_at_gc_start();
   double base_time_ms = _policy->predict_base_elapsed_time_ms(pending_cards);
   double time_remaining_ms = MAX2(target_pause_time_ms - base_time_ms, 0.0);
 
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp	Tue Oct 08 20:47:46 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp	Tue Oct 08 15:15:50 2019 -0400
@@ -412,6 +412,22 @@
   dcqs.notify_if_necessary();
 }
 
+G1ConcurrentRefine::RefinementStats G1ConcurrentRefine::total_refinement_stats() const {
+  struct CollectData : public ThreadClosure {
+    Tickspan _total_time;
+    size_t _total_cards;
+    CollectData() : _total_time(), _total_cards(0) {}
+    virtual void do_thread(Thread* t) {
+      G1ConcurrentRefineThread* crt = static_cast<G1ConcurrentRefineThread*>(t);
+      _total_time += crt->total_refinement_time();
+      _total_cards += crt->total_refined_cards();
+    }
+  } collector;
+  // Cast away const so we can call non-modifying closure on threads.
+  const_cast<G1ConcurrentRefine*>(this)->threads_do(&collector);
+  return RefinementStats(collector._total_time, collector._total_cards);
+}
+
 size_t G1ConcurrentRefine::activation_threshold(uint worker_id) const {
   Thresholds thresholds = calc_thresholds(_green_zone, _yellow_zone, worker_id);
   return activation_level(thresholds);
@@ -432,7 +448,8 @@
   }
 }
 
-bool G1ConcurrentRefine::do_refinement_step(uint worker_id) {
+bool G1ConcurrentRefine::do_refinement_step(uint worker_id,
+                                            size_t* total_refined_cards) {
   G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
 
   size_t curr_cards = dcqs.num_cards();
@@ -448,5 +465,6 @@
 
   // Process the next buffer, if there are enough left.
   return dcqs.refine_completed_buffer_concurrently(worker_id + worker_id_offset(),
-                                                   deactivation_threshold(worker_id));
+                                                   deactivation_threshold(worker_id),
+                                                   total_refined_cards);
 }
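
total_refinement_stats() walks the refinement threads with a closure and sums each thread's time and card counters. A self-contained sketch of that aggregation pattern, using stand-in types rather than the HotSpot Thread/ThreadClosure classes:

#include <cstddef>
#include <cstdio>
#include <vector>

struct RefineThread {
  double total_refinement_time_ms;
  size_t total_refined_cards;
};

struct ThreadClosure {
  virtual void do_thread(RefineThread* t) = 0;
  virtual ~ThreadClosure() {}
};

static void threads_do(std::vector<RefineThread>& threads, ThreadClosure* cl) {
  for (RefineThread& t : threads) cl->do_thread(&t);
}

int main() {
  std::vector<RefineThread> threads = {{12.5, 4000}, {9.0, 3100}, {0.0, 0}};
  struct CollectData : public ThreadClosure {
    double _total_time = 0.0;
    size_t _total_cards = 0;
    void do_thread(RefineThread* t) override {
      _total_time += t->total_refinement_time_ms;
      _total_cards += t->total_refined_cards;
    }
  } collector;
  threads_do(threads, &collector);
  std::printf("total: %.1f ms, %zu cards\n",
              collector._total_time, collector._total_cards);
  return 0;
}
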
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp	Tue Oct 08 20:47:46 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp	Tue Oct 08 15:15:50 2019 -0400
@@ -27,6 +27,7 @@
 
 #include "memory/allocation.hpp"
 #include "utilities/globalDefinitions.hpp"
+#include "utilities/ticks.hpp"
 
 // Forward decl
 class G1ConcurrentRefine;
@@ -118,11 +119,22 @@
   // Adjust refinement thresholds based on work done during the pause and the goal time.
   void adjust(double logged_cards_scan_time, size_t processed_logged_cards, double goal_ms);
 
+  struct RefinementStats {
+    Tickspan _time;
+    size_t _cards;
+    RefinementStats(Tickspan time, size_t cards) : _time(time), _cards(cards) {}
+  };
+
+  RefinementStats total_refinement_stats() const;
+
   // Cards in the dirty card queue set.
   size_t activation_threshold(uint worker_id) const;
   size_t deactivation_threshold(uint worker_id) const;
-  // Perform a single refinement step. Called by the refinement threads when woken up.
-  bool do_refinement_step(uint worker_id);
+
+  // Perform a single refinement step; called by the refinement
+  // threads.  Returns true if there was refinement work available.
+  // Increments *total_refined_cards.
+  bool do_refinement_step(uint worker_id, size_t* total_refined_cards);
 
   // Iterate over all concurrent refinement threads applying the given closure.
   void threads_do(ThreadClosure *tc);
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp	Tue Oct 08 20:47:46 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp	Tue Oct 08 15:15:50 2019 -0400
@@ -37,6 +37,8 @@
   ConcurrentGCThread(),
   _vtime_start(0.0),
   _vtime_accum(0.0),
+  _total_refinement_time(),
+  _total_refined_cards(0),
   _worker_id(worker_id),
   _active(false),
   _monitor(NULL),
@@ -101,11 +103,12 @@
       break;
     }
 
-    size_t buffers_processed = 0;
     log_debug(gc, refine)("Activated worker %d, on threshold: " SIZE_FORMAT ", current: " SIZE_FORMAT,
                           _worker_id, _cr->activation_threshold(_worker_id),
                           G1BarrierSet::dirty_card_queue_set().num_cards());
 
+    size_t start_total_refined_cards = _total_refined_cards; // For logging.
+
     {
       SuspendibleThreadSetJoiner sts_join;
 
@@ -115,20 +118,22 @@
           continue;             // Re-check for termination after yield delay.
         }
 
-        if (!_cr->do_refinement_step(_worker_id)) {
-          break;
+        Ticks start_time = Ticks::now();
+        if (!_cr->do_refinement_step(_worker_id, &_total_refined_cards)) {
+          break;                // No cards to process.
         }
-        ++buffers_processed;
+        _total_refinement_time += (Ticks::now() - start_time);
       }
     }
 
     deactivate();
     log_debug(gc, refine)("Deactivated worker %d, off threshold: " SIZE_FORMAT
-                          ", current: " SIZE_FORMAT ", buffers processed: "
-                          SIZE_FORMAT,
+                          ", current: " SIZE_FORMAT ", refined cards: "
+                          SIZE_FORMAT ", total refined cards: " SIZE_FORMAT,
                           _worker_id, _cr->deactivation_threshold(_worker_id),
                           G1BarrierSet::dirty_card_queue_set().num_cards(),
-                          buffers_processed);
+                          _total_refined_cards - start_total_refined_cards,
+                          _total_refined_cards);
 
     if (os::supports_vtime()) {
       _vtime_accum = (os::elapsedVTime() - _vtime_start);
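
The worker loop now times each productive refinement step and folds it into _total_refinement_time; a step that finds no work breaks out before its time is accumulated. A standalone sketch of that pattern using std::chrono in place of Ticks/Tickspan (the step function and numbers are stand-ins):

#include <chrono>
#include <cstddef>
#include <cstdio>

using Clock = std::chrono::steady_clock;

static bool do_refinement_step(size_t* total_refined_cards, int& work_left) {
  if (work_left == 0) return false;   // no cards to process
  *total_refined_cards += 256;        // pretend one buffer of cards was refined
  --work_left;
  return true;
}

int main() {
  std::chrono::duration<double, std::milli> total_refinement_time{0};
  size_t total_refined_cards = 0;
  int work_left = 5;
  for (;;) {
    Clock::time_point start = Clock::now();
    if (!do_refinement_step(&total_refined_cards, work_left)) break;
    total_refinement_time += Clock::now() - start;  // only productive steps counted
  }
  std::printf("refined %zu cards in %.3f ms\n",
              total_refined_cards, total_refinement_time.count());
  return 0;
}
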
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.hpp	Tue Oct 08 20:47:46 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.hpp	Tue Oct 08 15:15:50 2019 -0400
@@ -26,6 +26,7 @@
 #define SHARE_GC_G1_G1CONCURRENTREFINETHREAD_HPP
 
 #include "gc/shared/concurrentGCThread.hpp"
+#include "utilities/ticks.hpp"
 
 // Forward Decl.
 class G1ConcurrentRefine;
@@ -38,6 +39,10 @@
 
   double _vtime_start;  // Initial virtual time.
   double _vtime_accum;  // Accumulated virtual time.
+
+  Tickspan _total_refinement_time;
+  size_t _total_refined_cards;
+
   uint _worker_id;
 
   bool _active;
@@ -61,6 +66,9 @@
   // Activate this thread.
   void activate();
 
+  Tickspan total_refinement_time() const { return _total_refinement_time; }
+  size_t total_refined_cards() const { return _total_refined_cards; }
+
   // Total virtual time so far.
   double vtime_accum() { return _vtime_accum; }
 };
--- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp	Tue Oct 08 20:47:46 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp	Tue Oct 08 15:15:50 2019 -0400
@@ -37,6 +37,7 @@
 #include "runtime/atomic.hpp"
 #include "runtime/flags/flagSetting.hpp"
 #include "runtime/mutexLocker.hpp"
+#include "runtime/os.hpp"
 #include "runtime/safepoint.hpp"
 #include "runtime/thread.inline.hpp"
 #include "runtime/threadSMR.hpp"
@@ -62,6 +63,9 @@
   }
 }
 
+// Assumed to be zero by concurrent threads.
+static uint par_ids_start() { return 0; }
+
 G1DirtyCardQueueSet::G1DirtyCardQueueSet(Monitor* cbl_mon,
                                          BufferNode::Allocator* allocator) :
   PtrQueueSet(allocator),
@@ -73,15 +77,16 @@
   _process_completed_buffers(false),
   _max_cards(MaxCardsUnlimited),
   _max_cards_padding(0),
-  _free_ids(0, num_par_ids()),
-  _processed_buffers_mut(0),
-  _processed_buffers_rs_thread(0)
+  _free_ids(par_ids_start(), num_par_ids()),
+  _mutator_refined_cards_counters(NEW_C_HEAP_ARRAY(size_t, num_par_ids(), mtGC))
 {
+  ::memset(_mutator_refined_cards_counters, 0, num_par_ids() * sizeof(size_t));
   _all_active = true;
 }
 
 G1DirtyCardQueueSet::~G1DirtyCardQueueSet() {
   abandon_completed_buffers();
+  FREE_C_HEAP_ARRAY(size_t, _mutator_refined_cards_counters);
 }
 
 // Determines how many mutator threads can process the buffers in parallel.
@@ -89,6 +94,14 @@
   return (uint)os::initial_active_processor_count();
 }
 
+size_t G1DirtyCardQueueSet::total_mutator_refined_cards() const {
+  size_t sum = 0;
+  for (uint i = 0; i < num_par_ids(); ++i) {
+    sum += _mutator_refined_cards_counters[i];
+  }
+  return sum;
+}
+
 void G1DirtyCardQueueSet::handle_zero_index_for_thread(Thread* t) {
   G1ThreadLocalData::dirty_card_queue(t).handle_zero_index();
 }
@@ -213,7 +226,9 @@
   return result;
 }
 
-bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node, uint worker_id) {
+bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node,
+                                        uint worker_id,
+                                        size_t* total_refined_cards) {
   G1RemSet* rem_set = G1CollectedHeap::heap()->rem_set();
   size_t size = buffer_size();
   void** buffer = BufferNode::make_buffer_from_node(node);
@@ -223,6 +238,7 @@
     CardTable::CardValue* cp = static_cast<CardTable::CardValue*>(buffer[i]);
     rem_set->refine_card_concurrently(cp, worker_id);
   }
+  *total_refined_cards += (i - node->index());
   node->set_index(i);
   return i == size;
 }
@@ -260,25 +276,27 @@
 
 bool G1DirtyCardQueueSet::mut_process_buffer(BufferNode* node) {
   uint worker_id = _free_ids.claim_par_id(); // temporarily claim an id
-  bool result = refine_buffer(node, worker_id);
+  uint counter_index = worker_id - par_ids_start();
+  size_t* counter = &_mutator_refined_cards_counters[counter_index];
+  bool result = refine_buffer(node, worker_id, counter);
   _free_ids.release_par_id(worker_id); // release the id
 
   if (result) {
     assert_fully_consumed(node, buffer_size());
-    Atomic::inc(&_processed_buffers_mut);
   }
   return result;
 }
 
-bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id, size_t stop_at) {
+bool G1DirtyCardQueueSet::refine_completed_buffer_concurrently(uint worker_id,
+                                                               size_t stop_at,
+                                                               size_t* total_refined_cards) {
   BufferNode* node = get_completed_buffer(stop_at);
   if (node == NULL) {
     return false;
-  } else if (refine_buffer(node, worker_id)) {
+  } else if (refine_buffer(node, worker_id, total_refined_cards)) {
     assert_fully_consumed(node, buffer_size());
     // Done with fully processed buffer.
     deallocate_buffer(node);
-    Atomic::inc(&_processed_buffers_rs_thread);
     return true;
   } else {
     // Return partially processed buffer to the queue.
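
Mutator refinement is counted in a per-par-id array: a mutator that temporarily claims a par id increments only its own slot, and total_mutator_refined_cards() sums the slots, replacing the shared atomically-incremented buffer counter. A simplified sketch of that layout (no id claiming or synchronization shown):

#include <cstddef>
#include <cstdio>
#include <cstring>

struct MutatorRefinedCards {
  size_t* _counters;
  unsigned _num_ids;

  explicit MutatorRefinedCards(unsigned num_ids)
    : _counters(new size_t[num_ids]), _num_ids(num_ids) {
    std::memset(_counters, 0, num_ids * sizeof(size_t));
  }
  ~MutatorRefinedCards() { delete[] _counters; }

  // A mutator holding par id `id` bumps only its own slot.
  void add(unsigned id, size_t cards) { _counters[id] += cards; }

  size_t total() const {
    size_t sum = 0;
    for (unsigned i = 0; i < _num_ids; ++i) sum += _counters[i];
    return sum;
  }
};

int main() {
  MutatorRefinedCards counters(4);
  counters.add(0, 300);
  counters.add(2, 150);
  std::printf("total mutator refined cards: %zu\n", counters.total());
  return 0;
}
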
--- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp	Tue Oct 08 20:47:46 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp	Tue Oct 08 15:15:50 2019 -0400
@@ -78,14 +78,15 @@
 
   void abandon_completed_buffers();
 
-  // Refine the cards in "node" from it's index to buffer_size.
+  // Refine the cards in "node" from its index to buffer_size.
   // Stops processing if SuspendibleThreadSet::should_yield() is true.
   // Returns true if the entire buffer was processed, false if there
   // is a pending yield request.  The node's index is updated to exclude
   // the processed elements, e.g. up to the element before processing
   // stopped, or one past the last element if the entire buffer was
-  // processed.
-  bool refine_buffer(BufferNode* node, uint worker_id);
+  // processed. Increments *total_refined_cards by the number of cards
+  // processed and removed from the buffer.
+  bool refine_buffer(BufferNode* node, uint worker_id, size_t* total_refined_cards);
 
   bool mut_process_buffer(BufferNode* node);
 
@@ -97,10 +98,9 @@
 
   G1FreeIdSet _free_ids;
 
-  // The number of completed buffers processed by mutator and rs thread,
-  // respectively.
-  jint _processed_buffers_mut;
-  jint _processed_buffers_rs_thread;
+  // Array of cumulative dirty cards refined by mutator threads.
+  // Array has an entry per id in _free_ids.
+  size_t* _mutator_refined_cards_counters;
 
 public:
   G1DirtyCardQueueSet(Monitor* cbl_mon, BufferNode::Allocator* allocator);
@@ -158,7 +158,12 @@
   // Stops processing a buffer if SuspendibleThreadSet::should_yield(),
   // returning the incompletely processed buffer to the completed buffer
   // list, for later processing of the remainder.
-  bool refine_completed_buffer_concurrently(uint worker_id, size_t stop_at);
+  //
+  // Increments *total_refined_cards by the number of cards processed and
+  // removed from the buffer.
+  bool refine_completed_buffer_concurrently(uint worker_id,
+                                            size_t stop_at,
+                                            size_t* total_refined_cards);
 
   // If a full collection is happening, reset partial logs, and release
   // completed ones: the full collection will make them all irrelevant.
@@ -181,13 +186,8 @@
     return _max_cards_padding;
   }
 
-  jint processed_buffers_mut() {
-    return _processed_buffers_mut;
-  }
-  jint processed_buffers_rs_thread() {
-    return _processed_buffers_rs_thread;
-  }
-
+  // Total dirty cards refined by mutator threads.
+  size_t total_mutator_refined_cards() const;
 };
 
 inline G1DirtyCardQueueSet* G1DirtyCardQueue::dirty_card_qset() const {
--- a/src/hotspot/share/gc/g1/g1Policy.cpp	Tue Oct 08 20:47:46 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1Policy.cpp	Tue Oct 08 15:15:50 2019 -0400
@@ -70,7 +70,11 @@
   _free_regions_at_end_of_collection(0),
   _max_rs_length(0),
   _rs_length_prediction(0),
-  _pending_cards(0),
+  _pending_cards_at_gc_start(0),
+  _pending_cards_at_prev_gc_end(0),
+  _total_mutator_refined_cards(0),
+  _total_concurrent_refined_cards(0),
+  _total_concurrent_refinement_time(),
   _bytes_allocated_in_old_since_last_gc(0),
   _initial_mark_to_mixed(),
   _collection_set(NULL),
@@ -442,6 +446,7 @@
   collector_state()->set_in_young_only_phase(false);
   collector_state()->set_in_full_gc(true);
   _collection_set->clear_candidates();
+  record_concurrent_refinement_data(true /* is_full_collection */);
 }
 
 void G1Policy::record_full_collection_end() {
@@ -472,12 +477,67 @@
   _survivor_surv_rate_group->reset();
   update_young_list_max_and_target_length();
   update_rs_length_prediction();
+  _pending_cards_at_prev_gc_end = _g1h->pending_card_num();
 
   _bytes_allocated_in_old_since_last_gc = 0;
 
   record_pause(FullGC, _full_collection_start_sec, end_sec);
 }
 
+void G1Policy::record_concurrent_refinement_data(bool is_full_collection) {
+  _pending_cards_at_gc_start = _g1h->pending_card_num();
+
+  // Record info about concurrent refinement thread processing.
+  G1ConcurrentRefine* cr = _g1h->concurrent_refine();
+  G1ConcurrentRefine::RefinementStats cr_stats = cr->total_refinement_stats();
+
+  Tickspan cr_time = cr_stats._time - _total_concurrent_refinement_time;
+  _total_concurrent_refinement_time = cr_stats._time;
+
+  size_t cr_cards = cr_stats._cards - _total_concurrent_refined_cards;
+  _total_concurrent_refined_cards = cr_stats._cards;
+
+  // Don't update rate if full collection.  We could be in an implicit full
+  // collection after a non-full collection failure, in which case there
+  // wasn't any mutator/cr-thread activity since last recording.  And if
+  // we're in an explicit full collection, the time since the last GC can
+  // be arbitrarily short, so not a very good sample.  Similarly, don't
+  // update the rate if the current sample is empty or time is zero.
+  if (!is_full_collection && (cr_cards > 0) && (cr_time > Tickspan())) {
+    double rate = cr_cards / (cr_time.seconds() * MILLIUNITS);
+    _analytics->report_concurrent_refine_rate_ms(rate);
+  }
+
+  // Record info about mutator thread processing.
+  G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
+  size_t mut_total_cards = dcqs.total_mutator_refined_cards();
+  size_t mut_cards = mut_total_cards - _total_mutator_refined_cards;
+  _total_mutator_refined_cards = mut_total_cards;
+
+  // Record mutator's card logging rate.
+  // Don't update if full collection; see above.
+  if (!is_full_collection) {
+    size_t total_cards = _pending_cards_at_gc_start + cr_cards + mut_cards;
+    assert(_pending_cards_at_prev_gc_end <= total_cards,
+           "untracked cards: last pending: " SIZE_FORMAT
+           ", pending: " SIZE_FORMAT ", conc refine: " SIZE_FORMAT
+           ", mut refine:" SIZE_FORMAT,
+           _pending_cards_at_prev_gc_end, _pending_cards_at_gc_start,
+           cr_cards, mut_cards);
+    size_t logged_cards = total_cards - _pending_cards_at_prev_gc_end;
+    double logging_start_time = _analytics->prev_collection_pause_end_ms();
+    double logging_end_time = Ticks::now().seconds() * MILLIUNITS;
+    double logging_time = logging_end_time - logging_start_time;
+    // Unlike above for conc-refine rate, here we should not require a
+    // non-empty sample, since an application could go some time with only
+    // young-gen or filtered out writes.  But we'll ignore unusually short
+    // sample periods, as they may just pollute the predictions.
+    if (logging_time > 1.0) {   // Require > 1ms sample time.
+      _analytics->report_logged_cards_rate_ms(logged_cards / logging_time);
+    }
+  }
+}
+
 void G1Policy::record_collection_pause_start(double start_time_sec) {
   // We only need to do this here as the policy will only be applied
   // to the GC we're about to start. so, no point is calculating this
@@ -490,7 +550,8 @@
   assert_used_and_recalculate_used_equal(_g1h);
 
   phase_times()->record_cur_collection_start_sec(start_time_sec);
-  _pending_cards = _g1h->pending_card_num();
+
+  record_concurrent_refinement_data(false /* is_full_collection */);
 
   _collection_set->reset_bytes_used_before();
   _bytes_copied_during_gc = 0;
@@ -744,7 +805,7 @@
     // after the mixed gc phase.
     // During mixed gc we do not use them for young gen sizing.
     if (this_pause_was_young_only) {
-      _analytics->report_pending_cards((double) _pending_cards);
+      _analytics->report_pending_cards((double) _pending_cards_at_gc_start);
       _analytics->report_rs_length((double) _max_rs_length);
     }
   }
@@ -798,6 +859,7 @@
     scan_logged_cards_time_goal_ms -= scan_hcc_time_ms;
   }
 
+  _pending_cards_at_prev_gc_end = _g1h->pending_card_num();
   double const logged_cards_time = logged_cards_processing_time();
 
   log_debug(gc, ergo, refine)("Concurrent refinement times: Logged Cards Scan time goal: %1.2fms Logged Cards Scan time: %1.2fms HCC time: %1.2fms",
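
record_concurrent_refinement_data() turns the counter deltas into the two rates fed to G1Analytics: refined cards per ms of refinement-thread time, and logged cards per ms of mutator wall time since the previous pause end (skipped for full collections, empty samples, and sample periods of 1ms or less). A worked sketch of that arithmetic with hypothetical numbers:

#include <cstddef>
#include <cstdio>

int main() {
  // Deltas since the previous GC (made-up values).
  size_t cr_cards = 45000;            // refined by concurrent refinement threads
  double cr_time_ms = 60.0;           // time those threads spent refining
  size_t mut_cards = 5000;            // refined by mutator threads
  size_t pending_at_gc_start = 20000; // still sitting in the dirty card queues
  size_t pending_at_prev_gc_end = 1000;
  double mutator_interval_ms = 500.0; // wall time since the previous pause end

  double concurrent_refine_rate = cr_cards / cr_time_ms;
  size_t logged_cards =
      pending_at_gc_start + cr_cards + mut_cards - pending_at_prev_gc_end;
  double logged_cards_rate = logged_cards / mutator_interval_ms;

  std::printf("concurrent refine rate: %.1f cards/ms\n", concurrent_refine_rate);
  std::printf("logged cards rate:      %.1f cards/ms\n", logged_cards_rate);
  return 0;
}
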
--- a/src/hotspot/share/gc/g1/g1Policy.hpp	Tue Oct 08 20:47:46 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1Policy.hpp	Tue Oct 08 15:15:50 2019 -0400
@@ -100,7 +100,11 @@
 
   size_t _rs_length_prediction;
 
-  size_t _pending_cards;
+  size_t _pending_cards_at_gc_start;
+  size_t _pending_cards_at_prev_gc_end;
+  size_t _total_mutator_refined_cards;
+  size_t _total_concurrent_refined_cards;
+  Tickspan _total_concurrent_refinement_time;
 
   // The amount of allocated bytes in old gen during the last mutator and the following
   // young GC phase.
@@ -244,7 +248,15 @@
                         uint base_free_regions, double target_pause_time_ms) const;
 
 public:
-  size_t pending_cards() const { return _pending_cards; }
+  size_t pending_cards_at_gc_start() const { return _pending_cards_at_gc_start; }
+
+  size_t total_concurrent_refined_cards() const {
+    return _total_concurrent_refined_cards;
+  }
+
+  size_t total_mutator_refined_cards() const {
+    return _total_mutator_refined_cards;
+  }
 
   // Calculate the minimum number of old regions we'll add to the CSet
   // during a mixed GC.
@@ -283,6 +295,9 @@
   void record_pause(PauseKind kind, double start, double end);
   // Indicate that we aborted marking before doing any mixed GCs.
   void abort_time_to_mixed_tracking();
+
+  void record_concurrent_refinement_data(bool is_full_collection);
+
 public:
 
   G1Policy(STWGCTimer* gc_timer);
--- a/src/hotspot/share/gc/g1/g1RemSet.cpp	Tue Oct 08 20:47:46 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1RemSet.cpp	Tue Oct 08 15:15:50 2019 -0400
@@ -489,7 +489,6 @@
   _scan_state(new G1RemSetScanState()),
   _prev_period_summary(),
   _g1h(g1h),
-  _num_conc_refined_cards(0),
   _ct(ct),
   _g1p(_g1h->policy()),
   _hot_card_cache(hot_card_cache) {
@@ -1377,7 +1376,6 @@
 
   G1ConcurrentRefineOopClosure conc_refine_cl(_g1h, worker_id);
   if (r->oops_on_memregion_seq_iterate_careful<false>(dirty_region, &conc_refine_cl) != NULL) {
-    _num_conc_refined_cards++; // Unsynchronized update, only used for logging.
     return;
   }
 
--- a/src/hotspot/share/gc/g1/g1RemSet.hpp	Tue Oct 08 20:47:46 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1RemSet.hpp	Tue Oct 08 15:15:50 2019 -0400
@@ -61,7 +61,6 @@
   G1RemSetSummary _prev_period_summary;
 
   G1CollectedHeap* _g1h;
-  size_t _num_conc_refined_cards; // Number of cards refined concurrently to the mutator.
 
   G1CardTable*           _ct;
   G1Policy*              _g1p;
@@ -125,8 +124,6 @@
   // Print accumulated summary info from the last time called.
   void print_periodic_summary_info(const char* header, uint period_count);
 
-  size_t num_conc_refined_cards() const { return _num_conc_refined_cards; }
-
   // Rebuilds the remembered set by scanning from bottom to TARS for all regions
   // using the given work gang.
   void rebuild_rem_set(G1ConcurrentMark* cm, WorkGang* workers, uint worker_id_offset);
--- a/src/hotspot/share/gc/g1/g1RemSetSummary.cpp	Tue Oct 08 20:47:46 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1RemSetSummary.cpp	Tue Oct 08 15:15:50 2019 -0400
@@ -27,6 +27,7 @@
 #include "gc/g1/g1ConcurrentRefine.hpp"
 #include "gc/g1/g1ConcurrentRefineThread.hpp"
 #include "gc/g1/g1DirtyCardQueue.hpp"
+#include "gc/g1/g1Policy.hpp"
 #include "gc/g1/g1RemSet.hpp"
 #include "gc/g1/g1RemSetSummary.hpp"
 #include "gc/g1/g1YoungRemSetSamplingThread.hpp"
@@ -53,18 +54,17 @@
 };
 
 void G1RemSetSummary::update() {
-  _num_conc_refined_cards = _rem_set->num_conc_refined_cards();
-  G1DirtyCardQueueSet& dcqs = G1BarrierSet::dirty_card_queue_set();
-  _num_processed_buf_mutator = dcqs.processed_buffers_mut();
-  _num_processed_buf_rs_threads = dcqs.processed_buffers_rs_thread();
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  const G1Policy* policy = g1h->policy();
+  _total_mutator_refined_cards = policy->total_mutator_refined_cards();
+  _total_concurrent_refined_cards = policy->total_concurrent_refined_cards();
 
   _num_coarsenings = HeapRegionRemSet::n_coarsenings();
 
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  G1ConcurrentRefine* cg1r = g1h->concurrent_refine();
   if (_rs_threads_vtimes != NULL) {
     GetRSThreadVTimeClosure p(this);
-    cg1r->threads_do(&p);
+    g1h->concurrent_refine()->threads_do(&p);
   }
   set_sampling_thread_vtime(g1h->sampling_thread()->vtime_accum());
 }
@@ -83,9 +83,8 @@
 
 G1RemSetSummary::G1RemSetSummary() :
   _rem_set(NULL),
-  _num_conc_refined_cards(0),
-  _num_processed_buf_mutator(0),
-  _num_processed_buf_rs_threads(0),
+  _total_mutator_refined_cards(0),
+  _total_concurrent_refined_cards(0),
   _num_coarsenings(0),
   _num_vtimes(G1ConcurrentRefine::max_num_threads()),
   _rs_threads_vtimes(NEW_C_HEAP_ARRAY(double, _num_vtimes, mtGC)),
@@ -96,9 +95,8 @@
 
 G1RemSetSummary::G1RemSetSummary(G1RemSet* rem_set) :
   _rem_set(rem_set),
-  _num_conc_refined_cards(0),
-  _num_processed_buf_mutator(0),
-  _num_processed_buf_rs_threads(0),
+  _total_mutator_refined_cards(0),
+  _total_concurrent_refined_cards(0),
   _num_coarsenings(0),
   _num_vtimes(G1ConcurrentRefine::max_num_threads()),
   _rs_threads_vtimes(NEW_C_HEAP_ARRAY(double, _num_vtimes, mtGC)),
@@ -114,12 +112,10 @@
   assert(other != NULL, "just checking");
   assert(_num_vtimes == other->_num_vtimes, "just checking");
 
-  _num_conc_refined_cards = other->num_conc_refined_cards();
+  _total_mutator_refined_cards = other->total_mutator_refined_cards();
+  _total_concurrent_refined_cards = other->total_concurrent_refined_cards();
 
-  _num_processed_buf_mutator = other->num_processed_buf_mutator();
-  _num_processed_buf_rs_threads = other->num_processed_buf_rs_threads();
-
-  _num_coarsenings = other->_num_coarsenings;
+  _num_coarsenings = other->num_coarsenings();
 
   memcpy(_rs_threads_vtimes, other->_rs_threads_vtimes, sizeof(double) * _num_vtimes);
 
@@ -130,10 +126,8 @@
   assert(other != NULL, "just checking");
   assert(_num_vtimes == other->_num_vtimes, "just checking");
 
-  _num_conc_refined_cards = other->num_conc_refined_cards() - _num_conc_refined_cards;
-
-  _num_processed_buf_mutator = other->num_processed_buf_mutator() - _num_processed_buf_mutator;
-  _num_processed_buf_rs_threads = other->num_processed_buf_rs_threads() - _num_processed_buf_rs_threads;
+  _total_mutator_refined_cards = other->total_mutator_refined_cards() - _total_mutator_refined_cards;
+  _total_concurrent_refined_cards = other->total_concurrent_refined_cards() - _total_concurrent_refined_cards;
 
   _num_coarsenings = other->num_coarsenings() - _num_coarsenings;
 
@@ -356,16 +350,15 @@
 
 void G1RemSetSummary::print_on(outputStream* out) {
   out->print_cr(" Recent concurrent refinement statistics");
-  out->print_cr("  Processed " SIZE_FORMAT " cards concurrently", num_conc_refined_cards());
-  out->print_cr("  Of " SIZE_FORMAT " completed buffers:", num_processed_buf_total());
-  out->print_cr("     " SIZE_FORMAT_W(8) " (%5.1f%%) by concurrent RS threads.",
-                num_processed_buf_total(),
-                percent_of(num_processed_buf_rs_threads(), num_processed_buf_total()));
+  out->print_cr("  Of " SIZE_FORMAT " refined cards:", total_refined_cards());
+  out->print_cr("     " SIZE_FORMAT_W(8) " (%5.1f%%) by concurrent refinement threads.",
+                total_concurrent_refined_cards(),
+                percent_of(total_concurrent_refined_cards(), total_refined_cards()));
   out->print_cr("     " SIZE_FORMAT_W(8) " (%5.1f%%) by mutator threads.",
-                num_processed_buf_mutator(),
-                percent_of(num_processed_buf_mutator(), num_processed_buf_total()));
+                total_mutator_refined_cards(),
+                percent_of(total_mutator_refined_cards(), total_refined_cards()));
   out->print_cr("  Did " SIZE_FORMAT " coarsenings.", num_coarsenings());
-  out->print_cr("  Concurrent RS threads times (s)");
+  out->print_cr("  Concurrent refinement threads times (s)");
   out->print("     ");
   for (uint i = 0; i < _num_vtimes; i++) {
     out->print("    %5.2f", rs_thread_vtime(i));
--- a/src/hotspot/share/gc/g1/g1RemSetSummary.hpp	Tue Oct 08 20:47:46 2019 +0200
+++ b/src/hotspot/share/gc/g1/g1RemSetSummary.hpp	Tue Oct 08 15:15:50 2019 -0400
@@ -38,9 +38,8 @@
 
   G1RemSet* _rem_set;
 
-  size_t _num_conc_refined_cards;
-  size_t _num_processed_buf_mutator;
-  size_t _num_processed_buf_rs_threads;
+  size_t _total_mutator_refined_cards;
+  size_t _total_concurrent_refined_cards;
 
   size_t _num_coarsenings;
 
@@ -76,20 +75,16 @@
     return _sampling_thread_vtime;
   }
 
-  size_t num_conc_refined_cards() const {
-    return _num_conc_refined_cards;
+  size_t total_mutator_refined_cards() const {
+    return _total_mutator_refined_cards;
   }
 
-  size_t num_processed_buf_mutator() const {
-    return _num_processed_buf_mutator;
+  size_t total_concurrent_refined_cards() const {
+    return _total_concurrent_refined_cards;
   }
 
-  size_t num_processed_buf_rs_threads() const {
-    return _num_processed_buf_rs_threads;
-  }
-
-  size_t num_processed_buf_total() const {
-    return num_processed_buf_mutator() + num_processed_buf_rs_threads();
+  size_t total_refined_cards() const {
+    return total_mutator_refined_cards() + total_concurrent_refined_cards();
   }
 
   size_t num_coarsenings() const {
--- a/test/hotspot/jtreg/gc/g1/TestRemsetLoggingThreads.java	Tue Oct 08 20:47:46 2019 +0200
+++ b/test/hotspot/jtreg/gc/g1/TestRemsetLoggingThreads.java	Tue Oct 08 15:15:50 2019 -0400
@@ -54,7 +54,7 @@
 
     OutputAnalyzer output = new OutputAnalyzer(pb.start());
 
-    String pattern = "Concurrent RS threads times \\(s\\)$";
+    String pattern = "Concurrent refinement threads times \\(s\\)$";
     Matcher m = Pattern.compile(pattern, Pattern.MULTILINE).matcher(output.getStdout());
 
     if (!m.find()) {
--- a/test/hotspot/jtreg/gc/g1/TestRemsetLoggingTools.java	Tue Oct 08 20:47:46 2019 +0200
+++ b/test/hotspot/jtreg/gc/g1/TestRemsetLoggingTools.java	Tue Oct 08 15:15:50 2019 -0400
@@ -110,7 +110,7 @@
     }
 
     public static void expectRSetSummaries(String result, int expectedCumulative, int expectedPeriodic) throws Exception {
-        int actualTotal = result.split("concurrent refinement").length - 1;
+        int actualTotal = result.split("concurrent refinement statistics").length - 1;
         int actualCumulative = result.split("Cumulative RS summary").length - 1;
 
         if (expectedCumulative != actualCumulative) {