8013895: G1: G1SummarizeRSetStats output on Linux needs improvemen
authortschatzl
Tue, 28 May 2013 09:32:06 +0200
changeset 17854 d65bc1546091
parent 17853 54b0245a5aa8
child 17855 9d0719d7bb85
child 17856 8871abcf70fa
8013895: G1: G1SummarizeRSetStats output on Linux needs improvemen Summary: Fixed the output of G1SummarizeRSetStats: too small datatype for the number of concurrently processed cards, added concurrent remembered set thread time retrieval for Linux and Windows (BSD uses os::elapsedTime() now), and other cleanup. The information presented during VM operation is now relative to the previous output, not always cumulative if G1SummarizeRSetStatsPeriod > 0. At VM exit, the code prints a cumulative summary. Reviewed-by: johnc, jwilhelm
hotspot/make/excludeSrc.make
hotspot/src/os/bsd/vm/os_bsd.cpp
hotspot/src/os/linux/vm/os_linux.cpp
hotspot/src/os/windows/vm/os_windows.cpp
hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp
hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp
hotspot/src/share/vm/gc_implementation/g1/g1RemSetSummary.cpp
hotspot/src/share/vm/gc_implementation/g1/g1RemSetSummary.hpp
hotspot/test/gc/g1/TestSummarizeRSetStats.java
--- a/hotspot/make/excludeSrc.make	Sat Jun 01 10:00:56 2013 +0200
+++ b/hotspot/make/excludeSrc.make	Tue May 28 09:32:06 2013 +0200
@@ -87,7 +87,7 @@
 	g1BlockOffsetTable.cpp g1CardCounts.cpp g1CollectedHeap.cpp g1CollectorPolicy.cpp \
 	g1ErgoVerbose.cpp g1GCPhaseTimes.cpp g1HRPrinter.cpp g1HotCardCache.cpp g1Log.cpp \
 	g1MMUTracker.cpp g1MarkSweep.cpp g1MemoryPool.cpp g1MonitoringSupport.cpp \
-	g1RemSet.cpp g1SATBCardTableModRefBS.cpp g1_globals.cpp heapRegion.cpp \
+	g1RemSet.cpp g1RemSetSummary.cpp g1SATBCardTableModRefBS.cpp g1_globals.cpp heapRegion.cpp \
 	heapRegionRemSet.cpp heapRegionSeq.cpp heapRegionSet.cpp heapRegionSets.cpp \
 	ptrQueue.cpp satbQueue.cpp sparsePRT.cpp survRateGroup.cpp vm_operations_g1.cpp \
 	adjoiningGenerations.cpp adjoiningVirtualSpaces.cpp asPSOldGen.cpp asPSYoungGen.cpp \
--- a/hotspot/src/os/bsd/vm/os_bsd.cpp	Sat Jun 01 10:00:56 2013 +0200
+++ b/hotspot/src/os/bsd/vm/os_bsd.cpp	Tue May 28 09:32:06 2013 +0200
@@ -935,10 +935,10 @@
   return (1000 * 1000);
 }
 
-// XXX: For now, code this as if BSD does not support vtime.
-bool os::supports_vtime() { return false; }
+bool os::supports_vtime() { return true; }
 bool os::enable_vtime()   { return false; }
 bool os::vtime_enabled()  { return false; }
+
 double os::elapsedVTime() {
   // better than nothing, but not much
   return elapsedTime();
--- a/hotspot/src/os/linux/vm/os_linux.cpp	Sat Jun 01 10:00:56 2013 +0200
+++ b/hotspot/src/os/linux/vm/os_linux.cpp	Tue May 28 09:32:06 2013 +0200
@@ -101,6 +101,12 @@
 # include <inttypes.h>
 # include <sys/ioctl.h>
 
+// if RUSAGE_THREAD for getrusage() has not been defined, do it here. The code calling
+// getrusage() is prepared to handle the associated failure.
+#ifndef RUSAGE_THREAD
+#define RUSAGE_THREAD   (1)               /* only the calling thread */
+#endif
+
 #define MAX_PATH    (2 * K)
 
 // for timer info max values which include all bits
@@ -1336,15 +1342,19 @@
   return (1000 * 1000);
 }
 
-// For now, we say that linux does not support vtime.  I have no idea
-// whether it can actually be made to (DLD, 9/13/05).
-
-bool os::supports_vtime() { return false; }
+bool os::supports_vtime() { return true; }
 bool os::enable_vtime()   { return false; }
 bool os::vtime_enabled()  { return false; }
+
 double os::elapsedVTime() {
-  // better than nothing, but not much
-  return elapsedTime();
+  struct rusage usage;
+  int retval = getrusage(RUSAGE_THREAD, &usage);
+  if (retval == 0) {
+    return (double) (usage.ru_utime.tv_sec + usage.ru_stime.tv_sec) + (double) (usage.ru_utime.tv_usec + usage.ru_stime.tv_usec) / (1000 * 1000);
+  } else {
+    // better than nothing, but not much
+    return elapsedTime();
+  }
 }
 
 jlong os::javaTimeMillis() {
--- a/hotspot/src/os/windows/vm/os_windows.cpp	Sat Jun 01 10:00:56 2013 +0200
+++ b/hotspot/src/os/windows/vm/os_windows.cpp	Tue May 28 09:32:06 2013 +0200
@@ -813,15 +813,21 @@
   return result;
 }
 
-// For now, we say that Windows does not support vtime.  I have no idea
-// whether it can actually be made to (DLD, 9/13/05).
-
-bool os::supports_vtime() { return false; }
+bool os::supports_vtime() { return true; }
 bool os::enable_vtime() { return false; }
 bool os::vtime_enabled() { return false; }
+
 double os::elapsedVTime() {
-  // better than nothing, but not much
-  return elapsedTime();
+  FILETIME created;
+  FILETIME exited;
+  FILETIME kernel;
+  FILETIME user;
+  if (GetThreadTimes(GetCurrentThread(), &created, &exited, &kernel, &user) != 0) {
+    // the resolution of windows_to_java_time() should be sufficient (ms)
+    return (double) (windows_to_java_time(kernel) + windows_to_java_time(user)) / MILLIUNITS;
+  } else {
+    return elapsedTime();
+  }
 }
 
 jlong os::javaTimeMillis() {
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Sat Jun 01 10:00:56 2013 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Tue May 28 09:32:06 2013 +0200
@@ -114,6 +114,14 @@
   }
 }
 
+void ConcurrentG1Refine::worker_threads_do(ThreadClosure * tc) {
+  if (_threads != NULL) {
+    for (int i = 0; i < worker_thread_num(); i++) {
+      tc->do_thread(_threads[i]);
+    }
+  }
+}
+
 int ConcurrentG1Refine::thread_num() {
   int n_threads = (G1ConcRefinementThreads > 0) ? G1ConcRefinementThreads
                                                 : ParallelGCThreads;
@@ -126,3 +134,7 @@
     st->cr();
   }
 }
+
+ConcurrentG1RefineThread * ConcurrentG1Refine::sampling_thread() const {
+  return _threads[worker_thread_num()];
+}
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Sat Jun 01 10:00:56 2013 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Tue May 28 09:32:06 2013 +0200
@@ -35,6 +35,7 @@
 class G1CollectedHeap;
 class G1HotCardCache;
 class G1RemSet;
+class DirtyCardQueue;
 
 class ConcurrentG1Refine: public CHeapObj<mtGC> {
   ConcurrentG1RefineThread** _threads;
@@ -78,9 +79,15 @@
 
   void reinitialize_threads();
 
-  // Iterate over the conc refine threads
+  // Iterate over all concurrent refinement threads
   void threads_do(ThreadClosure *tc);
 
+  // Iterate over all worker refinement threads
+  void worker_threads_do(ThreadClosure * tc);
+
+  // The RS sampling thread
+  ConcurrentG1RefineThread * sampling_thread() const;
+
   static int thread_num();
 
   void print_worker_threads_on(outputStream* st) const;
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Sat Jun 01 10:00:56 2013 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue May 28 09:32:06 2013 +0200
@@ -3539,6 +3539,14 @@
 }
 
 void G1CollectedHeap::gc_epilogue(bool full /* Ignored */) {
+
+  if (G1SummarizeRSetStats &&
+      (G1SummarizeRSetStatsPeriod > 0) &&
+      // we are at the end of the GC. Total collections has already been increased.
+      ((total_collections() - 1) % G1SummarizeRSetStatsPeriod == 0)) {
+    g1_rem_set()->print_periodic_summary_info();
+  }
+
   // FIXME: what is this about?
   // I'm ignoring the "fill_newgen()" call if "alloc_event_enabled"
   // is set.
@@ -4093,12 +4101,6 @@
     g1mm()->update_sizes();
   }
 
-  if (G1SummarizeRSetStats &&
-      (G1SummarizeRSetStatsPeriod > 0) &&
-      (total_collections() % G1SummarizeRSetStatsPeriod == 0)) {
-    g1_rem_set()->print_summary_info();
-  }
-
   // It should now be safe to tell the concurrent mark thread to start
   // without its logging output interfering with the logging output
   // that came from the pause.
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Sat Jun 01 10:00:56 2013 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Tue May 28 09:32:06 2013 +0200
@@ -34,6 +34,7 @@
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.inline.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
+#include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "memory/iterator.hpp"
 #include "oops/oop.inline.hpp"
 #include "utilities/intHisto.hpp"
@@ -73,7 +74,8 @@
     _ct_bs(ct_bs), _g1p(_g1->g1_policy()),
     _cg1r(g1->concurrent_g1_refine()),
     _cset_rs_update_cl(NULL),
-    _cards_scanned(NULL), _total_cards_scanned(0)
+    _cards_scanned(NULL), _total_cards_scanned(0),
+    _prev_period_summary()
 {
   _seq_task = new SubTasksDone(NumSeqTasks);
   guarantee(n_workers() > 0, "There should be some workers");
@@ -81,6 +83,7 @@
   for (uint i = 0; i < n_workers(); i++) {
     _cset_rs_update_cl[i] = NULL;
   }
+  _prev_period_summary.initialize(this, n_workers());
 }
 
 G1RemSet::~G1RemSet() {
@@ -697,47 +700,29 @@
   return has_refs_into_cset;
 }
 
-class HRRSStatsIter: public HeapRegionClosure {
-  size_t _occupied;
-  size_t _total_mem_sz;
-  size_t _max_mem_sz;
-  HeapRegion* _max_mem_sz_region;
-public:
-  HRRSStatsIter() :
-    _occupied(0),
-    _total_mem_sz(0),
-    _max_mem_sz(0),
-    _max_mem_sz_region(NULL)
-  {}
+void G1RemSet::print_periodic_summary_info() {
+  G1RemSetSummary current;
+  current.initialize(this, n_workers());
 
-  bool doHeapRegion(HeapRegion* r) {
-    if (r->continuesHumongous()) return false;
-    size_t mem_sz = r->rem_set()->mem_size();
-    if (mem_sz > _max_mem_sz) {
-      _max_mem_sz = mem_sz;
-      _max_mem_sz_region = r;
-    }
-    _total_mem_sz += mem_sz;
-    size_t occ = r->rem_set()->occupied();
-    _occupied += occ;
-    return false;
-  }
-  size_t total_mem_sz() { return _total_mem_sz; }
-  size_t max_mem_sz() { return _max_mem_sz; }
-  size_t occupied() { return _occupied; }
-  HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; }
-};
+  _prev_period_summary.subtract_from(&current);
+  print_summary_info(&_prev_period_summary);
 
-class PrintRSThreadVTimeClosure : public ThreadClosure {
-public:
-  virtual void do_thread(Thread *t) {
-    ConcurrentG1RefineThread* crt = (ConcurrentG1RefineThread*) t;
-    gclog_or_tty->print("    %5.2f", crt->vtime_accum());
-  }
-};
+  _prev_period_summary.set(&current);
+}
 
 void G1RemSet::print_summary_info() {
-  G1CollectedHeap* g1 = G1CollectedHeap::heap();
+  G1RemSetSummary current;
+  current.initialize(this, n_workers());
+
+  print_summary_info(&current, " Cumulative RS summary");
+}
+
+void G1RemSet::print_summary_info(G1RemSetSummary * summary, const char * header) {
+  assert(summary != NULL, "just checking");
+
+  if (header != NULL) {
+    gclog_or_tty->print_cr("%s", header);
+  }
 
 #if CARD_REPEAT_HISTO
   gclog_or_tty->print_cr("\nG1 card_repeat count histogram: ");
@@ -745,46 +730,7 @@
   card_repeat_count.print_on(gclog_or_tty);
 #endif
 
-  gclog_or_tty->print_cr("\n Concurrent RS processed %d cards",
-                         _conc_refine_cards);
-  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
-  jint tot_processed_buffers =
-    dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread();
-  gclog_or_tty->print_cr("  Of %d completed buffers:", tot_processed_buffers);
-  gclog_or_tty->print_cr("     %8d (%5.1f%%) by conc RS threads.",
-                dcqs.processed_buffers_rs_thread(),
-                100.0*(float)dcqs.processed_buffers_rs_thread()/
-                (float)tot_processed_buffers);
-  gclog_or_tty->print_cr("     %8d (%5.1f%%) by mutator threads.",
-                dcqs.processed_buffers_mut(),
-                100.0*(float)dcqs.processed_buffers_mut()/
-                (float)tot_processed_buffers);
-  gclog_or_tty->print_cr("  Conc RS threads times(s)");
-  PrintRSThreadVTimeClosure p;
-  gclog_or_tty->print("     ");
-  g1->concurrent_g1_refine()->threads_do(&p);
-  gclog_or_tty->print_cr("");
-
-  HRRSStatsIter blk;
-  g1->heap_region_iterate(&blk);
-  gclog_or_tty->print_cr("  Total heap region rem set sizes = "SIZE_FORMAT"K."
-                         "  Max = "SIZE_FORMAT"K.",
-                         blk.total_mem_sz()/K, blk.max_mem_sz()/K);
-  gclog_or_tty->print_cr("  Static structures = "SIZE_FORMAT"K,"
-                         " free_lists = "SIZE_FORMAT"K.",
-                         HeapRegionRemSet::static_mem_size() / K,
-                         HeapRegionRemSet::fl_mem_size() / K);
-  gclog_or_tty->print_cr("    "SIZE_FORMAT" occupied cards represented.",
-                         blk.occupied());
-  HeapRegion* max_mem_sz_region = blk.max_mem_sz_region();
-  HeapRegionRemSet* rem_set = max_mem_sz_region->rem_set();
-  gclog_or_tty->print_cr("    Max size region = "HR_FORMAT", "
-                         "size = "SIZE_FORMAT "K, occupied = "SIZE_FORMAT"K.",
-                         HR_FORMAT_PARAMS(max_mem_sz_region),
-                         (rem_set->mem_size() + K - 1)/K,
-                         (rem_set->occupied() + K - 1)/K);
-  gclog_or_tty->print_cr("    Did %d coarsenings.",
-                         HeapRegionRemSet::n_coarsenings());
+  summary->print_on(gclog_or_tty);
 }
 
 void G1RemSet::prepare_for_verify() {
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Sat Jun 01 10:00:56 2013 +0200
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Tue May 28 09:32:06 2013 +0200
@@ -25,6 +25,8 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1REMSET_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1REMSET_HPP
 
+#include "gc_implementation/g1/g1RemSetSummary.hpp"
+
 // A G1RemSet provides ways of iterating over pointers into a selected
 // collection set.
 
@@ -37,9 +39,11 @@
 // so that they can be used to update the individual region remsets.
 
 class G1RemSet: public CHeapObj<mtGC> {
+private:
+  G1RemSetSummary _prev_period_summary;
 protected:
   G1CollectedHeap* _g1;
-  unsigned _conc_refine_cards;
+  size_t _conc_refine_cards;
   uint n_workers();
 
 protected:
@@ -66,6 +70,8 @@
   // references into the collection set.
   OopsInHeapRegionClosure** _cset_rs_update_cl;
 
+  // Print the given summary info
+  virtual void print_summary_info(G1RemSetSummary * summary, const char * header = NULL);
 public:
   // This is called to reset dual hash tables after the gc pause
   // is finished and the initial hash table is no longer being
@@ -123,11 +129,18 @@
                            int worker_i,
                            bool check_for_refs_into_cset);
 
-  // Print any relevant summary info.
+  // Print accumulated summary info from the start of the VM.
   virtual void print_summary_info();
 
+  // Print accumulated summary info from the last time called.
+  virtual void print_periodic_summary_info();
+
   // Prepare remembered set for verification.
   virtual void prepare_for_verify();
+
+  size_t conc_refine_cards() const {
+    return _conc_refine_cards;
+  }
 };
 
 class CountNonCleanMemRegionClosure: public MemRegionClosure {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSetSummary.cpp	Tue May 28 09:32:06 2013 +0200
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/g1/concurrentG1Refine.hpp"
+#include "gc_implementation/g1/concurrentG1RefineThread.hpp"
+#include "gc_implementation/g1/heapRegion.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1RemSet.inline.hpp"
+#include "gc_implementation/g1/g1RemSetSummary.hpp"
+#include "gc_implementation/g1/heapRegionRemSet.hpp"
+#include "runtime/thread.inline.hpp"
+
+class GetRSThreadVTimeClosure : public ThreadClosure {
+private:
+  G1RemSetSummary* _summary;
+  uint _counter;
+
+public:
+  GetRSThreadVTimeClosure(G1RemSetSummary * summary) : ThreadClosure(), _summary(summary), _counter(0) {
+    assert(_summary != NULL, "just checking");
+  }
+
+  virtual void do_thread(Thread* t) {
+    ConcurrentG1RefineThread* crt = (ConcurrentG1RefineThread*) t;
+    _summary->set_rs_thread_vtime(_counter, crt->vtime_accum());
+    _counter++;
+  }
+};
+
+void G1RemSetSummary::update() {
+  _num_refined_cards = remset()->conc_refine_cards();
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  _num_processed_buf_mutator = dcqs.processed_buffers_mut();
+  _num_processed_buf_rs_threads = dcqs.processed_buffers_rs_thread();
+
+  _num_coarsenings = HeapRegionRemSet::n_coarsenings();
+
+  ConcurrentG1Refine * cg1r = G1CollectedHeap::heap()->concurrent_g1_refine();
+  if (_rs_threads_vtimes != NULL) {
+    GetRSThreadVTimeClosure p(this);
+    cg1r->worker_threads_do(&p);
+  }
+  set_sampling_thread_vtime(cg1r->sampling_thread()->vtime_accum());
+}
+
+void G1RemSetSummary::set_rs_thread_vtime(uint thread, double value) {
+  assert(_rs_threads_vtimes != NULL, "just checking");
+  assert(thread < _num_vtimes, "just checking");
+  _rs_threads_vtimes[thread] = value;
+}
+
+double G1RemSetSummary::rs_thread_vtime(uint thread) const {
+  assert(_rs_threads_vtimes != NULL, "just checking");
+  assert(thread < _num_vtimes, "just checking");
+  return _rs_threads_vtimes[thread];
+}
+
+void G1RemSetSummary::initialize(G1RemSet* remset, uint num_workers) {
+  assert(_rs_threads_vtimes == NULL, "just checking");
+  assert(remset != NULL, "just checking");
+
+  _remset = remset;
+  _num_vtimes = num_workers;
+  _rs_threads_vtimes = NEW_C_HEAP_ARRAY(double, _num_vtimes, mtGC);
+  memset(_rs_threads_vtimes, 0, sizeof(double) * _num_vtimes);
+
+  update();
+}
+
+void G1RemSetSummary::set(G1RemSetSummary* other) {
+  assert(other != NULL, "just checking");
+  assert(remset() == other->remset(), "just checking");
+  assert(_num_vtimes == other->_num_vtimes, "just checking");
+
+  _num_refined_cards = other->num_concurrent_refined_cards();
+
+  _num_processed_buf_mutator = other->num_processed_buf_mutator();
+  _num_processed_buf_rs_threads = other->num_processed_buf_rs_threads();
+
+  _num_coarsenings = other->_num_coarsenings;
+
+  memcpy(_rs_threads_vtimes, other->_rs_threads_vtimes, sizeof(double) * _num_vtimes);
+
+  set_sampling_thread_vtime(other->sampling_thread_vtime());
+}
+
+void G1RemSetSummary::subtract_from(G1RemSetSummary* other) {
+  assert(other != NULL, "just checking");
+  assert(remset() == other->remset(), "just checking");
+  assert(_num_vtimes == other->_num_vtimes, "just checking");
+
+  _num_refined_cards = other->num_concurrent_refined_cards() - _num_refined_cards;
+
+  _num_processed_buf_mutator = other->num_processed_buf_mutator() - _num_processed_buf_mutator;
+  _num_processed_buf_rs_threads = other->num_processed_buf_rs_threads() - _num_processed_buf_rs_threads;
+
+  _num_coarsenings = other->num_coarsenings() - _num_coarsenings;
+
+  for (uint i = 0; i < _num_vtimes; i++) {
+    set_rs_thread_vtime(i, other->rs_thread_vtime(i) - rs_thread_vtime(i));
+  }
+
+  _sampling_thread_vtime = other->sampling_thread_vtime() - _sampling_thread_vtime;
+}
+
+class HRRSStatsIter: public HeapRegionClosure {
+  size_t _occupied;
+  size_t _total_mem_sz;
+  size_t _max_mem_sz;
+  HeapRegion* _max_mem_sz_region;
+public:
+  HRRSStatsIter() :
+    _occupied(0),
+    _total_mem_sz(0),
+    _max_mem_sz(0),
+    _max_mem_sz_region(NULL)
+  {}
+
+  bool doHeapRegion(HeapRegion* r) {
+    size_t mem_sz = r->rem_set()->mem_size();
+    if (mem_sz > _max_mem_sz) {
+      _max_mem_sz = mem_sz;
+      _max_mem_sz_region = r;
+    }
+    _total_mem_sz += mem_sz;
+    size_t occ = r->rem_set()->occupied();
+    _occupied += occ;
+    return false;
+  }
+  size_t total_mem_sz() { return _total_mem_sz; }
+  size_t max_mem_sz() { return _max_mem_sz; }
+  size_t occupied() { return _occupied; }
+  HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; }
+};
+
+double calc_percentage(size_t numerator, size_t denominator) {
+  if (denominator != 0) {
+    return (double)numerator / denominator * 100.0;
+  } else {
+    return 0.0f;
+  }
+}
+
+void G1RemSetSummary::print_on(outputStream* out) {
+  out->print_cr("\n Concurrent RS processed "SIZE_FORMAT" cards",
+                num_concurrent_refined_cards());
+  out->print_cr("  Of %d completed buffers:", num_processed_buf_total());
+  out->print_cr("     %8d (%5.1f%%) by concurrent RS threads.",
+                num_processed_buf_total(),
+                calc_percentage(num_processed_buf_rs_threads(), num_processed_buf_total()));
+  out->print_cr("     %8d (%5.1f%%) by mutator threads.",
+                num_processed_buf_mutator(),
+                calc_percentage(num_processed_buf_mutator(), num_processed_buf_total()));
+  out->print_cr("  Concurrent RS threads times (s)");
+  out->print("     ");
+  for (uint i = 0; i < _num_vtimes; i++) {
+    out->print("    %5.2f", rs_thread_vtime(i));
+  }
+  out->cr();
+  out->print_cr("  Concurrent sampling threads times (s)");
+  out->print_cr("         %5.2f", sampling_thread_vtime());
+
+  HRRSStatsIter blk;
+  G1CollectedHeap::heap()->heap_region_iterate(&blk);
+  out->print_cr("  Total heap region rem set sizes = "SIZE_FORMAT"K."
+                "  Max = "SIZE_FORMAT"K.",
+                blk.total_mem_sz()/K, blk.max_mem_sz()/K);
+  out->print_cr("  Static structures = "SIZE_FORMAT"K,"
+                " free_lists = "SIZE_FORMAT"K.",
+                HeapRegionRemSet::static_mem_size() / K,
+                HeapRegionRemSet::fl_mem_size() / K);
+  out->print_cr("    "SIZE_FORMAT" occupied cards represented.",
+                blk.occupied());
+  HeapRegion* max_mem_sz_region = blk.max_mem_sz_region();
+  HeapRegionRemSet* rem_set = max_mem_sz_region->rem_set();
+  out->print_cr("    Max size region = "HR_FORMAT", "
+                "size = "SIZE_FORMAT "K, occupied = "SIZE_FORMAT"K.",
+                HR_FORMAT_PARAMS(max_mem_sz_region),
+                (rem_set->mem_size() + K - 1)/K,
+                (rem_set->occupied() + K - 1)/K);
+
+  out->print_cr("    Did %d coarsenings.", num_coarsenings());
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSetSummary.hpp	Tue May 28 09:32:06 2013 +0200
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1REMSETSUMMARY_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1REMSETSUMMARY_HPP
+
+#include "utilities/ostream.hpp"
+
+class G1RemSet;
+
+// A G1RemSetSummary manages statistical information about the G1RemSet
+
+class G1RemSetSummary VALUE_OBJ_CLASS_SPEC {
+private:
+  friend class GetRSThreadVTimeClosure;
+
+  G1RemSet* _remset;
+
+  G1RemSet* remset() const {
+    return _remset;
+  }
+
+  size_t _num_refined_cards;
+  size_t _num_processed_buf_mutator;
+  size_t _num_processed_buf_rs_threads;
+
+  size_t _num_coarsenings;
+
+  double* _rs_threads_vtimes;
+  size_t _num_vtimes;
+
+  double _sampling_thread_vtime;
+
+  void set_rs_thread_vtime(uint thread, double value);
+  void set_sampling_thread_vtime(double value) {
+    _sampling_thread_vtime = value;
+  }
+
+  void free_and_null() {
+    if (_rs_threads_vtimes) {
+      FREE_C_HEAP_ARRAY(double, _rs_threads_vtimes, mtGC);
+      _rs_threads_vtimes = NULL;
+      _num_vtimes = 0;
+    }
+  }
+
+  // update this summary with current data from various places
+  void update();
+
+public:
+  G1RemSetSummary() : _remset(NULL), _num_refined_cards(0),
+    _num_processed_buf_mutator(0), _num_processed_buf_rs_threads(0), _num_coarsenings(0),
+    _rs_threads_vtimes(NULL), _num_vtimes(0), _sampling_thread_vtime(0.0f) {
+  }
+
+  ~G1RemSetSummary() {
+    free_and_null();
+  }
+
+  // set the counters in this summary to the values of the others
+  void set(G1RemSetSummary* other);
+  // subtract all counters from the other summary, and set them in the current
+  void subtract_from(G1RemSetSummary* other);
+
+  // initialize and get the first sampling
+  void initialize(G1RemSet* remset, uint num_workers);
+
+  void print_on(outputStream* out);
+
+  double rs_thread_vtime(uint thread) const;
+
+  double sampling_thread_vtime() const {
+    return _sampling_thread_vtime;
+  }
+
+  size_t num_concurrent_refined_cards() const {
+    return _num_refined_cards;
+  }
+
+  size_t num_processed_buf_mutator() const {
+    return _num_processed_buf_mutator;
+  }
+
+  size_t num_processed_buf_rs_threads() const {
+    return _num_processed_buf_rs_threads;
+  }
+
+  size_t num_processed_buf_total() const {
+    return num_processed_buf_mutator() + num_processed_buf_rs_threads();
+  }
+
+  size_t num_coarsenings() const {
+    return _num_coarsenings;
+  }
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1REMSETSUMMARY_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/gc/g1/TestSummarizeRSetStats.java	Tue May 28 09:32:06 2013 +0200
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestSummarizeRSetStats.java
+ * @bug 8013895
+ * @library /testlibrary
+ * @build TestSummarizeRSetStats
+ * @summary Verify output of -XX:+G1SummarizeRSetStats
+ * @run main TestSummarizeRSetStats
+ *
+ * Test the output of G1SummarizeRSetStats in conjunction with G1SummarizeRSetStatsPeriod.
+ */
+
+import com.oracle.java.testlibrary.*;
+import java.lang.Thread;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+class RunSystemGCs {
+    // 4M size, both are directly allocated into the old gen
+    static Object[] largeObject1 = new Object[1024 * 1024];
+    static Object[] largeObject2 = new Object[1024 * 1024];
+
+    static int[] temp;
+
+    public static void main(String[] args) {
+        // create some cross-references between these objects
+        for (int i = 0; i < largeObject1.length; i++) {
+            largeObject1[i] = largeObject2;
+        }
+
+        for (int i = 0; i < largeObject2.length; i++) {
+            largeObject2[i] = largeObject1;
+        }
+
+        int numGCs = Integer.parseInt(args[0]);
+
+        if (numGCs > 0) {
+            // try to force a minor collection: the young gen is 4M, the
+            // amount of data allocated below is roughly that (4*1024*1024 +
+            // some header data)
+            for (int i = 0; i < 1024 ; i++) {
+                temp = new int[1024];
+            }
+        }
+
+        for (int i = 0; i < numGCs - 1; i++) {
+            System.gc();
+        }
+    }
+}
+
+public class TestSummarizeRSetStats {
+
+    public static String runTest(String[] additionalArgs, int numGCs) throws Exception {
+        ArrayList<String> finalargs = new ArrayList<String>();
+        String[] defaultArgs = new String[] {
+            "-XX:+UseG1GC",
+            "-Xmn4m",
+            "-Xmx20m",
+            "-XX:InitiatingHeapOccupancyPercent=100", // we don't want the additional GCs due to initial marking
+            "-XX:+PrintGC",
+            "-XX:+UnlockDiagnosticVMOptions",
+            "-XX:G1HeapRegionSize=1M",
+        };
+
+        finalargs.addAll(Arrays.asList(defaultArgs));
+
+        if (additionalArgs != null) {
+            finalargs.addAll(Arrays.asList(additionalArgs));
+        }
+
+        finalargs.add(RunSystemGCs.class.getName());
+        finalargs.add(String.valueOf(numGCs));
+
+        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
+            finalargs.toArray(new String[0]));
+        OutputAnalyzer output = new OutputAnalyzer(pb.start());
+
+        output.shouldHaveExitValue(0);
+
+        String result = output.getStdout();
+        return result;
+    }
+
+    private static void expectStatistics(String result, int expectedCumulative, int expectedPeriodic) throws Exception {
+        int actualTotal = result.split("Concurrent RS processed").length - 1;
+        int actualCumulative = result.split("Cumulative RS summary").length - 1;
+
+        if (expectedCumulative != actualCumulative) {
+            throw new Exception("Incorrect amount of RSet summaries at the end. Expected " + expectedCumulative + ", got " + actualCumulative);
+        }
+
+        if (expectedPeriodic != (actualTotal - actualCumulative)) {
+            throw new Exception("Incorrect amount of per-period RSet summaries at the end. Expected " + expectedPeriodic + ", got " + (actualTotal - actualCumulative));
+        }
+    }
+
+    public static void main(String[] args) throws Exception {
+        String result;
+
+        // no RSet statistics output
+        result = runTest(null, 0);
+        expectStatistics(result, 0, 0);
+
+        // no RSet statistics output
+        result = runTest(null, 2);
+        expectStatistics(result, 0, 0);
+
+        // no RSet statistics output
+        result = runTest(new String[] { "-XX:G1SummarizeRSetStatsPeriod=1" }, 3);
+        expectStatistics(result, 0, 0);
+
+        // single RSet statistics output at the end
+        result = runTest(new String[] { "-XX:+G1SummarizeRSetStats" }, 0);
+        expectStatistics(result, 1, 0);
+
+        // single RSet statistics output at the end
+        result = runTest(new String[] { "-XX:+G1SummarizeRSetStats" }, 2);
+        expectStatistics(result, 1, 0);
+
+        // single RSet statistics output
+        result = runTest(new String[] { "-XX:+G1SummarizeRSetStats", "-XX:G1SummarizeRSetStatsPeriod=1" }, 0);
+        expectStatistics(result, 1, 0);
+
+        // two times RSet statistics output
+        result = runTest(new String[] { "-XX:+G1SummarizeRSetStats", "-XX:G1SummarizeRSetStatsPeriod=1" }, 1);
+        expectStatistics(result, 1, 1);
+
+        // four times RSet statistics output
+        result = runTest(new String[] { "-XX:+G1SummarizeRSetStats", "-XX:G1SummarizeRSetStatsPeriod=1" }, 3);
+        expectStatistics(result, 1, 3);
+
+        // three times RSet statistics output
+        result = runTest(new String[] { "-XX:+G1SummarizeRSetStats", "-XX:G1SummarizeRSetStatsPeriod=2" }, 3);
+        expectStatistics(result, 1, 2);
+
+        // single RSet statistics output
+        result = runTest(new String[] { "-XX:+G1SummarizeRSetStats", "-XX:G1SummarizeRSetStatsPeriod=100" }, 3);
+        expectStatistics(result, 1, 1);
+    }
+}
+