6970376: ParNew: shared TaskQueue statistics
authorjcoomes
Mon, 09 Aug 2010 18:03:50 -0700
changeset 6252 dce68ee3d184
parent 6251 90e562b9f1cc
child 6253 228a0240f71e
6970376: ParNew: shared TaskQueue statistics Reviewed-by: ysr
hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp
--- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Mon Aug 09 05:41:05 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Mon Aug 09 18:03:50 2010 -0700
@@ -51,9 +51,14 @@
   _is_alive_closure(gen_), _scan_weak_ref_closure(gen_, this),
   _keep_alive_closure(&_scan_weak_ref_closure),
   _promotion_failure_size(0),
-  _pushes(0), _pops(0), _steals(0), _steal_attempts(0), _term_attempts(0),
   _strong_roots_time(0.0), _term_time(0.0)
 {
+  #if TASKQUEUE_STATS
+  _term_attempts = 0;
+  _overflow_refills = 0;
+  _overflow_refill_objs = 0;
+  #endif // TASKQUEUE_STATS
+
   _survivor_chunk_array =
     (ChunkArray*) old_gen()->get_data_recorder(thread_num());
   _hash_seed = 17;  // Might want to take time-based random value.
@@ -100,7 +105,6 @@
     // Push remainder.
     bool ok = work_queue()->push(old);
     assert(ok, "just popped, push must be okay");
-    note_push();
   } else {
     // Restore length so that it can be used if there
     // is a promotion failure and forwarding pointers
@@ -126,7 +130,6 @@
     while (queue->size() > (juint)max_size) {
       oop obj_to_scan;
       if (queue->pop_local(obj_to_scan)) {
-        note_pop();
         if ((HeapWord *)obj_to_scan < young_old_boundary()) {
           if (obj_to_scan->is_objArray() &&
               obj_to_scan->is_forwarded() &&
@@ -271,20 +274,28 @@
                         GrowableArray<oop>**    overflow_stacks_,
                         size_t                  desired_plab_sz,
                         ParallelTaskTerminator& term);
+
+  ~ParScanThreadStateSet() { TASKQUEUE_STATS_ONLY(reset_stats()); }
+
   inline ParScanThreadState& thread_state(int i);
-  int pushes() { return _pushes; }
-  int pops()   { return _pops; }
-  int steals() { return _steals; }
+
   void reset(bool promotion_failed);
   void flush();
+
+  #if TASKQUEUE_STATS
+  static void
+    print_termination_stats_hdr(outputStream* const st = gclog_or_tty);
+  void print_termination_stats(outputStream* const st = gclog_or_tty);
+  static void
+    print_taskqueue_stats_hdr(outputStream* const st = gclog_or_tty);
+  void print_taskqueue_stats(outputStream* const st = gclog_or_tty);
+  void reset_stats();
+  #endif // TASKQUEUE_STATS
+
 private:
   ParallelTaskTerminator& _term;
   ParNewGeneration&       _gen;
   Generation&             _next_gen;
-  // staticstics
-  int _pushes;
-  int _pops;
-  int _steals;
 };
 
 
@@ -294,8 +305,7 @@
   GrowableArray<oop>** overflow_stack_set_,
   size_t desired_plab_sz, ParallelTaskTerminator& term)
   : ResourceArray(sizeof(ParScanThreadState), num_threads),
-    _gen(gen), _next_gen(old_gen), _term(term),
-    _pushes(0), _pops(0), _steals(0)
+    _gen(gen), _next_gen(old_gen), _term(term)
 {
   assert(num_threads > 0, "sanity check!");
   // Initialize states.
@@ -323,6 +333,82 @@
   }
 }
 
+#if TASKQUEUE_STATS
+void
+ParScanThreadState::reset_stats()
+{
+  taskqueue_stats().reset();
+  _term_attempts = 0;
+  _overflow_refills = 0;
+  _overflow_refill_objs = 0;
+}
+
+void ParScanThreadStateSet::reset_stats()
+{
+  for (int i = 0; i < length(); ++i) {
+    thread_state(i).reset_stats();
+  }
+}
+
+void
+ParScanThreadStateSet::print_termination_stats_hdr(outputStream* const st)
+{
+  st->print_raw_cr("GC Termination Stats");
+  st->print_raw_cr("     elapsed  --strong roots-- "
+                   "-------termination-------");
+  st->print_raw_cr("thr     ms        ms       %   "
+                   "    ms       %   attempts");
+  st->print_raw_cr("--- --------- --------- ------ "
+                   "--------- ------ --------");
+}
+
+void ParScanThreadStateSet::print_termination_stats(outputStream* const st)
+{
+  print_termination_stats_hdr(st);
+
+  for (int i = 0; i < length(); ++i) {
+    const ParScanThreadState & pss = thread_state(i);
+    const double elapsed_ms = pss.elapsed_time() * 1000.0;
+    const double s_roots_ms = pss.strong_roots_time() * 1000.0;
+    const double term_ms = pss.term_time() * 1000.0;
+    st->print_cr("%3d %9.2f %9.2f %6.2f "
+                 "%9.2f %6.2f " SIZE_FORMAT_W(8),
+                 i, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms,
+                 term_ms, term_ms * 100 / elapsed_ms, pss.term_attempts());
+  }
+}
+
+// Print stats related to work queue activity.
+void ParScanThreadStateSet::print_taskqueue_stats_hdr(outputStream* const st)
+{
+  st->print_raw_cr("GC Task Stats");
+  st->print_raw("thr "); TaskQueueStats::print_header(1, st); st->cr();
+  st->print_raw("--- "); TaskQueueStats::print_header(2, st); st->cr();
+}
+
+void ParScanThreadStateSet::print_taskqueue_stats(outputStream* const st)
+{
+  print_taskqueue_stats_hdr(st);
+
+  TaskQueueStats totals;
+  for (int i = 0; i < length(); ++i) {
+    const ParScanThreadState & pss = thread_state(i);
+    const TaskQueueStats & stats = pss.taskqueue_stats();
+    st->print("%3d ", i); stats.print(st); st->cr();
+    totals += stats;
+
+    if (pss.overflow_refills() > 0) {
+      st->print_cr("    " SIZE_FORMAT_W(10) " overflow refills    "
+                   SIZE_FORMAT_W(10) " overflow objects",
+                   pss.overflow_refills(), pss.overflow_refill_objs());
+    }
+  }
+  st->print("tot "); totals.print(st); st->cr();
+
+  DEBUG_ONLY(totals.verify());
+}
+#endif // TASKQUEUE_STATS
+
 void ParScanThreadStateSet::flush()
 {
   // Work in this loop should be kept as lightweight as
@@ -346,42 +432,8 @@
     // Inform old gen that we're done.
     _next_gen.par_promote_alloc_done(i);
     _next_gen.par_oop_since_save_marks_iterate_done(i);
+  }
 
-    // Flush stats related to work queue activity (push/pop/steal)
-    // This could conceivably become a bottleneck; if so, we'll put the
-    // stat's gathering under the flag.
-    if (PAR_STATS_ENABLED) {
-      _pushes += par_scan_state.pushes();
-      _pops   += par_scan_state.pops();
-      _steals += par_scan_state.steals();
-      if (ParallelGCVerbose) {
-        gclog_or_tty->print("Thread %d complete:\n"
-                            "  Pushes: %7d    Pops: %7d    Steals %7d (in %d attempts)\n",
-                            i, par_scan_state.pushes(), par_scan_state.pops(),
-                            par_scan_state.steals(), par_scan_state.steal_attempts());
-        if (par_scan_state.overflow_pushes() > 0 ||
-            par_scan_state.overflow_refills() > 0) {
-          gclog_or_tty->print("  Overflow pushes: %7d    "
-                              "Overflow refills: %7d for %d objs.\n",
-                              par_scan_state.overflow_pushes(),
-                              par_scan_state.overflow_refills(),
-                              par_scan_state.overflow_refill_objs());
-        }
-
-        double elapsed = par_scan_state.elapsed();
-        double strong_roots = par_scan_state.strong_roots_time();
-        double term = par_scan_state.term_time();
-        gclog_or_tty->print(
-                            "  Elapsed: %7.2f ms.\n"
-                            "    Strong roots: %7.2f ms (%6.2f%%)\n"
-                            "    Termination:  %7.2f ms (%6.2f%%) (in %d entries)\n",
-                           elapsed * 1000.0,
-                           strong_roots * 1000.0, (strong_roots*100.0/elapsed),
-                           term * 1000.0, (term*100.0/elapsed),
-                           par_scan_state.term_attempts());
-      }
-    }
-  }
   if (UseConcMarkSweepGC && ParallelGCThreads > 0) {
     // We need to call this even when ResizeOldPLAB is disabled
     // so as to avoid breaking some asserts. While we may be able
@@ -456,15 +508,12 @@
     // We have no local work, attempt to steal from other threads.
 
     // attempt to steal work from promoted.
-    par_scan_state()->note_steal_attempt();
     if (task_queues()->steal(par_scan_state()->thread_num(),
                              par_scan_state()->hash_seed(),
                              obj_to_scan)) {
-      par_scan_state()->note_steal();
       bool res = work_q->push(obj_to_scan);
       assert(res, "Empty queue should have room for a push.");
 
-      par_scan_state()->note_push();
       //   if successful, goto Start.
       continue;
 
@@ -842,17 +891,6 @@
   }
   thread_state_set.reset(promotion_failed());
 
-  if (PAR_STATS_ENABLED && ParallelGCVerbose) {
-    gclog_or_tty->print("Thread totals:\n"
-               "  Pushes: %7d    Pops: %7d    Steals %7d (sum = %7d).\n",
-               thread_state_set.pushes(), thread_state_set.pops(),
-               thread_state_set.steals(),
-               thread_state_set.pops()+thread_state_set.steals());
-  }
-  assert(thread_state_set.pushes() == thread_state_set.pops()
-                                    + thread_state_set.steals(),
-         "Or else the queues are leaky.");
-
   // Process (weak) reference objects found during scavenge.
   ReferenceProcessor* rp = ref_processor();
   IsAliveClosure is_alive(this);
@@ -932,6 +970,9 @@
     gch->print_heap_change(gch_prev_used);
   }
 
+  TASKQUEUE_STATS_ONLY(thread_state_set.print_termination_stats());
+  TASKQUEUE_STATS_ONLY(thread_state_set.print_taskqueue_stats());
+
   if (UseAdaptiveSizePolicy) {
     size_policy->minor_collection_end(gch->gc_cause());
     size_policy->avg_survived()->sample(from()->used());
@@ -1104,9 +1145,8 @@
         gclog_or_tty->print("queue overflow!\n");
       }
       push_on_overflow_list(old, par_scan_state);
-      par_scan_state->note_overflow_push();
+      TASKQUEUE_STATS_ONLY(par_scan_state->taskqueue_stats().record_overflow(0));
     }
-    par_scan_state->note_push();
 
     return new_obj;
   }
@@ -1227,9 +1267,8 @@
     if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) {
       // Add stats for overflow pushes.
       push_on_overflow_list(old, par_scan_state);
-      par_scan_state->note_overflow_push();
+      TASKQUEUE_STATS_ONLY(par_scan_state->taskqueue_stats().record_overflow(0));
     }
-    par_scan_state->note_push();
 
     return new_obj;
   }
@@ -1466,7 +1505,7 @@
     cur = next;
     n++;
   }
-  par_scan_state->note_overflow_refill(n);
+  TASKQUEUE_STATS_ONLY(par_scan_state->note_overflow_refill(n));
 #ifndef PRODUCT
   assert(_num_par_pushes >= n, "Too many pops?");
   Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes);
--- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Mon Aug 09 05:41:05 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Mon Aug 09 18:03:50 2010 -0700
@@ -36,9 +36,6 @@
 typedef Padded<OopTaskQueue> ObjToScanQueue;
 typedef GenericTaskQueueSet<ObjToScanQueue> ObjToScanQueueSet;
 
-// Enable this to get push/pop/steal stats.
-const int PAR_STATS_ENABLED = 0;
-
 class ParKeepAliveClosure: public DefNewGeneration::KeepAliveClosure {
  private:
   ParScanWeakRefClosure* _par_cl;
@@ -94,8 +91,11 @@
 
   bool _to_space_full;
 
-  int _pushes, _pops, _steals, _steal_attempts, _term_attempts;
-  int _overflow_pushes, _overflow_refills, _overflow_refill_objs;
+#if TASKQUEUE_STATS
+  size_t _term_attempts;
+  size_t _overflow_refills;
+  size_t _overflow_refill_objs;
+#endif // TASKQUEUE_STATS
 
   // Stats for promotion failure
   size_t _promotion_failure_size;
@@ -181,45 +181,38 @@
   }
   void print_and_clear_promotion_failure_size();
 
-  int pushes() { return _pushes; }
-  int pops()   { return _pops; }
-  int steals() { return _steals; }
-  int steal_attempts() { return _steal_attempts; }
-  int term_attempts()  { return _term_attempts; }
-  int overflow_pushes() { return _overflow_pushes; }
-  int overflow_refills() { return _overflow_refills; }
-  int overflow_refill_objs() { return _overflow_refill_objs; }
+#if TASKQUEUE_STATS
+  TaskQueueStats & taskqueue_stats() const { return _work_queue->stats; }
+
+  size_t term_attempts() const             { return _term_attempts; }
+  size_t overflow_refills() const          { return _overflow_refills; }
+  size_t overflow_refill_objs() const      { return _overflow_refill_objs; }
 
-  void note_push()  { if (PAR_STATS_ENABLED) _pushes++; }
-  void note_pop()   { if (PAR_STATS_ENABLED) _pops++; }
-  void note_steal() { if (PAR_STATS_ENABLED) _steals++; }
-  void note_steal_attempt() { if (PAR_STATS_ENABLED) _steal_attempts++; }
-  void note_term_attempt()  { if (PAR_STATS_ENABLED) _term_attempts++; }
-  void note_overflow_push() { if (PAR_STATS_ENABLED) _overflow_pushes++; }
-  void note_overflow_refill(int objs) {
-    if (PAR_STATS_ENABLED) {
-      _overflow_refills++;
-      _overflow_refill_objs += objs;
-    }
+  void note_term_attempt()                 { ++_term_attempts; }
+  void note_overflow_refill(size_t objs)   {
+    ++_overflow_refills; _overflow_refill_objs += objs;
   }
 
+  void reset_stats();
+#endif // TASKQUEUE_STATS
+
   void start_strong_roots() {
     _start_strong_roots = os::elapsedTime();
   }
   void end_strong_roots() {
     _strong_roots_time += (os::elapsedTime() - _start_strong_roots);
   }
-  double strong_roots_time() { return _strong_roots_time; }
+  double strong_roots_time() const { return _strong_roots_time; }
   void start_term_time() {
-    note_term_attempt();
+    TASKQUEUE_STATS_ONLY(note_term_attempt());
     _start_term = os::elapsedTime();
   }
   void end_term_time() {
     _term_time += (os::elapsedTime() - _start_term);
   }
-  double term_time() { return _term_time; }
+  double term_time() const { return _term_time; }
 
-  double elapsed() {
+  double elapsed_time() const {
     return os::elapsedTime() - _start;
   }
 };