6966222: G1: simplify TaskQueue overflow handling
authorjcoomes
Mon, 09 Aug 2010 05:41:05 -0700
changeset 6251 90e562b9f1cc
parent 6250 5680f968c721
child 6252 dce68ee3d184
6966222: G1: simplify TaskQueue overflow handling Reviewed-by: tonyp, ysr
hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp
hotspot/src/share/vm/utilities/taskqueue.cpp
hotspot/src/share/vm/utilities/taskqueue.hpp
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Aug 06 10:17:21 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon Aug 09 05:41:05 2010 -0700
@@ -2710,6 +2710,35 @@
   }
 };
 
+#if TASKQUEUE_STATS
+void G1CollectedHeap::print_taskqueue_stats_hdr(outputStream* const st) {
+  st->print_raw_cr("GC Task Stats");
+  st->print_raw("thr "); TaskQueueStats::print_header(1, st); st->cr();
+  st->print_raw("--- "); TaskQueueStats::print_header(2, st); st->cr();
+}
+
+void G1CollectedHeap::print_taskqueue_stats(outputStream* const st) const {
+  print_taskqueue_stats_hdr(st);
+
+  TaskQueueStats totals;
+  const int n = MAX2(workers()->total_workers(), 1);
+  for (int i = 0; i < n; ++i) {
+    st->print("%3d ", i); task_queue(i)->stats.print(st); st->cr();
+    totals += task_queue(i)->stats;
+  }
+  st->print_raw("tot "); totals.print(st); st->cr();
+
+  DEBUG_ONLY(totals.verify());
+}
+
+void G1CollectedHeap::reset_taskqueue_stats() {
+  const int n = MAX2(workers()->total_workers(), 1);
+  for (int i = 0; i < n; ++i) {
+    task_queue(i)->stats.reset();
+  }
+}
+#endif // TASKQUEUE_STATS
+
 void
 G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
   if (GC_locker::check_active_before_gc()) {
@@ -2970,6 +2999,9 @@
     }
   }
 
+  TASKQUEUE_STATS_ONLY(if (ParallelGCVerbose) print_taskqueue_stats());
+  TASKQUEUE_STATS_ONLY(reset_taskqueue_stats());
+
   if (PrintHeapAtGC) {
     Universe::print_heap_after_gc();
   }
@@ -3715,10 +3747,6 @@
     _surviving_alloc_buffer(g1h->desired_plab_sz(GCAllocForSurvived)),
     _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured)),
     _age_table(false),
-#if G1_DETAILED_STATS
-    _pushes(0), _pops(0), _steals(0),
-    _steal_attempts(0),  _overflow_pushes(0),
-#endif
     _strong_roots_time(0), _term_time(0),
     _alloc_buffer_waste(0), _undo_waste(0)
 {
@@ -3738,14 +3766,41 @@
   _surviving_young_words = _surviving_young_words_base + PADDING_ELEM_NUM;
   memset(_surviving_young_words, 0, real_length * sizeof(size_t));
 
-  _overflowed_refs = new OverflowQueue(10);
-
   _alloc_buffers[GCAllocForSurvived] = &_surviving_alloc_buffer;
   _alloc_buffers[GCAllocForTenured]  = &_tenured_alloc_buffer;
 
   _start = os::elapsedTime();
 }
 
+void
+G1ParScanThreadState::print_termination_stats_hdr(outputStream* const st)
+{
+  st->print_raw_cr("GC Termination Stats");
+  st->print_raw_cr("     elapsed  --strong roots-- -------termination-------"
+                   " ------waste (KiB)------");
+  st->print_raw_cr("thr     ms        ms      %        ms      %    attempts"
+                   "  total   alloc    undo");
+  st->print_raw_cr("--- --------- --------- ------ --------- ------ --------"
+                   " ------- ------- -------");
+}
+
+void
+G1ParScanThreadState::print_termination_stats(int i,
+                                              outputStream* const st) const
+{
+  const double elapsed_ms = elapsed_time() * 1000.0;
+  const double s_roots_ms = strong_roots_time() * 1000.0;
+  const double term_ms    = term_time() * 1000.0;
+  st->print_cr("%3d %9.2f %9.2f %6.2f "
+               "%9.2f %6.2f " SIZE_FORMAT_W(8) " "
+               SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7),
+               i, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms,
+               term_ms, term_ms * 100 / elapsed_ms, term_attempts(),
+               (alloc_buffer_waste() + undo_waste()) * HeapWordSize / K,
+               alloc_buffer_waste() * HeapWordSize / K,
+               undo_waste() * HeapWordSize / K);
+}
+
 G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
   _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()),
   _par_scan_state(par_scan_state) { }
@@ -3952,12 +4007,9 @@
     G1ParScanThreadState* pss = par_scan_state();
     while (true) {
       pss->trim_queue();
-      IF_G1_DETAILED_STATS(pss->note_steal_attempt());
 
       StarTask stolen_task;
       if (queues()->steal(pss->queue_num(), pss->hash_seed(), stolen_task)) {
-        IF_G1_DETAILED_STATS(pss->note_steal());
-
         // slightly paranoid tests; I'm trying to catch potential
         // problems before we go into push_on_queue to know where the
         // problem is coming from
@@ -4076,35 +4128,9 @@
     // Clean up any par-expanded rem sets.
     HeapRegionRemSet::par_cleanup();
 
-    MutexLocker x(stats_lock());
     if (ParallelGCVerbose) {
-      gclog_or_tty->print("Thread %d complete:\n", i);
-#if G1_DETAILED_STATS
-      gclog_or_tty->print("  Pushes: %7d    Pops: %7d   Overflows: %7d   Steals %7d (in %d attempts)\n",
-                          pss.pushes(),
-                          pss.pops(),
-                          pss.overflow_pushes(),
-                          pss.steals(),
-                          pss.steal_attempts());
-#endif
-      double elapsed      = pss.elapsed();
-      double strong_roots = pss.strong_roots_time();
-      double term         = pss.term_time();
-      gclog_or_tty->print("  Elapsed: %7.2f ms.\n"
-                          "    Strong roots: %7.2f ms (%6.2f%%)\n"
-                          "    Termination:  %7.2f ms (%6.2f%%) "
-                                                 "(in "SIZE_FORMAT" entries)\n",
-                          elapsed * 1000.0,
-                          strong_roots * 1000.0, (strong_roots*100.0/elapsed),
-                          term * 1000.0, (term*100.0/elapsed),
-                          pss.term_attempts());
-      size_t total_waste = pss.alloc_buffer_waste() + pss.undo_waste();
-      gclog_or_tty->print("  Waste: %8dK\n"
-                 "    Alloc Buffer: %8dK\n"
-                 "    Undo: %8dK\n",
-                 (total_waste * HeapWordSize) / K,
-                 (pss.alloc_buffer_waste() * HeapWordSize) / K,
-                 (pss.undo_waste() * HeapWordSize) / K);
+      MutexLocker x(stats_lock());
+      pss.print_termination_stats(i);
     }
 
     assert(pss.refs_to_scan() == 0, "Task queue should be empty");
@@ -4221,6 +4247,7 @@
   if (ParallelGCThreads > 0) {
     // The individual threads will set their evac-failure closures.
     StrongRootsScope srs(this);
+    if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr();
     workers()->run_task(&g1_par_task);
   } else {
     StrongRootsScope srs(this);
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Aug 06 10:17:21 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Mon Aug 09 05:41:05 2010 -0700
@@ -46,17 +46,7 @@
 class ConcurrentG1Refine;
 class ConcurrentZFThread;
 
-// If want to accumulate detailed statistics on work queues
-// turn this on.
-#define G1_DETAILED_STATS 0
-
-#if G1_DETAILED_STATS
-#  define IF_G1_DETAILED_STATS(code) code
-#else
-#  define IF_G1_DETAILED_STATS(code)
-#endif
-
-typedef GenericTaskQueue<StarTask>          RefToScanQueue;
+typedef OverflowTaskQueue<StarTask>         RefToScanQueue;
 typedef GenericTaskQueueSet<RefToScanQueue> RefToScanQueueSet;
 
 typedef int RegionIdx_t;   // needs to hold [ 0..max_regions() )
@@ -471,6 +461,12 @@
   virtual void shrink(size_t expand_bytes);
   void shrink_helper(size_t expand_bytes);
 
+  #if TASKQUEUE_STATS
+  static void print_taskqueue_stats_hdr(outputStream* const st = gclog_or_tty);
+  void print_taskqueue_stats(outputStream* const st = gclog_or_tty) const;
+  void reset_taskqueue_stats();
+  #endif // TASKQUEUE_STATS
+
   // Do an incremental collection: identify a collection set, and evacuate
   // its live objects elsewhere.
   virtual void do_collection_pause();
@@ -662,7 +658,7 @@
 public:
   void set_refine_cte_cl_concurrency(bool concurrent);
 
-  RefToScanQueue *task_queue(int i);
+  RefToScanQueue *task_queue(int i) const;
 
   // A set of cards where updates happened during the GC
   DirtyCardQueueSet& dirty_card_queue_set() { return _dirty_card_queue_set; }
@@ -1579,9 +1575,6 @@
   CardTableModRefBS* _ct_bs;
   G1RemSet* _g1_rem;
 
-  typedef GrowableArray<StarTask> OverflowQueue;
-  OverflowQueue* _overflowed_refs;
-
   G1ParGCAllocBuffer  _surviving_alloc_buffer;
   G1ParGCAllocBuffer  _tenured_alloc_buffer;
   G1ParGCAllocBuffer* _alloc_buffers[GCAllocPurposeCount];
@@ -1598,10 +1591,6 @@
   int _queue_num;
 
   size_t _term_attempts;
-#if G1_DETAILED_STATS
-  int _pushes, _pops, _steals, _steal_attempts;
-  int _overflow_pushes;
-#endif
 
   double _start;
   double _start_strong_roots;
@@ -1615,7 +1604,7 @@
   // this points into the array, as we use the first few entries for padding
   size_t* _surviving_young_words;
 
-#define PADDING_ELEM_NUM (64 / sizeof(size_t))
+#define PADDING_ELEM_NUM (DEFAULT_CACHE_LINE_SIZE / sizeof(size_t))
 
   void   add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; }
 
@@ -1650,15 +1639,14 @@
   }
 
   RefToScanQueue*   refs()            { return _refs;             }
-  OverflowQueue*    overflowed_refs() { return _overflowed_refs;  }
   ageTable*         age_table()       { return &_age_table;       }
 
   G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) {
     return _alloc_buffers[purpose];
   }
 
-  size_t alloc_buffer_waste()                    { return _alloc_buffer_waste; }
-  size_t undo_waste()                            { return _undo_waste; }
+  size_t alloc_buffer_waste() const              { return _alloc_buffer_waste; }
+  size_t undo_waste() const                      { return _undo_waste; }
 
   template <class T> void push_on_queue(T* ref) {
     assert(ref != NULL, "invariant");
@@ -1671,12 +1659,7 @@
       assert(_g1h->obj_in_cs(p), "Should be in CS");
     }
 #endif
-    if (!refs()->push(ref)) {
-      overflowed_refs()->push(ref);
-      IF_G1_DETAILED_STATS(note_overflow_push());
-    } else {
-      IF_G1_DETAILED_STATS(note_push());
-    }
+    refs()->push(ref);
   }
 
   void pop_from_queue(StarTask& ref) {
@@ -1687,7 +1670,6 @@
              _g1h->is_in_g1_reserved(ref.is_narrow() ? oopDesc::load_decode_heap_oop((narrowOop*)ref)
                                                      : oopDesc::load_decode_heap_oop((oop*)ref)),
               "invariant");
-      IF_G1_DETAILED_STATS(note_pop());
     } else {
       StarTask null_task;
       ref = null_task;
@@ -1695,7 +1677,8 @@
   }
 
   void pop_from_overflow_queue(StarTask& ref) {
-    StarTask new_ref = overflowed_refs()->pop();
+    StarTask new_ref;
+    refs()->pop_overflow(new_ref);
     assert((oop*)new_ref != NULL, "pop() from a local non-empty stack");
     assert(UseCompressedOops || !new_ref.is_narrow(), "Error");
     assert(has_partial_array_mask((oop*)new_ref) ||
@@ -1705,8 +1688,8 @@
     ref = new_ref;
   }
 
-  int refs_to_scan()                             { return refs()->size();                 }
-  int overflowed_refs_to_scan()                  { return overflowed_refs()->length();    }
+  int refs_to_scan()            { return refs()->size(); }
+  int overflowed_refs_to_scan() { return refs()->overflow_stack()->length(); }
 
   template <class T> void update_rs(HeapRegion* from, T* p, int tid) {
     if (G1DeferredRSUpdate) {
@@ -1775,30 +1758,16 @@
   int* hash_seed() { return &_hash_seed; }
   int  queue_num() { return _queue_num; }
 
-  size_t term_attempts()   { return _term_attempts; }
+  size_t term_attempts() const  { return _term_attempts; }
   void note_term_attempt() { _term_attempts++; }
 
-#if G1_DETAILED_STATS
-  int pushes()          { return _pushes; }
-  int pops()            { return _pops; }
-  int steals()          { return _steals; }
-  int steal_attempts()  { return _steal_attempts; }
-  int overflow_pushes() { return _overflow_pushes; }
-
-  void note_push()          { _pushes++; }
-  void note_pop()           { _pops++; }
-  void note_steal()         { _steals++; }
-  void note_steal_attempt() { _steal_attempts++; }
-  void note_overflow_push() { _overflow_pushes++; }
-#endif
-
   void start_strong_roots() {
     _start_strong_roots = os::elapsedTime();
   }
   void end_strong_roots() {
     _strong_roots_time += (os::elapsedTime() - _start_strong_roots);
   }
-  double strong_roots_time() { return _strong_roots_time; }
+  double strong_roots_time() const { return _strong_roots_time; }
 
   void start_term_time() {
     note_term_attempt();
@@ -1807,12 +1776,17 @@
   void end_term_time() {
     _term_time += (os::elapsedTime() - _start_term);
   }
-  double term_time() { return _term_time; }
+  double term_time() const { return _term_time; }
 
-  double elapsed() {
+  double elapsed_time() const {
     return os::elapsedTime() - _start;
   }
 
+  static void
+    print_termination_stats_hdr(outputStream* const st = gclog_or_tty);
+  void
+    print_termination_stats(int i, outputStream* const st = gclog_or_tty) const;
+
   size_t* surviving_young_words() {
     // We add on to hide entry 0 which accumulates surviving words for
     // age -1 regions (i.e. non-young ones)
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Fri Aug 06 10:17:21 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Mon Aug 09 05:41:05 2010 -0700
@@ -81,11 +81,10 @@
   return attempt_allocation_slow(word_size, permit_collection_pause);
 }
 
-inline RefToScanQueue* G1CollectedHeap::task_queue(int i) {
+inline RefToScanQueue* G1CollectedHeap::task_queue(int i) const {
   return _task_queues->queue(i);
 }
 
-
 inline  bool G1CollectedHeap::isMarkedPrev(oop obj) const {
   return _cm->prevMarkBitMap()->isMarked((HeapWord *)obj);
 }
--- a/hotspot/src/share/vm/utilities/taskqueue.cpp	Fri Aug 06 10:17:21 2010 -0700
+++ b/hotspot/src/share/vm/utilities/taskqueue.cpp	Mon Aug 09 05:41:05 2010 -0700
@@ -36,6 +36,14 @@
   "qpush", "qpop", "qpop-s", "qattempt", "qsteal", "opush", "omax"
 };
 
+TaskQueueStats & TaskQueueStats::operator +=(const TaskQueueStats & addend)
+{
+  for (unsigned int i = 0; i < last_stat_id; ++i) {
+    _stats[i] += addend._stats[i];
+  }
+  return *this;
+}
+
 void TaskQueueStats::print_header(unsigned int line, outputStream* const stream,
                                   unsigned int width)
 {
@@ -71,6 +79,29 @@
   }
   #undef FMT
 }
+
+#ifdef ASSERT
+// Invariants which should hold after a TaskQueue has been emptied and is
+// quiescent; they do not hold at arbitrary times.
+void TaskQueueStats::verify() const
+{
+  assert(get(push) == get(pop) + get(steal),
+         err_msg("push=" SIZE_FORMAT " pop=" SIZE_FORMAT " steal=" SIZE_FORMAT,
+                 get(push), get(pop), get(steal)));
+  assert(get(pop_slow) <= get(pop),
+         err_msg("pop_slow=" SIZE_FORMAT " pop=" SIZE_FORMAT,
+                 get(pop_slow), get(pop)));
+  assert(get(steal) <= get(steal_attempt),
+         err_msg("steal=" SIZE_FORMAT " steal_attempt=" SIZE_FORMAT,
+                 get(steal), get(steal_attempt)));
+  assert(get(overflow) == 0 || get(push) != 0,
+         err_msg("overflow=" SIZE_FORMAT " push=" SIZE_FORMAT,
+                 get(overflow), get(push)));
+  assert(get(overflow_max_len) == 0 || get(overflow) != 0,
+         err_msg("overflow_max_len=" SIZE_FORMAT " overflow=" SIZE_FORMAT,
+                 get(overflow_max_len), get(overflow)));
+}
+#endif // ASSERT
 #endif // TASKQUEUE_STATS
 
 int TaskQueueSetSuper::randomParkAndMiller(int *seed0) {
--- a/hotspot/src/share/vm/utilities/taskqueue.hpp	Fri Aug 06 10:17:21 2010 -0700
+++ b/hotspot/src/share/vm/utilities/taskqueue.hpp	Mon Aug 09 05:41:05 2010 -0700
@@ -59,15 +59,21 @@
   inline void record_steal(bool success);
   inline void record_overflow(size_t new_length);
 
+  TaskQueueStats & operator +=(const TaskQueueStats & addend);
+
   inline size_t get(StatId id) const { return _stats[id]; }
   inline const size_t* get() const   { return _stats; }
 
   inline void reset();
 
+  // Print the specified line of the header (does not include a line separator).
   static void print_header(unsigned int line, outputStream* const stream = tty,
                            unsigned int width = 10);
+  // Print the statistics (does not include a line separator).
   void print(outputStream* const stream = tty, unsigned int width = 10) const;
 
+  DEBUG_ONLY(void verify() const;)
+
 private:
   size_t                    _stats[last_stat_id];
   static const char * const _names[last_stat_id];