8040803: G1: Concurrent mark hangs when mark stack overflows
authorpliden
Wed, 14 May 2014 13:32:44 +0200
changeset 24468 cb71997b6484
parent 24467 886598231964
child 24469 33e039f8ee55
8040803: G1: Concurrent mark hangs when mark stack overflows Reviewed-by: brutisso, ehelin
hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
hotspot/src/share/vm/utilities/workgroup.cpp
hotspot/src/share/vm/utilities/workgroup.hpp
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Sun May 11 16:35:43 2014 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed May 14 13:32:44 2014 +0200
@@ -978,7 +978,9 @@
   if (concurrent()) {
     SuspendibleThreadSet::leave();
   }
-  _first_overflow_barrier_sync.enter();
+
+  bool barrier_aborted = !_first_overflow_barrier_sync.enter();
+
   if (concurrent()) {
     SuspendibleThreadSet::join();
   }
@@ -986,7 +988,17 @@
   // more work
 
   if (verbose_low()) {
-    gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
+    if (barrier_aborted) {
+      gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
+    } else {
+      gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
+    }
+  }
+
+  if (barrier_aborted) {
+    // If the barrier aborted we ignore the overflow condition and
+    // just abort the whole marking phase as quickly as possible.
+    return;
   }
 
   // If we're executing the concurrent phase of marking, reset the marking
@@ -1026,14 +1038,20 @@
   if (concurrent()) {
     SuspendibleThreadSet::leave();
   }
-  _second_overflow_barrier_sync.enter();
+
+  bool barrier_aborted = !_second_overflow_barrier_sync.enter();
+
   if (concurrent()) {
     SuspendibleThreadSet::join();
   }
   // at this point everything should be re-initialized and ready to go
 
   if (verbose_low()) {
-    gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
+    if (barrier_aborted) {
+      gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
+    } else {
+      gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
+    }
   }
 }
 
@@ -3240,6 +3258,8 @@
   for (uint i = 0; i < _max_worker_id; ++i) {
     _tasks[i]->clear_region_fields();
   }
+  _first_overflow_barrier_sync.abort();
+  _second_overflow_barrier_sync.abort();
   _has_aborted = true;
 
   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
--- a/hotspot/src/share/vm/utilities/workgroup.cpp	Sun May 11 16:35:43 2014 -0700
+++ b/hotspot/src/share/vm/utilities/workgroup.cpp	Wed May 14 13:32:44 2014 +0200
@@ -376,21 +376,22 @@
 
 WorkGangBarrierSync::WorkGangBarrierSync()
   : _monitor(Mutex::safepoint, "work gang barrier sync", true),
-    _n_workers(0), _n_completed(0), _should_reset(false) {
+    _n_workers(0), _n_completed(0), _should_reset(false), _aborted(false) {
 }
 
 WorkGangBarrierSync::WorkGangBarrierSync(uint n_workers, const char* name)
   : _monitor(Mutex::safepoint, name, true),
-    _n_workers(n_workers), _n_completed(0), _should_reset(false) {
+    _n_workers(n_workers), _n_completed(0), _should_reset(false), _aborted(false) {
 }
 
 void WorkGangBarrierSync::set_n_workers(uint n_workers) {
-  _n_workers   = n_workers;
-  _n_completed = 0;
+  _n_workers    = n_workers;
+  _n_completed  = 0;
   _should_reset = false;
+  _aborted      = false;
 }
 
-void WorkGangBarrierSync::enter() {
+bool WorkGangBarrierSync::enter() {
   MutexLockerEx x(monitor(), Mutex::_no_safepoint_check_flag);
   if (should_reset()) {
     // The should_reset() was set and we are the first worker to enter
@@ -413,10 +414,17 @@
     set_should_reset(true);
     monitor()->notify_all();
   } else {
-    while (n_completed() != n_workers()) {
+    while (n_completed() != n_workers() && !aborted()) {
       monitor()->wait(/* no_safepoint_check */ true);
     }
   }
+  return !aborted();
+}
+
+void WorkGangBarrierSync::abort() {
+  MutexLockerEx x(monitor(), Mutex::_no_safepoint_check_flag);
+  set_aborted();
+  monitor()->notify_all();
 }
 
 // SubTasksDone functions.
--- a/hotspot/src/share/vm/utilities/workgroup.hpp	Sun May 11 16:35:43 2014 -0700
+++ b/hotspot/src/share/vm/utilities/workgroup.hpp	Wed May 14 13:32:44 2014 +0200
@@ -359,18 +359,20 @@
 class WorkGangBarrierSync : public StackObj {
 protected:
   Monitor _monitor;
-  uint     _n_workers;
-  uint     _n_completed;
+  uint    _n_workers;
+  uint    _n_completed;
   bool    _should_reset;
+  bool    _aborted;
 
   Monitor* monitor()        { return &_monitor; }
   uint     n_workers()      { return _n_workers; }
   uint     n_completed()    { return _n_completed; }
   bool     should_reset()   { return _should_reset; }
+  bool     aborted()        { return _aborted; }
 
   void     zero_completed() { _n_completed = 0; }
   void     inc_completed()  { _n_completed++; }
-
+  void     set_aborted()    { _aborted = true; }
   void     set_should_reset(bool v) { _should_reset = v; }
 
 public:
@@ -383,8 +385,14 @@
 
   // Enter the barrier. A worker that enters the barrier will
   // not be allowed to leave until all other threads have
-  // also entered the barrier.
-  void enter();
+  // also entered the barrier or the barrier is aborted.
+  // Returns false if the barrier was aborted.
+  bool enter();
+
+  // Aborts the barrier and wakes up any threads waiting for
+  // the barrier to complete. The barrier will remain in the
+  // aborted state until the next call to set_n_workers().
+  void abort();
 };
 
 // A class to manage claiming of subtasks within a group of tasks.  The