8040803: G1: Concurrent mark hangs when mark stack overflows
Reviewed-by: brutisso, ehelin
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp Sun May 11 16:35:43 2014 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp Wed May 14 13:32:44 2014 +0200
@@ -978,7 +978,9 @@
if (concurrent()) {
SuspendibleThreadSet::leave();
}
- _first_overflow_barrier_sync.enter();
+
+ bool barrier_aborted = !_first_overflow_barrier_sync.enter();
+
if (concurrent()) {
SuspendibleThreadSet::join();
}
@@ -986,7 +988,17 @@
// more work
if (verbose_low()) {
- gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
+ if (barrier_aborted) {
+ gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
+ } else {
+ gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
+ }
+ }
+
+ if (barrier_aborted) {
+ // If the barrier was aborted, we ignore the overflow condition and
+ // just abort the whole marking phase as quickly as possible.
+ return;
}
// If we're executing the concurrent phase of marking, reset the marking
@@ -1026,14 +1038,20 @@
if (concurrent()) {
SuspendibleThreadSet::leave();
}
- _second_overflow_barrier_sync.enter();
+
+ bool barrier_aborted = !_second_overflow_barrier_sync.enter();
+
if (concurrent()) {
SuspendibleThreadSet::join();
}
// at this point everything should be re-initialized and ready to go
if (verbose_low()) {
- gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
+ if (barrier_aborted) {
+ gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
+ } else {
+ gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
+ }
}
}
@@ -3240,6 +3258,8 @@
for (uint i = 0; i < _max_worker_id; ++i) {
_tasks[i]->clear_region_fields();
}
+ _first_overflow_barrier_sync.abort();
+ _second_overflow_barrier_sync.abort();
_has_aborted = true;
SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
--- a/hotspot/src/share/vm/utilities/workgroup.cpp Sun May 11 16:35:43 2014 -0700
+++ b/hotspot/src/share/vm/utilities/workgroup.cpp Wed May 14 13:32:44 2014 +0200
@@ -376,21 +376,22 @@
WorkGangBarrierSync::WorkGangBarrierSync()
: _monitor(Mutex::safepoint, "work gang barrier sync", true),
- _n_workers(0), _n_completed(0), _should_reset(false) {
+ _n_workers(0), _n_completed(0), _should_reset(false), _aborted(false) { // default barrier: zero workers, none completed, no pending reset, not aborted
}
WorkGangBarrierSync::WorkGangBarrierSync(uint n_workers, const char* name)
: _monitor(Mutex::safepoint, name, true),
- _n_workers(n_workers), _n_completed(0), _should_reset(false) {
+ _n_workers(n_workers), _n_completed(0), _should_reset(false), _aborted(false) { // barrier for n_workers threads, monitor named `name`; starts with none completed and not aborted
}
void WorkGangBarrierSync::set_n_workers(uint n_workers) {
- _n_workers = n_workers;
- _n_completed = 0;
+ _n_workers = n_workers; // number of threads enter() waits for
+ _n_completed = 0; // restart the completion count for the new worker set
_should_reset = false;
+ _aborted = false; // clear the flag set by abort(), so enter() can return true again
}
-void WorkGangBarrierSync::enter() {
+bool WorkGangBarrierSync::enter() {
MutexLockerEx x(monitor(), Mutex::_no_safepoint_check_flag);
if (should_reset()) {
// The should_reset() was set and we are the first worker to enter
@@ -413,10 +414,17 @@
set_should_reset(true);
monitor()->notify_all();
} else {
- while (n_completed() != n_workers()) {
+ while (n_completed() != n_workers() && !aborted()) {
monitor()->wait(/* no_safepoint_check */ true);
}
}
+ return !aborted();
+}
+
+void WorkGangBarrierSync::abort() { // Puts the barrier in the aborted state and wakes all threads waiting in enter().
+ MutexLockerEx x(monitor(), Mutex::_no_safepoint_check_flag); // hold the barrier's monitor, the same one enter() locks and waits on
+ set_aborted(); // sets _aborted = true, which makes the wait loop condition in enter() false
+ monitor()->notify_all(); // wake every waiter so it re-checks aborted() and leaves the barrier
}
// SubTasksDone functions.
--- a/hotspot/src/share/vm/utilities/workgroup.hpp Sun May 11 16:35:43 2014 -0700
+++ b/hotspot/src/share/vm/utilities/workgroup.hpp Wed May 14 13:32:44 2014 +0200
@@ -359,18 +359,20 @@
class WorkGangBarrierSync : public StackObj {
protected:
Monitor _monitor;
- uint _n_workers;
- uint _n_completed;
+ uint _n_workers;
+ uint _n_completed;
bool _should_reset;
+ bool _aborted;
Monitor* monitor() { return &_monitor; }
uint n_workers() { return _n_workers; }
uint n_completed() { return _n_completed; }
bool should_reset() { return _should_reset; }
+ bool aborted() { return _aborted; }
void zero_completed() { _n_completed = 0; }
void inc_completed() { _n_completed++; }
-
+ void set_aborted() { _aborted = true; }
void set_should_reset(bool v) { _should_reset = v; }
public:
@@ -383,8 +385,14 @@
// Enter the barrier. A worker that enters the barrier will
// not be allowed to leave until all other threads have
- // also entered the barrier.
- void enter();
+ // also entered the barrier or the barrier is aborted.
+ // Returns false if the barrier was aborted.
+ bool enter();
+
+ // Aborts the barrier and wakes up any threads waiting for
+ // the barrier to complete. The barrier will remain in the
+ // aborted state until the next call to set_n_workers().
+ void abort();
};
// A class to manage claiming of subtasks within a group of tasks. The