8190426: Lazily initialize refinement threads with UseDynamicNumberOfGCThreads
Reviewed-by: sangheki, sjohanss
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp Thu Nov 23 15:51:06 2017 +0100
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp Thu Nov 23 15:51:06 2017 +0100
@@ -33,6 +33,107 @@
#include "utilities/pair.hpp"
#include <math.h>
+G1ConcurrentRefineThread* G1ConcurrentRefineThreadControl::create_refinement_thread(uint worker_id, bool initializing) {
+ G1ConcurrentRefineThread* result = NULL;
+ if (initializing || !InjectGCWorkerCreationFailure) {
+ result = new G1ConcurrentRefineThread(_cr, worker_id);
+ }
+ if (result == NULL || result->osthread() == NULL) {
+ log_warning(gc)("Failed to create refinement thread %u, no more %s",
+ worker_id,
+ result == NULL ? "memory" : "OS threads");
+ }
+ return result;
+}
+
+G1ConcurrentRefineThreadControl::G1ConcurrentRefineThreadControl() :
+ _cr(NULL),
+ _threads(NULL),
+ _num_max_threads(0)
+{
+}
+
+G1ConcurrentRefineThreadControl::~G1ConcurrentRefineThreadControl() {
+ for (uint i = 0; i < _num_max_threads; i++) {
+ G1ConcurrentRefineThread* t = _threads[i];
+ if (t != NULL) {
+ delete t;
+ }
+ }
+ FREE_C_HEAP_ARRAY(G1ConcurrentRefineThread*, _threads);
+}
+
+jint G1ConcurrentRefineThreadControl::initialize(G1ConcurrentRefine* cr, uint num_max_threads) {
+ assert(cr != NULL, "G1ConcurrentRefine must not be NULL");
+ _cr = cr;
+ _num_max_threads = num_max_threads;
+
+ _threads = NEW_C_HEAP_ARRAY_RETURN_NULL(G1ConcurrentRefineThread*, num_max_threads, mtGC);
+ if (_threads == NULL) {
+ vm_shutdown_during_initialization("Could not allocate thread holder array.");
+ return JNI_ENOMEM;
+ }
+
+ for (uint i = 0; i < num_max_threads; i++) {
+ if (UseDynamicNumberOfGCThreads && i != 0 /* Always start first thread. */) {
+ _threads[i] = NULL;
+ } else {
+ _threads[i] = create_refinement_thread(i, true);
+ if (_threads[i] == NULL) {
+ vm_shutdown_during_initialization("Could not allocate refinement threads.");
+ return JNI_ENOMEM;
+ }
+ }
+ }
+ return JNI_OK;
+}
+
+void G1ConcurrentRefineThreadControl::maybe_activate_next(uint cur_worker_id) {
+ assert(cur_worker_id < _num_max_threads,
+ "Activating another thread from %u not allowed since there can be at most %u",
+ cur_worker_id, _num_max_threads);
+ if (cur_worker_id == (_num_max_threads - 1)) {
+ // Already the last thread, there is no more thread to activate.
+ return;
+ }
+
+ uint worker_id = cur_worker_id + 1;
+ G1ConcurrentRefineThread* thread_to_activate = _threads[worker_id];
+ if (thread_to_activate == NULL) {
+ // Still need to create the thread...
+ _threads[worker_id] = create_refinement_thread(worker_id, false);
+ thread_to_activate = _threads[worker_id];
+ }
+ if (thread_to_activate != NULL && !thread_to_activate->is_active()) {
+ thread_to_activate->activate();
+ }
+}
+
+void G1ConcurrentRefineThreadControl::print_on(outputStream* st) const {
+ for (uint i = 0; i < _num_max_threads; ++i) {
+ if (_threads[i] != NULL) {
+ _threads[i]->print_on(st);
+ st->cr();
+ }
+ }
+}
+
+void G1ConcurrentRefineThreadControl::worker_threads_do(ThreadClosure* tc) {
+ for (uint i = 0; i < _num_max_threads; i++) {
+ if (_threads[i] != NULL) {
+ tc->do_thread(_threads[i]);
+ }
+ }
+}
+
+void G1ConcurrentRefineThreadControl::stop() {
+ for (uint i = 0; i < _num_max_threads; i++) {
+ if (_threads[i] != NULL) {
+ _threads[i]->stop();
+ }
+ }
+}
+
// Arbitrary but large limits, to simplify some of the zone calculations.
// The general idea is to allow expressions like
// MIN2(x OP y, max_XXX_zone)
@@ -96,7 +197,7 @@
size_t yellow_zone,
uint worker_i) {
double yellow_size = yellow_zone - green_zone;
- double step = yellow_size / G1ConcurrentRefine::thread_num();
+ double step = yellow_size / G1ConcurrentRefine::max_num_threads();
if (worker_i == 0) {
// Potentially activate worker 0 more aggressively, to keep
// available buffers near green_zone value. When yellow_size is
@@ -115,8 +216,7 @@
size_t yellow_zone,
size_t red_zone,
size_t min_yellow_zone_size) :
- _threads(NULL),
- _n_worker_threads(thread_num()),
+ _thread_control(),
_green_zone(green_zone),
_yellow_zone(yellow_zone),
_red_zone(red_zone),
@@ -125,9 +225,13 @@
assert_zone_constraints_gyr(green_zone, yellow_zone, red_zone);
}
+jint G1ConcurrentRefine::initialize() {
+ return _thread_control.initialize(this, max_num_threads());
+}
+
static size_t calc_min_yellow_zone_size() {
size_t step = G1ConcRefinementThresholdStep;
- uint n_workers = G1ConcurrentRefine::thread_num();
+ uint n_workers = G1ConcurrentRefine::max_num_threads();
if ((max_yellow_zone / step) < n_workers) {
return max_yellow_zone;
} else {
@@ -191,77 +295,27 @@
return NULL;
}
- cr->_threads = NEW_C_HEAP_ARRAY_RETURN_NULL(G1ConcurrentRefineThread*, cr->_n_worker_threads, mtGC);
- if (cr->_threads == NULL) {
- *ecode = JNI_ENOMEM;
- vm_shutdown_during_initialization("Could not allocate an array for G1ConcurrentRefineThread");
- return NULL;
- }
-
- uint worker_id_offset = DirtyCardQueueSet::num_par_ids();
-
- G1ConcurrentRefineThread *next = NULL;
- for (uint i = cr->_n_worker_threads - 1; i != UINT_MAX; i--) {
- Thresholds thresholds = calc_thresholds(green_zone, yellow_zone, i);
- G1ConcurrentRefineThread* t =
- new G1ConcurrentRefineThread(cr,
- next,
- worker_id_offset,
- i,
- activation_level(thresholds),
- deactivation_level(thresholds));
- assert(t != NULL, "Conc refine should have been created");
- if (t->osthread() == NULL) {
- *ecode = JNI_ENOMEM;
- vm_shutdown_during_initialization("Could not create G1ConcurrentRefineThread");
- return NULL;
- }
-
- assert(t->cr() == cr, "Conc refine thread should refer to this");
- cr->_threads[i] = t;
- next = t;
- }
-
- *ecode = JNI_OK;
+ *ecode = cr->initialize();
return cr;
}
void G1ConcurrentRefine::stop() {
- for (uint i = 0; i < _n_worker_threads; i++) {
- _threads[i]->stop();
- }
-}
-
-void G1ConcurrentRefine::update_thread_thresholds() {
- for (uint i = 0; i < _n_worker_threads; i++) {
- Thresholds thresholds = calc_thresholds(_green_zone, _yellow_zone, i);
- _threads[i]->update_thresholds(activation_level(thresholds),
- deactivation_level(thresholds));
- }
+ _thread_control.stop();
}
G1ConcurrentRefine::~G1ConcurrentRefine() {
- for (uint i = 0; i < _n_worker_threads; i++) {
- delete _threads[i];
- }
- FREE_C_HEAP_ARRAY(G1ConcurrentRefineThread*, _threads);
}
void G1ConcurrentRefine::threads_do(ThreadClosure *tc) {
- for (uint i = 0; i < _n_worker_threads; i++) {
- tc->do_thread(_threads[i]);
- }
+ _thread_control.worker_threads_do(tc);
}
-uint G1ConcurrentRefine::thread_num() {
+uint G1ConcurrentRefine::max_num_threads() {
return G1ConcRefinementThreads;
}
void G1ConcurrentRefine::print_threads_on(outputStream* st) const {
- for (uint i = 0; i < _n_worker_threads; ++i) {
- _threads[i]->print_on(st);
- st->cr();
- }
+ _thread_control.print_on(st);
}
static size_t calc_new_green_zone(size_t green,
@@ -326,16 +380,15 @@
if (G1UseAdaptiveConcRefinement) {
update_zones(update_rs_time, update_rs_processed_buffers, goal_ms);
- update_thread_thresholds();
// Change the barrier params
- if (_n_worker_threads == 0) {
+ if (max_num_threads() == 0) {
// Disable dcqs notification when there are no threads to notify.
dcqs.set_process_completed_threshold(INT_MAX);
} else {
// Worker 0 is the primary; wakeup is via dcqs notification.
STATIC_ASSERT(max_yellow_zone <= INT_MAX);
- size_t activate = _threads[0]->activation_threshold();
+ size_t activate = activation_threshold(0);
dcqs.set_process_completed_threshold((int)activate);
}
dcqs.set_max_completed_queue((int)red_zone());
@@ -349,3 +402,42 @@
}
dcqs.notify_if_necessary();
}
+
+size_t G1ConcurrentRefine::activation_threshold(uint worker_id) const {
+ Thresholds thresholds = calc_thresholds(_green_zone, _yellow_zone, worker_id);
+ return activation_level(thresholds);
+}
+
+size_t G1ConcurrentRefine::deactivation_threshold(uint worker_id) const {
+ Thresholds thresholds = calc_thresholds(_green_zone, _yellow_zone, worker_id);
+ return deactivation_level(thresholds);
+}
+
+uint G1ConcurrentRefine::worker_id_offset() {
+ return DirtyCardQueueSet::num_par_ids();
+}
+
+void G1ConcurrentRefine::maybe_activate_more_threads(uint worker_id, size_t num_cur_buffers) {
+ if (num_cur_buffers > activation_threshold(worker_id + 1)) {
+ _thread_control.maybe_activate_next(worker_id);
+ }
+}
+
+bool G1ConcurrentRefine::do_refinement_step(uint worker_id) {
+ DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+
+ size_t curr_buffer_num = dcqs.completed_buffers_num();
+ // If the number of the buffers falls down into the yellow zone,
+ // that means that the transition period after the evacuation pause has ended.
+ // Since the value written to the DCQS is the same for all threads, there is no
+ // need to synchronize.
+ if (dcqs.completed_queue_padding() > 0 && curr_buffer_num <= yellow_zone()) {
+ dcqs.set_completed_queue_padding(0);
+ }
+
+ maybe_activate_more_threads(worker_id, curr_buffer_num);
+
+ // Process the next buffer, if there are enough left.
+ return dcqs.refine_completed_buffer_concurrently(worker_id + worker_id_offset(),
+ deactivation_threshold(worker_id));
+}
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp Thu Nov 23 15:51:06 2017 +0100
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp Thu Nov 23 15:51:06 2017 +0100
@@ -30,30 +30,63 @@
// Forward decl
class CardTableEntryClosure;
+class G1ConcurrentRefine;
class G1ConcurrentRefineThread;
class outputStream;
class ThreadClosure;
-class G1ConcurrentRefine : public CHeapObj<mtGC> {
+// Helper class for refinement thread management. Used to start, stop and
+// iterate over them.
+class G1ConcurrentRefineThreadControl VALUE_OBJ_CLASS_SPEC {
+ G1ConcurrentRefine* _cr;
+
G1ConcurrentRefineThread** _threads;
- uint _n_worker_threads;
- /*
- * The value of the update buffer queue length falls into one of 3 zones:
- * green, yellow, red. If the value is in [0, green) nothing is
- * done, the buffers are left unprocessed to enable the caching effect of the
- * dirtied cards. In the yellow zone [green, yellow) the concurrent refinement
- * threads are gradually activated. In [yellow, red) all threads are
- * running. If the length becomes red (max queue length) the mutators start
- * processing the buffers.
- *
- * There are some interesting cases (when G1UseAdaptiveConcRefinement
- * is turned off):
- * 1) green = yellow = red = 0. In this case the mutator will process all
- * buffers. Except for those that are created by the deferred updates
- * machinery during a collection.
- * 2) green = 0. Means no caching. Can be a good way to minimize the
- * amount of time spent updating rsets during a collection.
- */
+ uint _num_max_threads;
+
+ // Create the refinement thread for the given worker id.
+ // If initializing is true, ignore InjectGCWorkerCreationFailure.
+ G1ConcurrentRefineThread* create_refinement_thread(uint worker_id, bool initializing);
+public:
+ G1ConcurrentRefineThreadControl();
+ ~G1ConcurrentRefineThreadControl();
+
+ jint initialize(G1ConcurrentRefine* cr, uint num_max_threads);
+
+ // If there is a "successor" thread that can be activated given the current id,
+ // activate it.
+ void maybe_activate_next(uint cur_worker_id);
+
+ void print_on(outputStream* st) const;
+ void worker_threads_do(ThreadClosure* tc);
+ void stop();
+};
+
+// Controls refinement threads and their activation based on the number of completed
+// buffers currently available in the global dirty card queue.
+// Refinement threads pick work from the queue based on these thresholds. They are activated
+// gradually based on the amount of work to do.
+// Refinement thread n activates thread n+1 if the instance of this class determines there
+// is enough work available. Threads deactivate themselves if the current amount of
+// completed buffers falls below their individual threshold.
+class G1ConcurrentRefine : public CHeapObj<mtGC> {
+ G1ConcurrentRefineThreadControl _thread_control;
+ /*
+ * The value of the completed dirty card queue length falls into one of 3 zones:
+ * green, yellow, red. If the value is in [0, green) nothing is
+ * done, the buffers are left unprocessed to enable the caching effect of the
+ * dirtied cards. In the yellow zone [green, yellow) the concurrent refinement
+ * threads are gradually activated. In [yellow, red) all threads are
+ * running. If the length becomes red (max queue length) the mutators start
+ * processing the buffers.
+ *
+ * There are some interesting cases (when G1UseAdaptiveConcRefinement
+ * is turned off):
+ * 1) green = yellow = red = 0. In this case the mutator will process all
+ * buffers. Except for those that are created by the deferred updates
+ * machinery during a collection.
+ * 2) green = 0. Means no caching. Can be a good way to minimize the
+ * amount of time spent updating remembered sets during a collection.
+ */
size_t _green_zone;
size_t _yellow_zone;
size_t _red_zone;
@@ -69,24 +102,32 @@
size_t update_rs_processed_buffers,
double goal_ms);
- // Update thread thresholds to account for updated zone values.
- void update_thread_thresholds();
+ static uint worker_id_offset();
+ void maybe_activate_more_threads(uint worker_id, size_t num_cur_buffers);
- public:
+ jint initialize();
+public:
~G1ConcurrentRefine();
- // Returns a G1ConcurrentRefine instance if succeeded to create/initialize G1ConcurrentRefine and G1ConcurrentRefineThreads.
- // Otherwise, returns NULL with error code.
+ // Returns a G1ConcurrentRefine instance if succeeded to create/initialize the
+ // G1ConcurrentRefine instance. Otherwise, returns NULL with error code.
static G1ConcurrentRefine* create(jint* ecode);
void stop();
+ // Adjust refinement thresholds based on work done during the pause and the goal time.
void adjust(double update_rs_time, size_t update_rs_processed_buffers, double goal_ms);
+ size_t activation_threshold(uint worker_id) const;
+ size_t deactivation_threshold(uint worker_id) const;
+ // Perform a single refinement step. Called by the refinement threads when woken up.
+ bool do_refinement_step(uint worker_id);
+
// Iterate over all concurrent refinement threads applying the given closure.
void threads_do(ThreadClosure *tc);
- static uint thread_num();
+ // Maximum number of refinement threads.
+ static uint max_num_threads();
void print_threads_on(outputStream* st) const;
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp Thu Nov 23 15:51:06 2017 +0100
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp Thu Nov 23 15:51:06 2017 +0100
@@ -25,32 +25,20 @@
#include "precompiled.hpp"
#include "gc/g1/g1ConcurrentRefine.hpp"
#include "gc/g1/g1ConcurrentRefineThread.hpp"
-#include "gc/g1/g1CollectedHeap.inline.hpp"
-#include "gc/g1/g1RemSet.hpp"
#include "gc/shared/suspendibleThreadSet.hpp"
#include "logging/log.hpp"
#include "memory/resourceArea.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/mutexLocker.hpp"
-G1ConcurrentRefineThread::G1ConcurrentRefineThread(G1ConcurrentRefine* cr,
- G1ConcurrentRefineThread *next,
- uint worker_id_offset,
- uint worker_id,
- size_t activate,
- size_t deactivate) :
+G1ConcurrentRefineThread::G1ConcurrentRefineThread(G1ConcurrentRefine* cr, uint worker_id) :
ConcurrentGCThread(),
- _worker_id_offset(worker_id_offset),
_worker_id(worker_id),
_active(false),
- _next(next),
_monitor(NULL),
_cr(cr),
- _vtime_accum(0.0),
- _activation_threshold(activate),
- _deactivation_threshold(deactivate)
+ _vtime_accum(0.0)
{
-
// Each thread has its own monitor. The i-th thread is responsible for signaling
// to thread i+1 if the number of buffers in the queue exceeds a threshold for this
// thread. Monitors are also used to wake up the threads during termination.
@@ -67,13 +55,6 @@
create_and_start();
}
-void G1ConcurrentRefineThread::update_thresholds(size_t activate,
- size_t deactivate) {
- assert(deactivate < activate, "precondition");
- _activation_threshold = activate;
- _deactivation_threshold = deactivate;
-}
-
void G1ConcurrentRefineThread::wait_for_completed_buffers() {
MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
while (!should_terminate() && !is_active()) {
@@ -118,9 +99,9 @@
}
size_t buffers_processed = 0;
- DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
- log_debug(gc, refine)("Activated %d, on threshold: " SIZE_FORMAT ", current: " SIZE_FORMAT,
- _worker_id, _activation_threshold, dcqs.completed_buffers_num());
+ log_debug(gc, refine)("Activated worker %d, on threshold: " SIZE_FORMAT ", current: " SIZE_FORMAT,
+ _worker_id, _cr->activation_threshold(_worker_id),
+ JavaThread::dirty_card_queue_set().completed_buffers_num());
{
SuspendibleThreadSetJoiner sts_join;
@@ -131,33 +112,18 @@
continue; // Re-check for termination after yield delay.
}
- size_t curr_buffer_num = dcqs.completed_buffers_num();
- // If the number of the buffers falls down into the yellow zone,
- // that means that the transition period after the evacuation pause has ended.
- if (dcqs.completed_queue_padding() > 0 && curr_buffer_num <= cr()->yellow_zone()) {
- dcqs.set_completed_queue_padding(0);
- }
-
- // Check if we need to activate the next thread.
- if ((_next != NULL) &&
- !_next->is_active() &&
- (curr_buffer_num > _next->_activation_threshold)) {
- _next->activate();
- }
-
- // Process the next buffer, if there are enough left.
- if (!dcqs.refine_completed_buffer_concurrently(_worker_id + _worker_id_offset, _deactivation_threshold)) {
- break; // Deactivate, number of buffers fell below threshold.
+ if (!_cr->do_refinement_step(_worker_id)) {
+ break;
}
++buffers_processed;
}
}
deactivate();
- log_debug(gc, refine)("Deactivated %d, off threshold: " SIZE_FORMAT
+ log_debug(gc, refine)("Deactivated worker %d, off threshold: " SIZE_FORMAT
", current: " SIZE_FORMAT ", processed: " SIZE_FORMAT,
- _worker_id, _deactivation_threshold,
- dcqs.completed_buffers_num(),
+ _worker_id, _cr->deactivation_threshold(_worker_id),
+ JavaThread::dirty_card_queue_set().completed_buffers_num(),
buffers_processed);
if (os::supports_vtime()) {
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.hpp Thu Nov 23 15:51:06 2017 +0100
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.hpp Thu Nov 23 15:51:06 2017 +0100
@@ -43,43 +43,29 @@
uint _worker_id;
uint _worker_id_offset;
- // The refinement threads collection is linked list. A predecessor can activate a successor
- // when the number of the rset update buffer crosses a certain threshold. A successor
- // would self-deactivate when the number of the buffers falls below the threshold.
bool _active;
- G1ConcurrentRefineThread* _next;
Monitor* _monitor;
G1ConcurrentRefine* _cr;
- // This thread's activation/deactivation thresholds
- size_t _activation_threshold;
- size_t _deactivation_threshold;
-
void wait_for_completed_buffers();
void set_active(bool x) { _active = x; }
- bool is_active();
- void activate();
+ // Deactivate this thread.
void deactivate();
bool is_primary() { return (_worker_id == 0); }
void run_service();
void stop_service();
+public:
+ G1ConcurrentRefineThread(G1ConcurrentRefine* cg1r, uint worker_id);
-public:
- // Constructor
- G1ConcurrentRefineThread(G1ConcurrentRefine* cr, G1ConcurrentRefineThread* next,
- uint worker_id_offset, uint worker_id,
- size_t activate, size_t deactivate);
-
- void update_thresholds(size_t activate, size_t deactivate);
- size_t activation_threshold() const { return _activation_threshold; }
+ bool is_active();
+ // Activate this thread.
+ void activate();
// Total virtual time so far.
double vtime_accum() { return _vtime_accum; }
-
- G1ConcurrentRefine* cr() { return _cr; }
};
#endif // SHARE_VM_GC_G1_G1CONCURRENTREFINETHREAD_HPP
--- a/src/hotspot/share/gc/g1/g1RemSet.cpp Thu Nov 23 15:51:06 2017 +0100
+++ b/src/hotspot/share/gc/g1/g1RemSet.cpp Thu Nov 23 15:51:06 2017 +0100
@@ -298,7 +298,7 @@
}
uint G1RemSet::num_par_rem_sets() {
- return MAX2(DirtyCardQueueSet::num_par_ids() + G1ConcurrentRefine::thread_num(), ParallelGCThreads);
+ return MAX2(DirtyCardQueueSet::num_par_ids() + G1ConcurrentRefine::max_num_threads(), ParallelGCThreads);
}
void G1RemSet::initialize(size_t capacity, uint max_regions) {
--- a/src/hotspot/share/gc/g1/g1RemSetSummary.cpp Thu Nov 23 15:51:06 2017 +0100
+++ b/src/hotspot/share/gc/g1/g1RemSetSummary.cpp Thu Nov 23 15:51:06 2017 +0100
@@ -86,7 +86,7 @@
_num_processed_buf_mutator(0),
_num_processed_buf_rs_threads(0),
_num_coarsenings(0),
- _num_vtimes(G1ConcurrentRefine::thread_num()),
+ _num_vtimes(G1ConcurrentRefine::max_num_threads()),
_rs_threads_vtimes(NEW_C_HEAP_ARRAY(double, _num_vtimes, mtGC)),
_sampling_thread_vtime(0.0f) {
@@ -99,7 +99,7 @@
_num_processed_buf_mutator(0),
_num_processed_buf_rs_threads(0),
_num_coarsenings(0),
- _num_vtimes(G1ConcurrentRefine::thread_num()),
+ _num_vtimes(G1ConcurrentRefine::max_num_threads()),
_rs_threads_vtimes(NEW_C_HEAP_ARRAY(double, _num_vtimes, mtGC)),
_sampling_thread_vtime(0.0f) {
update();