--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Thu Sep 16 13:45:55 2010 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Mon Sep 20 14:38:38 2010 -0700
@@ -195,7 +195,7 @@
"Offset of FreeChunk::_prev within FreeChunk must match"
" that of OopDesc::_klass within OopDesc");
)
- if (ParallelGCThreads > 0) {
+ if (CollectedHeap::use_parallel_gc_threads()) {
typedef CMSParGCThreadState* CMSParGCThreadStatePtr;
_par_gc_thread_states =
NEW_C_HEAP_ARRAY(CMSParGCThreadStatePtr, ParallelGCThreads);
@@ -616,7 +616,7 @@
}
// Support for multi-threaded concurrent phases
- if (ParallelGCThreads > 0 && CMSConcurrentMTEnabled) {
+ if (CollectedHeap::use_parallel_gc_threads() && CMSConcurrentMTEnabled) {
if (FLAG_IS_DEFAULT(ConcGCThreads)) {
// just for now
FLAG_SET_DEFAULT(ConcGCThreads, (ParallelGCThreads + 3)/4);
@@ -628,6 +628,8 @@
warning("GC/CMS: _conc_workers allocation failure: "
"forcing -CMSConcurrentMTEnabled");
CMSConcurrentMTEnabled = false;
+ } else {
+ _conc_workers->initialize_workers();
}
} else {
CMSConcurrentMTEnabled = false;
@@ -936,7 +938,7 @@
// along with all the other pointers into the heap but
// compaction is expected to be a rare event with
// a heap using cms so don't do it without seeing the need.
- if (ParallelGCThreads > 0) {
+ if (CollectedHeap::use_parallel_gc_threads()) {
for (uint i = 0; i < ParallelGCThreads; i++) {
_par_gc_thread_states[i]->promo.reset();
}
@@ -2630,7 +2632,8 @@
// Should call gc_prologue_work() for all cms gens we are responsible for
bool registerClosure = _collectorState >= Marking
&& _collectorState < Sweeping;
- ModUnionClosure* muc = ParallelGCThreads > 0 ? &_modUnionClosurePar
+ ModUnionClosure* muc = CollectedHeap::use_parallel_gc_threads() ?
+ &_modUnionClosurePar
: &_modUnionClosure;
_cmsGen->gc_prologue_work(full, registerClosure, muc);
_permGen->gc_prologue_work(full, registerClosure, muc);
@@ -2731,7 +2734,7 @@
collector()->gc_epilogue(full);
// Also reset promotion tracking in par gc thread states.
- if (ParallelGCThreads > 0) {
+ if (CollectedHeap::use_parallel_gc_threads()) {
for (uint i = 0; i < ParallelGCThreads; i++) {
_par_gc_thread_states[i]->promo.stopTrackingPromotions(i);
}
@@ -3731,7 +3734,6 @@
// MT Concurrent Marking Task
class CMSConcMarkingTask: public YieldingFlexibleGangTask {
CMSCollector* _collector;
- YieldingFlexibleWorkGang* _workers; // the whole gang
int _n_workers; // requested/desired # workers
bool _asynch;
bool _result;
@@ -3751,21 +3753,19 @@
CMSConcMarkingTask(CMSCollector* collector,
CompactibleFreeListSpace* cms_space,
CompactibleFreeListSpace* perm_space,
- bool asynch, int n_workers,
+ bool asynch,
YieldingFlexibleWorkGang* workers,
OopTaskQueueSet* task_queues):
YieldingFlexibleGangTask("Concurrent marking done multi-threaded"),
_collector(collector),
_cms_space(cms_space),
_perm_space(perm_space),
- _asynch(asynch), _n_workers(n_workers), _result(true),
- _workers(workers), _task_queues(task_queues),
- _term(n_workers, task_queues, _collector, asynch),
+ _asynch(asynch), _n_workers(0), _result(true),
+ _task_queues(task_queues),
+ _term(_n_workers, task_queues, _collector, asynch),
_bit_map_lock(collector->bitMapLock())
{
- assert(n_workers <= workers->total_workers(),
- "Else termination won't work correctly today"); // XXX FIX ME!
- _requested_size = n_workers;
+ _requested_size = _n_workers;
_term.set_task(this);
assert(_cms_space->bottom() < _perm_space->bottom(),
"Finger incorrectly initialized below");
@@ -3781,6 +3781,10 @@
CMSConcMarkingTerminator* terminator() { return &_term; }
+ virtual void set_for_termination(int active_workers) {
+ terminator()->reset_for_reuse(active_workers);
+ }
+
void work(int i);
virtual void coordinator_yield(); // stuff done by coordinator
@@ -4220,9 +4224,12 @@
CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace();
CompactibleFreeListSpace* perm_space = _permGen->cmsSpace();
- CMSConcMarkingTask tsk(this, cms_space, perm_space,
- asynch, num_workers /* number requested XXX */,
- conc_workers(), task_queues());
+ CMSConcMarkingTask tsk(this,
+ cms_space,
+ perm_space,
+ asynch,
+ conc_workers(),
+ task_queues());
// Since the actual number of workers we get may be different
// from the number we requested above, do we need to do anything different
@@ -4326,6 +4333,10 @@
verify_overflow_empty();
_abort_preclean = false;
if (CMSPrecleaningEnabled) {
+ // Precleaning is currently not MT but the reference processor
+ // may be set for MT. Disable it temporarily here.
+ ReferenceProcessor* rp = ref_processor();
+ ReferenceProcessorMTProcMutator z(rp, false);
_eden_chunk_index = 0;
size_t used = get_eden_used();
size_t capacity = get_eden_capacity();
@@ -4918,7 +4929,7 @@
// dirtied since the first checkpoint in this GC cycle and prior to
// the most recent young generation GC, minus those cleaned up by the
// concurrent precleaning.
- if (CMSParallelRemarkEnabled && ParallelGCThreads > 0) {
+ if (CMSParallelRemarkEnabled && CollectedHeap::use_parallel_gc_threads()) {
TraceTime t("Rescan (parallel) ", PrintGCDetails, false, gclog_or_tty);
do_remark_parallel();
} else {
@@ -5012,7 +5023,6 @@
// Parallel remark task
class CMSParRemarkTask: public AbstractGangTask {
CMSCollector* _collector;
- WorkGang* _workers;
int _n_workers;
CompactibleFreeListSpace* _cms_space;
CompactibleFreeListSpace* _perm_space;
@@ -5025,21 +5035,21 @@
CMSParRemarkTask(CMSCollector* collector,
CompactibleFreeListSpace* cms_space,
CompactibleFreeListSpace* perm_space,
- int n_workers, WorkGang* workers,
+ int n_workers, FlexibleWorkGang* workers,
OopTaskQueueSet* task_queues):
AbstractGangTask("Rescan roots and grey objects in parallel"),
_collector(collector),
_cms_space(cms_space), _perm_space(perm_space),
_n_workers(n_workers),
- _workers(workers),
_task_queues(task_queues),
- _term(workers->total_workers(), task_queues) { }
+ _term(n_workers, task_queues) { }
OopTaskQueueSet* task_queues() { return _task_queues; }
OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }
ParallelTaskTerminator* terminator() { return &_term; }
+ int n_workers() { return _n_workers; }
void work(int i);
@@ -5057,6 +5067,11 @@
void do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl, int* seed);
};
+// work_queue(i) is passed to the closure
+// Par_MarkRefsIntoAndScanClosure. The "i" parameter
+// also is passed to do_dirty_card_rescan_tasks() and to
+// do_work_steal() to select the i-th task_queue.
+
void CMSParRemarkTask::work(int i) {
elapsedTimer _timer;
ResourceMark rm;
@@ -5128,6 +5143,7 @@
// Do the rescan tasks for each of the two spaces
// (cms_space and perm_space) in turn.
+ // "i" is passed to select the "i-th" task_queue
do_dirty_card_rescan_tasks(_cms_space, i, &par_mrias_cl);
do_dirty_card_rescan_tasks(_perm_space, i, &par_mrias_cl);
_timer.stop();
@@ -5150,6 +5166,7 @@
}
}
+// Note that parameter "i" is not used.
void
CMSParRemarkTask::do_young_space_rescan(int i,
Par_MarkRefsIntoAndScanClosure* cl, ContiguousSpace* space,
@@ -5309,8 +5326,13 @@
size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
(size_t)ParGCDesiredObjsFromOverflowList);
// Now check if there's any work in the overflow list
+ // Passing ParallelGCThreads as the third parameter, no_of_gc_threads,
+ // only affects the number of attempts made to get work from the
+ // overflow list and does not affect the number of workers. Just
+ // pass ParallelGCThreads so this behavior is unchanged.
if (_collector->par_take_from_overflow_list(num_from_overflow_list,
- work_q)) {
+ work_q,
+ ParallelGCThreads)) {
// found something in global overflow list;
// not yet ready to go stealing work from others.
// We'd like to assert(work_q->size() != 0, ...)
@@ -5367,11 +5389,12 @@
// Merge the per-thread plab arrays into the global survivor chunk
// array which will provide the partitioning of the survivor space
// for CMS rescan.
-void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv) {
+void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv,
+ int no_of_gc_threads) {
assert(_survivor_plab_array != NULL, "Error");
assert(_survivor_chunk_array != NULL, "Error");
assert(_collectorState == FinalMarking, "Error");
- for (uint j = 0; j < ParallelGCThreads; j++) {
+ for (int j = 0; j < no_of_gc_threads; j++) {
_cursor[j] = 0;
}
HeapWord* top = surv->top();
@@ -5379,7 +5402,7 @@
for (i = 0; i < _survivor_chunk_capacity; i++) { // all sca entries
HeapWord* min_val = top; // Higher than any PLAB address
uint min_tid = 0; // position of min_val this round
- for (uint j = 0; j < ParallelGCThreads; j++) {
+ for (int j = 0; j < no_of_gc_threads; j++) {
ChunkArray* cur_sca = &_survivor_plab_array[j];
if (_cursor[j] == cur_sca->end()) {
continue;
@@ -5413,7 +5436,7 @@
// Verify that we used up all the recorded entries
#ifdef ASSERT
size_t total = 0;
- for (uint j = 0; j < ParallelGCThreads; j++) {
+ for (int j = 0; j < no_of_gc_threads; j++) {
assert(_cursor[j] == _survivor_plab_array[j].end(), "Ctl pt invariant");
total += _cursor[j];
}
@@ -5448,13 +5471,15 @@
// Each valid entry in [0, _eden_chunk_index) represents a task.
size_t n_tasks = _eden_chunk_index + 1;
assert(n_tasks == 1 || _eden_chunk_array != NULL, "Error");
- pst->set_par_threads(n_threads);
+ // Sets the condition for completion of the subtask (how many threads
+ // need to finish in order to be done).
+ pst->set_n_threads(n_threads);
pst->set_n_tasks((int)n_tasks);
}
// Merge the survivor plab arrays into _survivor_chunk_array
if (_survivor_plab_array != NULL) {
- merge_survivor_plab_arrays(dng->from());
+ merge_survivor_plab_arrays(dng->from(), n_threads);
} else {
assert(_survivor_chunk_index == 0, "Error");
}
@@ -5463,7 +5488,9 @@
{
SequentialSubTasksDone* pst = dng->to()->par_seq_tasks();
assert(!pst->valid(), "Clobbering existing data?");
- pst->set_par_threads(n_threads);
+ // Sets the condition for completion of the subtask (how many threads
+ // need to finish in order to be done).
+ pst->set_n_threads(n_threads);
pst->set_n_tasks(1);
assert(pst->valid(), "Error");
}
@@ -5474,7 +5501,9 @@
assert(!pst->valid(), "Clobbering existing data?");
size_t n_tasks = _survivor_chunk_index + 1;
assert(n_tasks == 1 || _survivor_chunk_array != NULL, "Error");
- pst->set_par_threads(n_threads);
+ // Sets the condition for completion of the subtask (how many threads
+ // need to finish in order to be done).
+ pst->set_n_threads(n_threads);
pst->set_n_tasks((int)n_tasks);
assert(pst->valid(), "Error");
}
@@ -5483,7 +5512,7 @@
// Parallel version of remark
void CMSCollector::do_remark_parallel() {
GenCollectedHeap* gch = GenCollectedHeap::heap();
- WorkGang* workers = gch->workers();
+ FlexibleWorkGang* workers = gch->workers();
assert(workers != NULL, "Need parallel worker threads.");
int n_workers = workers->total_workers();
CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace();
@@ -5636,13 +5665,11 @@
////////////////////////////////////////////////////////
// Parallel Reference Processing Task Proxy Class
////////////////////////////////////////////////////////
-class CMSRefProcTaskProxy: public AbstractGangTask {
+class CMSRefProcTaskProxy: public AbstractGangTaskWOopQueues {
typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
CMSCollector* _collector;
CMSBitMap* _mark_bit_map;
const MemRegion _span;
- OopTaskQueueSet* _task_queues;
- ParallelTaskTerminator _term;
ProcessTask& _task;
public:
@@ -5650,24 +5677,21 @@
CMSCollector* collector,
const MemRegion& span,
CMSBitMap* mark_bit_map,
- int total_workers,
+ AbstractWorkGang* workers,
OopTaskQueueSet* task_queues):
- AbstractGangTask("Process referents by policy in parallel"),
+ AbstractGangTaskWOopQueues("Process referents by policy in parallel",
+ task_queues),
_task(task),
- _collector(collector), _span(span), _mark_bit_map(mark_bit_map),
- _task_queues(task_queues),
- _term(total_workers, task_queues)
+ _collector(collector), _span(span), _mark_bit_map(mark_bit_map)
{
assert(_collector->_span.equals(_span) && !_span.is_empty(),
"Inconsistency in _span");
}
- OopTaskQueueSet* task_queues() { return _task_queues; }
+ OopTaskQueueSet* task_queues() { return queues(); }
OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }
- ParallelTaskTerminator* terminator() { return &_term; }
-
void do_work_steal(int i,
CMSParDrainMarkingStackClosure* drain,
CMSParKeepAliveClosure* keep_alive,
@@ -5739,8 +5763,13 @@
size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
(size_t)ParGCDesiredObjsFromOverflowList);
// Now check if there's any work in the overflow list
+ // Passing ParallelGCThreads as the third parameter, no_of_gc_threads,
+ // only affects the number of attempts made to get work from the
+ // overflow list and does not affect the number of workers. Just
+ // pass ParallelGCThreads so this behavior is unchanged.
if (_collector->par_take_from_overflow_list(num_from_overflow_list,
- work_q)) {
+ work_q,
+ ParallelGCThreads)) {
// Found something in global overflow list;
// not yet ready to go stealing work from others.
// We'd like to assert(work_q->size() != 0, ...)
@@ -5773,13 +5802,12 @@
void CMSRefProcTaskExecutor::execute(ProcessTask& task)
{
GenCollectedHeap* gch = GenCollectedHeap::heap();
- WorkGang* workers = gch->workers();
+ FlexibleWorkGang* workers = gch->workers();
assert(workers != NULL, "Need parallel worker threads.");
- int n_workers = workers->total_workers();
CMSRefProcTaskProxy rp_task(task, &_collector,
_collector.ref_processor()->span(),
_collector.markBitMap(),
- n_workers, _collector.task_queues());
+ workers, _collector.task_queues());
workers->run_task(&rp_task);
}
@@ -5787,7 +5815,7 @@
{
GenCollectedHeap* gch = GenCollectedHeap::heap();
- WorkGang* workers = gch->workers();
+ FlexibleWorkGang* workers = gch->workers();
assert(workers != NULL, "Need parallel worker threads.");
CMSRefEnqueueTaskProxy enq_task(task);
workers->run_task(&enq_task);
@@ -5814,6 +5842,14 @@
{
TraceTime t("weak refs processing", PrintGCDetails, false, gclog_or_tty);
if (rp->processing_is_mt()) {
+ // Set the degree of MT here. If the discovery is done MT, there
+ // may have been a different number of threads doing the discovery
+ // and a different number of discovered lists may have Ref objects.
+ // That is OK as long as the Reference lists are balanced (see
+ // balance_all_queues() and balance_queues()).
+
+
+ rp->set_mt_degree(ParallelGCThreads);
CMSRefProcTaskExecutor task_executor(*this);
rp->process_discovered_references(&_is_alive_closure,
&cmsKeepAliveClosure,
@@ -5874,6 +5910,7 @@
rp->set_enqueuing_is_done(true);
if (rp->processing_is_mt()) {
+ rp->balance_all_queues();
CMSRefProcTaskExecutor task_executor(*this);
rp->enqueue_discovered_references(&task_executor);
} else {
@@ -8708,7 +8745,8 @@
// similar changes might be needed.
// CR 6797058 has been filed to consolidate the common code.
bool CMSCollector::par_take_from_overflow_list(size_t num,
- OopTaskQueue* work_q) {
+ OopTaskQueue* work_q,
+ int no_of_gc_threads) {
assert(work_q->size() == 0, "First empty local work queue");
assert(num < work_q->max_elems(), "Can't bite more than we can chew");
if (_overflow_list == NULL) {
@@ -8717,7 +8755,9 @@
// Grab the entire list; we'll put back a suffix
oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
Thread* tid = Thread::current();
- size_t CMSOverflowSpinCount = (size_t)ParallelGCThreads;
+ // Before "no_of_gc_threads" was introduced CMSOverflowSpinCount was
+ // set to ParallelGCThreads.
+ size_t CMSOverflowSpinCount = (size_t) no_of_gc_threads; // was ParallelGCThreads;
size_t sleep_time_millis = MAX2((size_t)1, num/100);
// If the list is busy, we spin for a short while,
// sleeping between attempts to get the list.
@@ -9256,4 +9296,3 @@
true /* recordGCEndTime */,
true /* countCollection */ );
}
-