# HG changeset patch # User jmasa # Date 1285018718 25200 # Node ID 67b1a69ef5aa15ee6f12ef916a7369dfca8a1d22 # Parent 516540f1f0769bcdb65006f6ad0662c5c8e830db 6984287: Regularize how GC parallel workers are specified. Summary: Associate number of GC workers with the workgang as opposed to the task. Reviewed-by: johnc, ysr diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsCollectorPolicy.cpp --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsCollectorPolicy.cpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/cmsCollectorPolicy.cpp Mon Sep 20 14:38:38 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -39,7 +39,7 @@ if (_generations == NULL) vm_exit_during_initialization("Unable to allocate gen spec"); - if (UseParNewGC && ParallelGCThreads > 0) { + if (ParNewGeneration::in_use()) { if (UseAdaptiveSizePolicy) { _generations[0] = new GenerationSpec(Generation::ASParNew, _initial_gen0_size, _max_gen0_size); @@ -79,7 +79,7 @@ void ConcurrentMarkSweepPolicy::initialize_gc_policy_counters() { // initialize the policy counters - 2 collectors, 3 generations - if (UseParNewGC && ParallelGCThreads > 0) { + if (ParNewGeneration::in_use()) { _gc_policy_counters = new GCPolicyCounters("ParNew:CMS", 2, 3); } else { @@ -102,7 +102,7 @@ assert(size_policy() != NULL, "A size policy is required"); // initialize the policy counters - 2 collectors, 3 generations - if (UseParNewGC && ParallelGCThreads > 0) { + if (ParNewGeneration::in_use()) { _gc_policy_counters = new CMSGCAdaptivePolicyCounters("ParNew:CMS", 2, 3, size_policy()); } diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Mon Sep 20 14:38:38 2010 -0700 @@ -124,7 +124,8 @@ checkFreeListConsistency(); // Initialize locks for parallel case. - if (ParallelGCThreads > 0) { + + if (CollectedHeap::use_parallel_gc_threads()) { for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) { _indexedFreeListParLocks[i] = new Mutex(Mutex::leaf - 1, // == ExpandHeap_lock - 1 "a freelist par lock", @@ -1071,7 +1072,8 @@ // at address below "p" in finding the object that contains "p" // and those objects (if garbage) may have been modified to hold // live range information. - // assert(ParallelGCThreads > 0 || _bt.block_start(p) == p, "Should be a block boundary"); + // assert(CollectedHeap::use_parallel_gc_threads() || _bt.block_start(p) == p, + // "Should be a block boundary"); if (FreeChunk::indicatesFreeChunk(p)) return false; klassOop k = oop(p)->klass_or_null(); if (k != NULL) { @@ -2932,7 +2934,9 @@ "n_tasks calculation incorrect"); SequentialSubTasksDone* pst = conc_par_seq_tasks(); assert(!pst->valid(), "Clobbering existing data?"); - pst->set_par_threads(n_threads); + // Sets the condition for completion of the subtask (how many threads + // need to finish in order to be done). + pst->set_n_threads(n_threads); pst->set_n_tasks((int)n_tasks); } @@ -2972,6 +2976,8 @@ "n_tasks calculation incorrect"); SequentialSubTasksDone* pst = conc_par_seq_tasks(); assert(!pst->valid(), "Clobbering existing data?"); - pst->set_par_threads(n_threads); + // Sets the condition for completion of the subtask (how many threads + // need to finish in order to be done). + pst->set_n_threads(n_threads); pst->set_n_tasks((int)n_tasks); } diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Mon Sep 20 14:38:38 2010 -0700 @@ -195,7 +195,7 @@ "Offset of FreeChunk::_prev within FreeChunk must match" " that of OopDesc::_klass within OopDesc"); ) - if (ParallelGCThreads > 0) { + if (CollectedHeap::use_parallel_gc_threads()) { typedef CMSParGCThreadState* CMSParGCThreadStatePtr; _par_gc_thread_states = NEW_C_HEAP_ARRAY(CMSParGCThreadStatePtr, ParallelGCThreads); @@ -616,7 +616,7 @@ } // Support for multi-threaded concurrent phases - if (ParallelGCThreads > 0 && CMSConcurrentMTEnabled) { + if (CollectedHeap::use_parallel_gc_threads() && CMSConcurrentMTEnabled) { if (FLAG_IS_DEFAULT(ConcGCThreads)) { // just for now FLAG_SET_DEFAULT(ConcGCThreads, (ParallelGCThreads + 3)/4); @@ -628,6 +628,8 @@ warning("GC/CMS: _conc_workers allocation failure: " "forcing -CMSConcurrentMTEnabled"); CMSConcurrentMTEnabled = false; + } else { + _conc_workers->initialize_workers(); } } else { CMSConcurrentMTEnabled = false; @@ -936,7 +938,7 @@ // along with all the other pointers into the heap but // compaction is expected to be a rare event with // a heap using cms so don't do it without seeing the need. - if (ParallelGCThreads > 0) { + if (CollectedHeap::use_parallel_gc_threads()) { for (uint i = 0; i < ParallelGCThreads; i++) { _par_gc_thread_states[i]->promo.reset(); } @@ -2630,7 +2632,8 @@ // Should call gc_prologue_work() for all cms gens we are responsible for bool registerClosure = _collectorState >= Marking && _collectorState < Sweeping; - ModUnionClosure* muc = ParallelGCThreads > 0 ? &_modUnionClosurePar + ModUnionClosure* muc = CollectedHeap::use_parallel_gc_threads() ? + &_modUnionClosurePar : &_modUnionClosure; _cmsGen->gc_prologue_work(full, registerClosure, muc); _permGen->gc_prologue_work(full, registerClosure, muc); @@ -2731,7 +2734,7 @@ collector()->gc_epilogue(full); // Also reset promotion tracking in par gc thread states. - if (ParallelGCThreads > 0) { + if (CollectedHeap::use_parallel_gc_threads()) { for (uint i = 0; i < ParallelGCThreads; i++) { _par_gc_thread_states[i]->promo.stopTrackingPromotions(i); } @@ -3731,7 +3734,6 @@ // MT Concurrent Marking Task class CMSConcMarkingTask: public YieldingFlexibleGangTask { CMSCollector* _collector; - YieldingFlexibleWorkGang* _workers; // the whole gang int _n_workers; // requested/desired # workers bool _asynch; bool _result; @@ -3751,21 +3753,19 @@ CMSConcMarkingTask(CMSCollector* collector, CompactibleFreeListSpace* cms_space, CompactibleFreeListSpace* perm_space, - bool asynch, int n_workers, + bool asynch, YieldingFlexibleWorkGang* workers, OopTaskQueueSet* task_queues): YieldingFlexibleGangTask("Concurrent marking done multi-threaded"), _collector(collector), _cms_space(cms_space), _perm_space(perm_space), - _asynch(asynch), _n_workers(n_workers), _result(true), - _workers(workers), _task_queues(task_queues), - _term(n_workers, task_queues, _collector, asynch), + _asynch(asynch), _n_workers(0), _result(true), + _task_queues(task_queues), + _term(_n_workers, task_queues, _collector, asynch), _bit_map_lock(collector->bitMapLock()) { - assert(n_workers <= workers->total_workers(), - "Else termination won't work correctly today"); // XXX FIX ME! - _requested_size = n_workers; + _requested_size = _n_workers; _term.set_task(this); assert(_cms_space->bottom() < _perm_space->bottom(), "Finger incorrectly initialized below"); @@ -3781,6 +3781,10 @@ CMSConcMarkingTerminator* terminator() { return &_term; } + virtual void set_for_termination(int active_workers) { + terminator()->reset_for_reuse(active_workers); + } + void work(int i); virtual void coordinator_yield(); // stuff done by coordinator @@ -4220,9 +4224,12 @@ CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace(); CompactibleFreeListSpace* perm_space = _permGen->cmsSpace(); - CMSConcMarkingTask tsk(this, cms_space, perm_space, - asynch, num_workers /* number requested XXX */, - conc_workers(), task_queues()); + CMSConcMarkingTask tsk(this, + cms_space, + perm_space, + asynch, + conc_workers(), + task_queues()); // Since the actual number of workers we get may be different // from the number we requested above, do we need to do anything different @@ -4326,6 +4333,10 @@ verify_overflow_empty(); _abort_preclean = false; if (CMSPrecleaningEnabled) { + // Precleaning is currently not MT but the reference processor + // may be set for MT. Disable it temporarily here. + ReferenceProcessor* rp = ref_processor(); + ReferenceProcessorMTProcMutator z(rp, false); _eden_chunk_index = 0; size_t used = get_eden_used(); size_t capacity = get_eden_capacity(); @@ -4918,7 +4929,7 @@ // dirtied since the first checkpoint in this GC cycle and prior to // the most recent young generation GC, minus those cleaned up by the // concurrent precleaning. - if (CMSParallelRemarkEnabled && ParallelGCThreads > 0) { + if (CMSParallelRemarkEnabled && CollectedHeap::use_parallel_gc_threads()) { TraceTime t("Rescan (parallel) ", PrintGCDetails, false, gclog_or_tty); do_remark_parallel(); } else { @@ -5012,7 +5023,6 @@ // Parallel remark task class CMSParRemarkTask: public AbstractGangTask { CMSCollector* _collector; - WorkGang* _workers; int _n_workers; CompactibleFreeListSpace* _cms_space; CompactibleFreeListSpace* _perm_space; @@ -5025,21 +5035,21 @@ CMSParRemarkTask(CMSCollector* collector, CompactibleFreeListSpace* cms_space, CompactibleFreeListSpace* perm_space, - int n_workers, WorkGang* workers, + int n_workers, FlexibleWorkGang* workers, OopTaskQueueSet* task_queues): AbstractGangTask("Rescan roots and grey objects in parallel"), _collector(collector), _cms_space(cms_space), _perm_space(perm_space), _n_workers(n_workers), - _workers(workers), _task_queues(task_queues), - _term(workers->total_workers(), task_queues) { } + _term(n_workers, task_queues) { } OopTaskQueueSet* task_queues() { return _task_queues; } OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); } ParallelTaskTerminator* terminator() { return &_term; } + int n_workers() { return _n_workers; } void work(int i); @@ -5057,6 +5067,11 @@ void do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl, int* seed); }; +// work_queue(i) is passed to the closure +// Par_MarkRefsIntoAndScanClosure. The "i" parameter +// also is passed to do_dirty_card_rescan_tasks() and to +// do_work_steal() to select the i-th task_queue. + void CMSParRemarkTask::work(int i) { elapsedTimer _timer; ResourceMark rm; @@ -5128,6 +5143,7 @@ // Do the rescan tasks for each of the two spaces // (cms_space and perm_space) in turn. + // "i" is passed to select the "i-th" task_queue do_dirty_card_rescan_tasks(_cms_space, i, &par_mrias_cl); do_dirty_card_rescan_tasks(_perm_space, i, &par_mrias_cl); _timer.stop(); @@ -5150,6 +5166,7 @@ } } +// Note that parameter "i" is not used. void CMSParRemarkTask::do_young_space_rescan(int i, Par_MarkRefsIntoAndScanClosure* cl, ContiguousSpace* space, @@ -5309,8 +5326,13 @@ size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4, (size_t)ParGCDesiredObjsFromOverflowList); // Now check if there's any work in the overflow list + // Passing ParallelGCThreads as the third parameter, no_of_gc_threads, + // only affects the number of attempts made to get work from the + // overflow list and does not affect the number of workers. Just + // pass ParallelGCThreads so this behavior is unchanged. if (_collector->par_take_from_overflow_list(num_from_overflow_list, - work_q)) { + work_q, + ParallelGCThreads)) { // found something in global overflow list; // not yet ready to go stealing work from others. // We'd like to assert(work_q->size() != 0, ...) @@ -5367,11 +5389,12 @@ // Merge the per-thread plab arrays into the global survivor chunk // array which will provide the partitioning of the survivor space // for CMS rescan. -void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv) { +void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv, + int no_of_gc_threads) { assert(_survivor_plab_array != NULL, "Error"); assert(_survivor_chunk_array != NULL, "Error"); assert(_collectorState == FinalMarking, "Error"); - for (uint j = 0; j < ParallelGCThreads; j++) { + for (int j = 0; j < no_of_gc_threads; j++) { _cursor[j] = 0; } HeapWord* top = surv->top(); @@ -5379,7 +5402,7 @@ for (i = 0; i < _survivor_chunk_capacity; i++) { // all sca entries HeapWord* min_val = top; // Higher than any PLAB address uint min_tid = 0; // position of min_val this round - for (uint j = 0; j < ParallelGCThreads; j++) { + for (int j = 0; j < no_of_gc_threads; j++) { ChunkArray* cur_sca = &_survivor_plab_array[j]; if (_cursor[j] == cur_sca->end()) { continue; @@ -5413,7 +5436,7 @@ // Verify that we used up all the recorded entries #ifdef ASSERT size_t total = 0; - for (uint j = 0; j < ParallelGCThreads; j++) { + for (int j = 0; j < no_of_gc_threads; j++) { assert(_cursor[j] == _survivor_plab_array[j].end(), "Ctl pt invariant"); total += _cursor[j]; } @@ -5448,13 +5471,15 @@ // Each valid entry in [0, _eden_chunk_index) represents a task. size_t n_tasks = _eden_chunk_index + 1; assert(n_tasks == 1 || _eden_chunk_array != NULL, "Error"); - pst->set_par_threads(n_threads); + // Sets the condition for completion of the subtask (how many threads + // need to finish in order to be done). + pst->set_n_threads(n_threads); pst->set_n_tasks((int)n_tasks); } // Merge the survivor plab arrays into _survivor_chunk_array if (_survivor_plab_array != NULL) { - merge_survivor_plab_arrays(dng->from()); + merge_survivor_plab_arrays(dng->from(), n_threads); } else { assert(_survivor_chunk_index == 0, "Error"); } @@ -5463,7 +5488,9 @@ { SequentialSubTasksDone* pst = dng->to()->par_seq_tasks(); assert(!pst->valid(), "Clobbering existing data?"); - pst->set_par_threads(n_threads); + // Sets the condition for completion of the subtask (how many threads + // need to finish in order to be done). + pst->set_n_threads(n_threads); pst->set_n_tasks(1); assert(pst->valid(), "Error"); } @@ -5474,7 +5501,9 @@ assert(!pst->valid(), "Clobbering existing data?"); size_t n_tasks = _survivor_chunk_index + 1; assert(n_tasks == 1 || _survivor_chunk_array != NULL, "Error"); - pst->set_par_threads(n_threads); + // Sets the condition for completion of the subtask (how many threads + // need to finish in order to be done). + pst->set_n_threads(n_threads); pst->set_n_tasks((int)n_tasks); assert(pst->valid(), "Error"); } @@ -5483,7 +5512,7 @@ // Parallel version of remark void CMSCollector::do_remark_parallel() { GenCollectedHeap* gch = GenCollectedHeap::heap(); - WorkGang* workers = gch->workers(); + FlexibleWorkGang* workers = gch->workers(); assert(workers != NULL, "Need parallel worker threads."); int n_workers = workers->total_workers(); CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace(); @@ -5636,13 +5665,11 @@ //////////////////////////////////////////////////////// // Parallel Reference Processing Task Proxy Class //////////////////////////////////////////////////////// -class CMSRefProcTaskProxy: public AbstractGangTask { +class CMSRefProcTaskProxy: public AbstractGangTaskWOopQueues { typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; CMSCollector* _collector; CMSBitMap* _mark_bit_map; const MemRegion _span; - OopTaskQueueSet* _task_queues; - ParallelTaskTerminator _term; ProcessTask& _task; public: @@ -5650,24 +5677,21 @@ CMSCollector* collector, const MemRegion& span, CMSBitMap* mark_bit_map, - int total_workers, + AbstractWorkGang* workers, OopTaskQueueSet* task_queues): - AbstractGangTask("Process referents by policy in parallel"), + AbstractGangTaskWOopQueues("Process referents by policy in parallel", + task_queues), _task(task), - _collector(collector), _span(span), _mark_bit_map(mark_bit_map), - _task_queues(task_queues), - _term(total_workers, task_queues) + _collector(collector), _span(span), _mark_bit_map(mark_bit_map) { assert(_collector->_span.equals(_span) && !_span.is_empty(), "Inconsistency in _span"); } - OopTaskQueueSet* task_queues() { return _task_queues; } + OopTaskQueueSet* task_queues() { return queues(); } OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); } - ParallelTaskTerminator* terminator() { return &_term; } - void do_work_steal(int i, CMSParDrainMarkingStackClosure* drain, CMSParKeepAliveClosure* keep_alive, @@ -5739,8 +5763,13 @@ size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4, (size_t)ParGCDesiredObjsFromOverflowList); // Now check if there's any work in the overflow list + // Passing ParallelGCThreads as the third parameter, no_of_gc_threads, + // only affects the number of attempts made to get work from the + // overflow list and does not affect the number of workers. Just + // pass ParallelGCThreads so this behavior is unchanged. if (_collector->par_take_from_overflow_list(num_from_overflow_list, - work_q)) { + work_q, + ParallelGCThreads)) { // Found something in global overflow list; // not yet ready to go stealing work from others. // We'd like to assert(work_q->size() != 0, ...) @@ -5773,13 +5802,12 @@ void CMSRefProcTaskExecutor::execute(ProcessTask& task) { GenCollectedHeap* gch = GenCollectedHeap::heap(); - WorkGang* workers = gch->workers(); + FlexibleWorkGang* workers = gch->workers(); assert(workers != NULL, "Need parallel worker threads."); - int n_workers = workers->total_workers(); CMSRefProcTaskProxy rp_task(task, &_collector, _collector.ref_processor()->span(), _collector.markBitMap(), - n_workers, _collector.task_queues()); + workers, _collector.task_queues()); workers->run_task(&rp_task); } @@ -5787,7 +5815,7 @@ { GenCollectedHeap* gch = GenCollectedHeap::heap(); - WorkGang* workers = gch->workers(); + FlexibleWorkGang* workers = gch->workers(); assert(workers != NULL, "Need parallel worker threads."); CMSRefEnqueueTaskProxy enq_task(task); workers->run_task(&enq_task); @@ -5814,6 +5842,14 @@ { TraceTime t("weak refs processing", PrintGCDetails, false, gclog_or_tty); if (rp->processing_is_mt()) { + // Set the degree of MT here. If the discovery is done MT, there + // may have been a different number of threads doing the discovery + // and a different number of discovered lists may have Ref objects. + // That is OK as long as the Reference lists are balanced (see + // balance_all_queues() and balance_queues()). + + + rp->set_mt_degree(ParallelGCThreads); CMSRefProcTaskExecutor task_executor(*this); rp->process_discovered_references(&_is_alive_closure, &cmsKeepAliveClosure, @@ -5874,6 +5910,7 @@ rp->set_enqueuing_is_done(true); if (rp->processing_is_mt()) { + rp->balance_all_queues(); CMSRefProcTaskExecutor task_executor(*this); rp->enqueue_discovered_references(&task_executor); } else { @@ -8708,7 +8745,8 @@ // similar changes might be needed. // CR 6797058 has been filed to consolidate the common code. bool CMSCollector::par_take_from_overflow_list(size_t num, - OopTaskQueue* work_q) { + OopTaskQueue* work_q, + int no_of_gc_threads) { assert(work_q->size() == 0, "First empty local work queue"); assert(num < work_q->max_elems(), "Can't bite more than we can chew"); if (_overflow_list == NULL) { @@ -8717,7 +8755,9 @@ // Grab the entire list; we'll put back a suffix oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list); Thread* tid = Thread::current(); - size_t CMSOverflowSpinCount = (size_t)ParallelGCThreads; + // Before "no_of_gc_threads" was introduced CMSOverflowSpinCount was + // set to ParallelGCThreads. + size_t CMSOverflowSpinCount = (size_t) no_of_gc_threads; // was ParallelGCThreads; size_t sleep_time_millis = MAX2((size_t)1, num/100); // If the list is busy, we spin for a short while, // sleeping between attempts to get the list. @@ -9256,4 +9296,3 @@ true /* recordGCEndTime */, true /* countCollection */ ); } - diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp Mon Sep 20 14:38:38 2010 -0700 @@ -729,7 +729,9 @@ // Support for marking stack overflow handling bool take_from_overflow_list(size_t num, CMSMarkStack* to_stack); - bool par_take_from_overflow_list(size_t num, OopTaskQueue* to_work_q); + bool par_take_from_overflow_list(size_t num, + OopTaskQueue* to_work_q, + int no_of_gc_threads); void push_on_overflow_list(oop p); void par_push_on_overflow_list(oop p); // the following is, obviously, not, in general, "MT-stable" @@ -768,7 +770,7 @@ void abortable_preclean(); // Preclean while looking for possible abort void initialize_sequential_subtasks_for_young_gen_rescan(int i); // Helper function for above; merge-sorts the per-thread plab samples - void merge_survivor_plab_arrays(ContiguousSpace* surv); + void merge_survivor_plab_arrays(ContiguousSpace* surv, int no_of_gc_threads); // Resets (i.e. clears) the per-thread plab sample vectors void reset_survivor_plab_arrays(); diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp --- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp Mon Sep 20 14:38:38 2010 -0700 @@ -583,10 +583,13 @@ #endif guarantee(parallel_marking_threads() > 0, "peace of mind"); - _parallel_workers = new WorkGang("G1 Parallel Marking Threads", - (int) parallel_marking_threads(), false, true); - if (_parallel_workers == NULL) + _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads", + (int) _parallel_marking_threads, false, true); + if (_parallel_workers == NULL) { vm_exit_during_initialization("Failed necessary allocation."); + } else { + _parallel_workers->initialize_workers(); + } } // so that the call below can read a sensible value @@ -1451,7 +1454,7 @@ _bm, _g1h->concurrent_mark(), _region_bm, _card_bm); calccl.no_yield(); - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { _g1h->heap_region_par_iterate_chunked(&calccl, i, HeapRegion::FinalCountClaimValue); } else { @@ -1531,7 +1534,7 @@ G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &_par_cleanup_thread_state[i]->list, i); - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { _g1h->heap_region_par_iterate_chunked(&g1_note_end, i, HeapRegion::NoteEndClaimValue); } else { @@ -1575,7 +1578,7 @@ {} void work(int i) { - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { _g1rs->scrub_par(_region_bm, _card_bm, i, HeapRegion::ScrubRemSetClaimValue); } else { @@ -1647,7 +1650,7 @@ // Do counting once more with the world stopped for good measure. G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(), &_region_bm, &_card_bm); - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { assert(g1h->check_heap_region_claim_values( HeapRegion::InitialClaimValue), "sanity check"); @@ -1695,7 +1698,7 @@ // Note end of marking in all heap regions. double note_end_start = os::elapsedTime(); G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state); - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { int n_workers = g1h->workers()->total_workers(); g1h->set_par_threads(n_workers); g1h->workers()->run_task(&g1_par_note_end_task); @@ -1720,7 +1723,7 @@ if (G1ScrubRemSets) { double rs_scrub_start = os::elapsedTime(); G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { int n_workers = g1h->workers()->total_workers(); g1h->set_par_threads(n_workers); g1h->workers()->run_task(&g1_par_scrub_rs_task); @@ -1934,7 +1937,7 @@ g1h->ensure_parsability(false); - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { G1CollectedHeap::StrongRootsScope srs(g1h); // this is remark, so we'll use up all available threads int active_workers = ParallelGCThreads; @@ -3369,14 +3372,14 @@ CMObjectClosure oc(this); SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); - if (ParallelGCThreads > 0) + if (G1CollectedHeap::use_parallel_gc_threads()) satb_mq_set.set_par_closure(_task_id, &oc); else satb_mq_set.set_closure(&oc); // This keeps claiming and applying the closure to completed buffers // until we run out of buffers or we need to abort. - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { while (!has_aborted() && satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) { if (_cm->verbose_medium()) @@ -3396,7 +3399,7 @@ if (!concurrent() && !has_aborted()) { // We should only do this during remark. - if (ParallelGCThreads > 0) + if (G1CollectedHeap::use_parallel_gc_threads()) satb_mq_set.par_iterate_closure_all_threads(_task_id); else satb_mq_set.iterate_closure_all_threads(); @@ -3408,7 +3411,7 @@ concurrent() || satb_mq_set.completed_buffers_num() == 0, "invariant"); - if (ParallelGCThreads > 0) + if (G1CollectedHeap::use_parallel_gc_threads()) satb_mq_set.set_par_closure(_task_id, NULL); else satb_mq_set.set_closure(NULL); diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Mon Sep 20 14:38:38 2010 -0700 @@ -961,7 +961,8 @@ } // Rebuild remembered sets of all regions. - if (ParallelGCThreads > 0) { + + if (G1CollectedHeap::use_parallel_gc_threads()) { ParRebuildRSTask rebuild_rs_task(this); assert(check_heap_region_claim_values( HeapRegion::InitialClaimValue), "sanity check"); @@ -1960,7 +1961,7 @@ int worker, jint claim_value) { const size_t regions = n_regions(); - const size_t worker_num = (ParallelGCThreads > 0 ? ParallelGCThreads : 1); + const size_t worker_num = (G1CollectedHeap::use_parallel_gc_threads() ? ParallelGCThreads : 1); // try to spread out the starting points of the workers const size_t start_index = regions / worker_num * (size_t) worker; @@ -2527,7 +2528,7 @@ } void G1CollectedHeap::print_gc_threads_on(outputStream* st) const { - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { workers()->print_worker_threads_on(st); } @@ -2543,7 +2544,7 @@ } void G1CollectedHeap::gc_threads_do(ThreadClosure* tc) const { - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { workers()->threads_do(tc); } tc->do_thread(_cmThread); @@ -3083,7 +3084,7 @@ if (r != NULL) { r_used = r->used(); - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { // need to take the lock to guard against two threads calling // get_gc_alloc_region concurrently (very unlikely but...) MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); @@ -4182,6 +4183,8 @@ // *** Common G1 Evacuation Stuff +// This method is run in a GC worker. + void G1CollectedHeap:: g1_process_strong_roots(bool collecting_perm_gen, @@ -4259,7 +4262,7 @@ }; void G1CollectedHeap::save_marks() { - if (ParallelGCThreads == 0) { + if (!CollectedHeap::use_parallel_gc_threads()) { SaveMarksClosure sm; heap_region_iterate(&sm); } @@ -4284,7 +4287,7 @@ assert(dirty_card_queue_set().completed_buffers_num() == 0, "Should be empty"); double start_par = os::elapsedTime(); - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { // The individual threads will set their evac-failure closures. StrongRootsScope srs(this); if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr(); diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Mon Sep 20 14:38:38 2010 -0700 @@ -656,6 +656,9 @@ bool _unclean_regions_coming; public: + + SubTasksDone* process_strong_tasks() { return _process_strong_tasks; } + void set_refine_cte_cl_concurrency(bool concurrent); RefToScanQueue *task_queue(int i) const; @@ -684,7 +687,7 @@ void set_par_threads(int t) { SharedHeap::set_par_threads(t); - _process_strong_tasks->set_par_threads(t); + _process_strong_tasks->set_n_threads(t); } virtual CollectedHeap::Name kind() const { diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Mon Sep 20 14:38:38 2010 -0700 @@ -72,7 +72,10 @@ // G1CollectorPolicy::G1CollectorPolicy() : - _parallel_gc_threads((ParallelGCThreads > 0) ? ParallelGCThreads : 1), + _parallel_gc_threads(G1CollectedHeap::use_parallel_gc_threads() + ? ParallelGCThreads : 1), + + _n_pauses(0), _recent_CH_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), _recent_G1_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), @@ -1073,7 +1076,7 @@ } double G1CollectorPolicy::avg_value (double* data) { - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { double ret = 0.0; for (uint i = 0; i < ParallelGCThreads; ++i) ret += data[i]; @@ -1084,7 +1087,7 @@ } double G1CollectorPolicy::max_value (double* data) { - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { double ret = data[0]; for (uint i = 1; i < ParallelGCThreads; ++i) if (data[i] > ret) @@ -1096,7 +1099,7 @@ } double G1CollectorPolicy::sum_of_values (double* data) { - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { double sum = 0.0; for (uint i = 0; i < ParallelGCThreads; i++) sum += data[i]; @@ -1110,7 +1113,7 @@ double* data2) { double ret = data1[0] + data2[0]; - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { for (uint i = 1; i < ParallelGCThreads; ++i) { double data = data1[i] + data2[i]; if (data > ret) @@ -1126,7 +1129,7 @@ void G1CollectorPolicy::record_collection_pause_end() { double end_time_sec = os::elapsedTime(); double elapsed_ms = _last_pause_time_ms; - bool parallel = ParallelGCThreads > 0; + bool parallel = G1CollectedHeap::use_parallel_gc_threads(); double evac_ms = (end_time_sec - _cur_G1_strong_roots_end_sec) * 1000.0; size_t rs_size = _cur_collection_pause_used_regions_at_start - collection_set_size(); @@ -1941,7 +1944,7 @@ // Further, we're now always doing parallel collection. But I'm still // leaving this here as a placeholder for a more precise assertion later. // (DLD, 10/05.) - assert((true || ParallelGCThreads > 0) || + assert((true || G1CollectedHeap::use_parallel_gc_threads()) || _g1->evacuation_failed() || recent_survival_rate <= 1.0, "Or bad frac"); return recent_survival_rate; @@ -1961,7 +1964,7 @@ // Further, we're now always doing parallel collection. But I'm still // leaving this here as a placeholder for a more precise assertion later. // (DLD, 10/05.) - assert((true || ParallelGCThreads > 0) || + assert((true || G1CollectedHeap::use_parallel_gc_threads()) || last_survival_rate <= 1.0, "Or bad frac"); return last_survival_rate; } else { @@ -2121,7 +2124,7 @@ } void G1CollectorPolicy::print_summary(PauseSummary* summary) const { - bool parallel = ParallelGCThreads > 0; + bool parallel = G1CollectedHeap::use_parallel_gc_threads(); MainBodySummary* body_summary = summary->main_body_summary(); if (summary->get_total_seq()->num() > 0) { print_summary_sd(0, "Evacuation Pauses", summary->get_total_seq()); @@ -2559,7 +2562,7 @@ gclog_or_tty->print_cr(" clear marked regions + work1: %8.3f ms.", (clear_marked_end - start)*1000.0); } - if (ParallelGCThreads > 0) { + if (G1CollectedHeap::use_parallel_gc_threads()) { const size_t OverpartitionFactor = 4; const size_t MinWorkUnit = 8; const size_t WorkUnit = diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp --- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp Mon Sep 20 14:38:38 2010 -0700 @@ -523,7 +523,7 @@ assert(!_traversal_in_progress, "Invariant between iterations."); set_traversal(true); if (ParallelGCThreads > 0) { - _seq_task->set_par_threads((int)n_workers()); + _seq_task->set_n_threads((int)n_workers()); } guarantee( _cards_scanned == NULL, "invariant" ); _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers()); diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp --- a/hotspot/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp Mon Sep 20 14:38:38 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007, 2010 Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -44,7 +44,7 @@ int n_strides = n_threads * StridesPerThread; SequentialSubTasksDone* pst = sp->par_seq_tasks(); - pst->set_par_threads(n_threads); + pst->set_n_threads(n_threads); pst->set_n_tasks(n_strides); int stride = 0; diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp --- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Mon Sep 20 14:38:38 2010 -0700 @@ -1533,3 +1533,7 @@ const char* ParNewGeneration::name() const { return "par new generation"; } + +bool ParNewGeneration::in_use() { + return UseParNewGC && ParallelGCThreads > 0; +} diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp --- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Mon Sep 20 14:38:38 2010 -0700 @@ -350,6 +350,8 @@ delete _task_queues; } + static bool in_use(); + virtual void ref_processor_init(); virtual Generation::Name kind() { return Generation::ParNew; } virtual const char* name() const; diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.hpp Mon Sep 20 14:38:38 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2008, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -242,7 +242,11 @@ // class DrainStacksCompactionTask : public GCTask { + uint _stack_index; + uint stack_index() { return _stack_index; } public: + DrainStacksCompactionTask(uint stack_index) : GCTask(), + _stack_index(stack_index) {}; char* name() { return (char *)"drain-region-task"; } virtual void do_it(GCTaskManager* manager, uint which); }; diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Mon Sep 20 14:38:38 2010 -0700 @@ -2449,7 +2449,7 @@ const unsigned int task_count = MAX2(parallel_gc_threads, 1U); for (unsigned int j = 0; j < task_count; j++) { - q->enqueue(new DrainStacksCompactionTask()); + q->enqueue(new DrainStacksCompactionTask(j)); } // Find all regions that are available (can be filled immediately) and diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/gc_interface/collectedHeap.cpp --- a/hotspot/src/share/vm/gc_interface/collectedHeap.cpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/gc_interface/collectedHeap.cpp Mon Sep 20 14:38:38 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,7 +34,9 @@ // Memory state functions. -CollectedHeap::CollectedHeap() + +CollectedHeap::CollectedHeap() : _n_par_threads(0) + { const size_t max_len = size_t(arrayOopDesc::max_array_length(T_INT)); const size_t elements_per_word = HeapWordSize / sizeof(jint); diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/gc_interface/collectedHeap.hpp --- a/hotspot/src/share/vm/gc_interface/collectedHeap.hpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/gc_interface/collectedHeap.hpp Mon Sep 20 14:38:38 2010 -0700 @@ -59,6 +59,8 @@ MemRegion _reserved; BarrierSet* _barrier_set; bool _is_gc_active; + int _n_par_threads; + unsigned int _total_collections; // ... started unsigned int _total_full_collections; // ... started NOT_PRODUCT(volatile size_t _promotion_failure_alot_count;) @@ -293,6 +295,12 @@ } GCCause::Cause gc_cause() { return _gc_cause; } + // Number of threads currently working on GC tasks. + int n_par_threads() { return _n_par_threads; } + + // May be overridden to set additional parallelism. + virtual void set_par_threads(int t) { _n_par_threads = t; }; + // Preload classes into the shared portion of the heap, and then dump // that data to a file so that it can be loaded directly by another // VM (then terminate). @@ -606,6 +614,14 @@ return (CIFireOOMAt > 1 && _fire_out_of_memory_count >= CIFireOOMAt); } #endif + + public: + // This is a convenience method that is used in cases where + // the actual number of GC worker threads is not pertinent but + // only whether there more than 0. Use of this method helps + // reduce the occurrence of ParallelGCThreads to uses where the + // actual number may be germane. + static bool use_parallel_gc_threads() { return ParallelGCThreads > 0; } }; // Class to set and reset the GC cause for a CollectedHeap. diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/includeDB_core --- a/hotspot/src/share/vm/includeDB_core Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/includeDB_core Mon Sep 20 14:38:38 2010 -0700 @@ -4721,6 +4721,7 @@ workgroup.cpp os.hpp workgroup.cpp workgroup.hpp +workgroup.hpp taskqueue.hpp workgroup.hpp thread_.inline.hpp xmlstream.cpp allocation.hpp diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/memory/genCollectedHeap.cpp --- a/hotspot/src/share/vm/memory/genCollectedHeap.cpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/memory/genCollectedHeap.cpp Mon Sep 20 14:38:38 2010 -0700 @@ -676,7 +676,7 @@ void GenCollectedHeap::set_par_threads(int t) { SharedHeap::set_par_threads(t); - _gen_process_strong_tasks->set_par_threads(t); + _gen_process_strong_tasks->set_n_threads(t); } class AssertIsPermClosure: public OopClosure { diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/memory/genCollectedHeap.hpp --- a/hotspot/src/share/vm/memory/genCollectedHeap.hpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/memory/genCollectedHeap.hpp Mon Sep 20 14:38:38 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -74,6 +74,7 @@ // Data structure for claiming the (potentially) parallel tasks in // (gen-specific) strong roots processing. SubTasksDone* _gen_process_strong_tasks; + SubTasksDone* gen_process_strong_tasks() { return _gen_process_strong_tasks; } // In block contents verification, the number of header words to skip NOT_PRODUCT(static size_t _skip_header_HeapWords;) diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/memory/referenceProcessor.cpp --- a/hotspot/src/share/vm/memory/referenceProcessor.cpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/memory/referenceProcessor.cpp Mon Sep 20 14:38:38 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -137,16 +137,17 @@ _discovery_is_atomic = atomic_discovery; _discovery_is_mt = mt_discovery; _num_q = mt_degree; - _discoveredSoftRefs = NEW_C_HEAP_ARRAY(DiscoveredList, _num_q * subclasses_of_ref); + _max_num_q = mt_degree; + _discoveredSoftRefs = NEW_C_HEAP_ARRAY(DiscoveredList, _max_num_q * subclasses_of_ref); if (_discoveredSoftRefs == NULL) { vm_exit_during_initialization("Could not allocated RefProc Array"); } - _discoveredWeakRefs = &_discoveredSoftRefs[_num_q]; - _discoveredFinalRefs = &_discoveredWeakRefs[_num_q]; - _discoveredPhantomRefs = &_discoveredFinalRefs[_num_q]; + _discoveredWeakRefs = &_discoveredSoftRefs[_max_num_q]; + _discoveredFinalRefs = &_discoveredWeakRefs[_max_num_q]; + _discoveredPhantomRefs = &_discoveredFinalRefs[_max_num_q]; assert(sentinel_ref() != NULL, "_sentinelRef is NULL"); // Initialized all entries to _sentinelRef - for (int i = 0; i < _num_q * subclasses_of_ref; i++) { + for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { _discoveredSoftRefs[i].set_head(sentinel_ref()); _discoveredSoftRefs[i].set_length(0); } @@ -159,7 +160,7 @@ #ifndef PRODUCT void ReferenceProcessor::verify_no_references_recorded() { guarantee(!_discovering_refs, "Discovering refs?"); - for (int i = 0; i < _num_q * subclasses_of_ref; i++) { + for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { guarantee(_discoveredSoftRefs[i].empty(), "Found non-empty discovered list"); } @@ -167,7 +168,11 @@ #endif void ReferenceProcessor::weak_oops_do(OopClosure* f) { - for (int i = 0; i < _num_q * subclasses_of_ref; i++) { + // Should this instead be + // for (int i = 0; i < subclasses_of_ref; i++_ { + // for (int j = 0; j < _num_q; j++) { + // int index = i * _max_num_q + j; + for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { if (UseCompressedOops) { f->do_oop((narrowOop*)_discoveredSoftRefs[i].adr_head()); } else { @@ -395,7 +400,15 @@ assert(work_id < (unsigned int)_ref_processor.num_q(), "Index out-of-bounds"); // Simplest first cut: static partitioning. int index = work_id; - for (int j = 0; j < subclasses_of_ref; j++, index += _n_queues) { + // The increment on "index" must correspond to the maximum number of queues + // (n_queues) with which that ReferenceProcessor was created. That + // is because of the "clever" way the discovered references lists were + // allocated and are indexed into. That number is ParallelGCThreads + // currently. Assert that. + assert(_n_queues == (int) ParallelGCThreads, "Different number not expected"); + for (int j = 0; + j < subclasses_of_ref; + j++, index += _n_queues) { _ref_processor.enqueue_discovered_reflist( _refs_lists[index], _pending_list_addr); _refs_lists[index].set_head(_sentinel_ref); @@ -410,11 +423,11 @@ if (_processing_is_mt && task_executor != NULL) { // Parallel code RefProcEnqueueTask tsk(*this, _discoveredSoftRefs, - pending_list_addr, sentinel_ref(), _num_q); + pending_list_addr, sentinel_ref(), _max_num_q); task_executor->execute(tsk); } else { // Serial code: call the parent class's implementation - for (int i = 0; i < _num_q * subclasses_of_ref; i++) { + for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { enqueue_discovered_reflist(_discoveredSoftRefs[i], pending_list_addr); _discoveredSoftRefs[i].set_head(sentinel_ref()); _discoveredSoftRefs[i].set_length(0); @@ -614,8 +627,9 @@ complete_gc->do_void(); NOT_PRODUCT( if (PrintGCDetails && TraceReferenceGC) { - gclog_or_tty->print(" Dropped %d dead Refs out of %d " - "discovered Refs by policy ", iter.removed(), iter.processed()); + gclog_or_tty->print_cr(" Dropped %d dead Refs out of %d " + "discovered Refs by policy list " INTPTR_FORMAT, + iter.removed(), iter.processed(), (address)refs_list.head()); } ) } @@ -651,8 +665,9 @@ } NOT_PRODUCT( if (PrintGCDetails && TraceReferenceGC) { - gclog_or_tty->print(" Dropped %d active Refs out of %d " - "Refs in discovered list ", iter.removed(), iter.processed()); + gclog_or_tty->print_cr(" Dropped %d active Refs out of %d " + "Refs in discovered list " INTPTR_FORMAT, + iter.removed(), iter.processed(), (address)refs_list.head()); } ) } @@ -689,8 +704,9 @@ complete_gc->do_void(); NOT_PRODUCT( if (PrintGCDetails && TraceReferenceGC) { - gclog_or_tty->print(" Dropped %d active Refs out of %d " - "Refs in discovered list ", iter.removed(), iter.processed()); + gclog_or_tty->print_cr(" Dropped %d active Refs out of %d " + "Refs in discovered list " INTPTR_FORMAT, + iter.removed(), iter.processed(), (address)refs_list.head()); } ) } @@ -704,6 +720,7 @@ BoolObjectClosure* is_alive, OopClosure* keep_alive, VoidClosure* complete_gc) { + ResourceMark rm; DiscoveredListIterator iter(refs_list, keep_alive, is_alive); while (iter.has_next()) { iter.update_discovered(); @@ -743,8 +760,8 @@ void ReferenceProcessor::abandon_partial_discovery() { // loop over the lists - for (int i = 0; i < _num_q * subclasses_of_ref; i++) { - if (TraceReferenceGC && PrintGCDetails && ((i % _num_q) == 0)) { + for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { + if (TraceReferenceGC && PrintGCDetails && ((i % _max_num_q) == 0)) { gclog_or_tty->print_cr( "\nAbandoning %s discovered list", list_name(i)); @@ -766,7 +783,9 @@ OopClosure& keep_alive, VoidClosure& complete_gc) { - _ref_processor.process_phase1(_refs_lists[i], _policy, + Thread* thr = Thread::current(); + int refs_list_index = ((WorkerThread*)thr)->id(); + _ref_processor.process_phase1(_refs_lists[refs_list_index], _policy, &is_alive, &keep_alive, &complete_gc); } private: @@ -802,6 +821,11 @@ OopClosure& keep_alive, VoidClosure& complete_gc) { + // Don't use "refs_list_index" calculated in this way because + // balance_queues() has moved the Ref's into the first n queues. + // Thread* thr = Thread::current(); + // int refs_list_index = ((WorkerThread*)thr)->id(); + // _ref_processor.process_phase3(_refs_lists[refs_list_index], _clear_referent, _ref_processor.process_phase3(_refs_lists[i], _clear_referent, &is_alive, &keep_alive, &complete_gc); } @@ -810,23 +834,47 @@ }; // Balances reference queues. +// Move entries from all queues[0, 1, ..., _max_num_q-1] to +// queues[0, 1, ..., _num_q-1] because only the first _num_q +// corresponding to the active workers will be processed. void ReferenceProcessor::balance_queues(DiscoveredList ref_lists[]) { // calculate total length size_t total_refs = 0; - for (int i = 0; i < _num_q; ++i) { + if (TraceReferenceGC && PrintGCDetails) { + gclog_or_tty->print_cr("\nBalance ref_lists "); + } + + for (int i = 0; i < _max_num_q; ++i) { total_refs += ref_lists[i].length(); + if (TraceReferenceGC && PrintGCDetails) { + gclog_or_tty->print("%d ", ref_lists[i].length()); + } + } + if (TraceReferenceGC && PrintGCDetails) { + gclog_or_tty->print_cr(" = %d", total_refs); } size_t avg_refs = total_refs / _num_q + 1; int to_idx = 0; - for (int from_idx = 0; from_idx < _num_q; from_idx++) { - while (ref_lists[from_idx].length() > avg_refs) { + for (int from_idx = 0; from_idx < _max_num_q; from_idx++) { + bool move_all = false; + if (from_idx >= _num_q) { + move_all = ref_lists[from_idx].length() > 0; + } + while ((ref_lists[from_idx].length() > avg_refs) || + move_all) { assert(to_idx < _num_q, "Sanity Check!"); if (ref_lists[to_idx].length() < avg_refs) { // move superfluous refs - size_t refs_to_move = - MIN2(ref_lists[from_idx].length() - avg_refs, - avg_refs - ref_lists[to_idx].length()); + size_t refs_to_move; + // Move all the Ref's if the from queue will not be processed. + if (move_all) { + refs_to_move = MIN2(ref_lists[from_idx].length(), + avg_refs - ref_lists[to_idx].length()); + } else { + refs_to_move = MIN2(ref_lists[from_idx].length() - avg_refs, + avg_refs - ref_lists[to_idx].length()); + } oop move_head = ref_lists[from_idx].head(); oop move_tail = move_head; oop new_head = move_head; @@ -840,11 +888,35 @@ ref_lists[to_idx].inc_length(refs_to_move); ref_lists[from_idx].set_head(new_head); ref_lists[from_idx].dec_length(refs_to_move); + if (ref_lists[from_idx].length() == 0) { + break; + } } else { - ++to_idx; + to_idx = (to_idx + 1) % _num_q; } } } +#ifdef ASSERT + size_t balanced_total_refs = 0; + for (int i = 0; i < _max_num_q; ++i) { + balanced_total_refs += ref_lists[i].length(); + if (TraceReferenceGC && PrintGCDetails) { + gclog_or_tty->print("%d ", ref_lists[i].length()); + } + } + if (TraceReferenceGC && PrintGCDetails) { + gclog_or_tty->print_cr(" = %d", balanced_total_refs); + gclog_or_tty->flush(); + } + assert(total_refs == balanced_total_refs, "Balancing was incomplete"); +#endif +} + +void ReferenceProcessor::balance_all_queues() { + balance_queues(_discoveredSoftRefs); + balance_queues(_discoveredWeakRefs); + balance_queues(_discoveredFinalRefs); + balance_queues(_discoveredPhantomRefs); } void @@ -857,8 +929,17 @@ VoidClosure* complete_gc, AbstractRefProcTaskExecutor* task_executor) { - bool mt = task_executor != NULL && _processing_is_mt; - if (mt && ParallelRefProcBalancingEnabled) { + bool mt_processing = task_executor != NULL && _processing_is_mt; + // If discovery used MT and a dynamic number of GC threads, then + // the queues must be balanced for correctness if fewer than the + // maximum number of queues were used. The number of queue used + // during discovery may be different than the number to be used + // for processing so don't depend of _num_q < _max_num_q as part + // of the test. + bool must_balance = _discovery_is_mt; + + if ((mt_processing && ParallelRefProcBalancingEnabled) || + must_balance) { balance_queues(refs_lists); } if (PrintReferenceGC && PrintGCDetails) { @@ -875,7 +956,7 @@ // policy reasons. Keep alive the transitive closure of all // such referents. if (policy != NULL) { - if (mt) { + if (mt_processing) { RefProcPhase1Task phase1(*this, refs_lists, policy, true /*marks_oops_alive*/); task_executor->execute(phase1); } else { @@ -891,7 +972,7 @@ // Phase 2: // . Traverse the list and remove any refs whose referents are alive. - if (mt) { + if (mt_processing) { RefProcPhase2Task phase2(*this, refs_lists, !discovery_is_atomic() /*marks_oops_alive*/); task_executor->execute(phase2); } else { @@ -902,7 +983,7 @@ // Phase 3: // . Traverse the list and process referents as appropriate. - if (mt) { + if (mt_processing) { RefProcPhase3Task phase3(*this, refs_lists, clear_referent, true /*marks_oops_alive*/); task_executor->execute(phase3); } else { @@ -915,7 +996,11 @@ void ReferenceProcessor::clean_up_discovered_references() { // loop over the lists - for (int i = 0; i < _num_q * subclasses_of_ref; i++) { + // Should this instead be + // for (int i = 0; i < subclasses_of_ref; i++_ { + // for (int j = 0; j < _num_q; j++) { + // int index = i * _max_num_q + j; + for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { if (TraceReferenceGC && PrintGCDetails && ((i % _num_q) == 0)) { gclog_or_tty->print_cr( "\nScrubbing %s discovered list of Null referents", @@ -976,7 +1061,7 @@ id = next_id(); } } - assert(0 <= id && id < _num_q, "Id is out-of-bounds (call Freud?)"); + assert(0 <= id && id < _max_num_q, "Id is out-of-bounds (call Freud?)"); // Get the discovered queue to which we will add DiscoveredList* list = NULL; @@ -1001,6 +1086,10 @@ default: ShouldNotReachHere(); } + if (TraceReferenceGC && PrintGCDetails) { + gclog_or_tty->print_cr("Thread %d gets list " INTPTR_FORMAT, + id, list); + } return list; } @@ -1243,7 +1332,7 @@ { TraceTime tt("Preclean SoftReferences", PrintGCDetails && PrintReferenceGC, false, gclog_or_tty); - for (int i = 0; i < _num_q; i++) { + for (int i = 0; i < _max_num_q; i++) { if (yield->should_return()) { return; } @@ -1340,15 +1429,16 @@ NOT_PRODUCT( if (PrintGCDetails && PrintReferenceGC) { - gclog_or_tty->print(" Dropped %d Refs out of %d " - "Refs in discovered list ", iter.removed(), iter.processed()); + gclog_or_tty->print_cr(" Dropped %d Refs out of %d " + "Refs in discovered list " INTPTR_FORMAT, + iter.removed(), iter.processed(), (address)refs_list.head()); } ) } const char* ReferenceProcessor::list_name(int i) { - assert(i >= 0 && i <= _num_q * subclasses_of_ref, "Out of bounds index"); - int j = i / _num_q; + assert(i >= 0 && i <= _max_num_q * subclasses_of_ref, "Out of bounds index"); + int j = i / _max_num_q; switch (j) { case 0: return "SoftRef"; case 1: return "WeakRef"; @@ -1372,7 +1462,7 @@ #ifndef PRODUCT void ReferenceProcessor::clear_discovered_references() { guarantee(!_discovering_refs, "Discovering refs?"); - for (int i = 0; i < _num_q * subclasses_of_ref; i++) { + for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { oop obj = _discoveredSoftRefs[i].head(); while (obj != sentinel_ref()) { oop next = java_lang_ref_Reference::discovered(obj); diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/memory/referenceProcessor.hpp --- a/hotspot/src/share/vm/memory/referenceProcessor.hpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/memory/referenceProcessor.hpp Mon Sep 20 14:38:38 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2008, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -85,8 +85,10 @@ // The discovered ref lists themselves - // The MT'ness degree of the queues below + // The active MT'ness degree of the queues below int _num_q; + // The maximum MT'ness degree of the queues below + int _max_num_q; // Arrays of lists of oops, one per thread DiscoveredList* _discoveredSoftRefs; DiscoveredList* _discoveredWeakRefs; @@ -95,6 +97,7 @@ public: int num_q() { return _num_q; } + void set_mt_degree(int v) { _num_q = v; } DiscoveredList* discovered_soft_refs() { return _discoveredSoftRefs; } static oop sentinel_ref() { return _sentinelRef; } static oop* adr_sentinel_ref() { return &_sentinelRef; } @@ -244,6 +247,7 @@ _bs(NULL), _is_alive_non_header(NULL), _num_q(0), + _max_num_q(0), _processing_is_mt(false), _next_id(0) {} @@ -312,6 +316,9 @@ void weak_oops_do(OopClosure* f); // weak roots static void oops_do(OopClosure* f); // strong root(s) + // Balance each of the discovered lists. + void balance_all_queues(); + // Discover a Reference object, using appropriate discovery criteria bool discover_reference(oop obj, ReferenceType rt); diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/memory/sharedHeap.cpp --- a/hotspot/src/share/vm/memory/sharedHeap.cpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/memory/sharedHeap.cpp Mon Sep 20 14:38:38 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -50,7 +50,8 @@ _perm_gen(NULL), _rem_set(NULL), _strong_roots_parity(0), _process_strong_tasks(new SubTasksDone(SH_PS_NumElements)), - _workers(NULL), _n_par_threads(0) + _n_par_threads(0), + _workers(NULL) { if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) { vm_exit_during_initialization("Failed necessary allocation."); @@ -60,11 +61,13 @@ (UseConcMarkSweepGC && CMSParallelRemarkEnabled) || UseG1GC) && ParallelGCThreads > 0) { - _workers = new WorkGang("Parallel GC Threads", ParallelGCThreads, + _workers = new FlexibleWorkGang("Parallel GC Threads", ParallelGCThreads, /* are_GC_task_threads */true, /* are_ConcurrentGC_threads */false); if (_workers == NULL) { vm_exit_during_initialization("Failed necessary allocation."); + } else { + _workers->initialize_workers(); } } } @@ -77,8 +80,9 @@ } void SharedHeap::set_par_threads(int t) { + assert(t == 0 || !UseSerialGC, "Cannot have parallel threads"); _n_par_threads = t; - _process_strong_tasks->set_par_threads(t); + _process_strong_tasks->set_n_threads(t); } class AssertIsPermClosure: public OopClosure { diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/memory/sharedHeap.hpp --- a/hotspot/src/share/vm/memory/sharedHeap.hpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/memory/sharedHeap.hpp Mon Sep 20 14:38:38 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -38,6 +38,7 @@ class ObjectClosure; class SubTasksDone; class WorkGang; +class FlexibleWorkGang; class CollectorPolicy; class KlassHandle; @@ -74,7 +75,7 @@ int _strong_roots_parity; // If we're doing parallel GC, use this gang of threads. - WorkGang* _workers; + FlexibleWorkGang* _workers; // Number of parallel threads currently working on GC tasks. // O indicates use sequential code; 1 means use parallel code even with @@ -189,7 +190,7 @@ SO_CodeCache = 0x10 }; - WorkGang* workers() const { return _workers; } + FlexibleWorkGang* workers() const { return _workers; } // Sets the number of parallel threads that will be doing tasks // (such as process strong roots) subsequently. diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/utilities/taskqueue.cpp --- a/hotspot/src/share/vm/utilities/taskqueue.cpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/utilities/taskqueue.cpp Mon Sep 20 14:38:38 2010 -0700 @@ -144,6 +144,7 @@ bool ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) { + assert(_n_threads > 0, "Initialization is incorrect"); assert(_offered_termination < _n_threads, "Invariant"); Atomic::inc(&_offered_termination); @@ -255,3 +256,9 @@ _index < objArrayOop(_obj)->length(); } #endif // ASSERT + +void ParallelTaskTerminator::reset_for_reuse(int n_threads) { + reset_for_reuse(); + _n_threads = n_threads; +} + diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/utilities/taskqueue.hpp --- a/hotspot/src/share/vm/utilities/taskqueue.hpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/utilities/taskqueue.hpp Mon Sep 20 14:38:38 2010 -0700 @@ -305,6 +305,12 @@ return false; } +// pop_local_slow() is done by the owning thread and is trying to +// get the last task in the queue. It will compete with pop_global() +// that will be used by other threads. The tag age is incremented +// whenever the queue goes empty which it will do here if this thread +// gets the last task or in pop_global() if the queue wraps (top == 0 +// and pop_global() succeeds, see pop_global()). template bool GenericTaskQueue::pop_local_slow(uint localBot, Age oldAge) { // This queue was observed to contain exactly one element; either this @@ -637,6 +643,9 @@ // in an MT-safe manner, once the previous round of use of // the terminator is finished. void reset_for_reuse(); + // Same as above but the number of parallel threads is set to the + // given number. + void reset_for_reuse(int n_threads); #ifdef TRACESPINNING static uint total_yields() { return _total_yields; } @@ -782,3 +791,4 @@ typedef OverflowTaskQueue RegionTaskQueue; typedef GenericTaskQueueSet RegionTaskQueueSet; + diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/utilities/workgroup.cpp --- a/hotspot/src/share/vm/utilities/workgroup.cpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/utilities/workgroup.cpp Mon Sep 20 14:38:38 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -53,28 +53,52 @@ int workers, bool are_GC_task_threads, bool are_ConcurrentGC_threads) : - AbstractWorkGang(name, are_GC_task_threads, are_ConcurrentGC_threads) -{ + AbstractWorkGang(name, are_GC_task_threads, are_ConcurrentGC_threads) { // Save arguments. _total_workers = workers; +} + +GangWorker* WorkGang::allocate_worker(int which) { + GangWorker* new_worker = new GangWorker(this, which); + return new_worker; +} + +// The current implementation will exit if the allocation +// of any worker fails. Still, return a boolean so that +// a future implementation can possibly do a partial +// initialization of the workers and report such to the +// caller. +bool WorkGang::initialize_workers() { if (TraceWorkGang) { - tty->print_cr("Constructing work gang %s with %d threads", name, workers); + tty->print_cr("Constructing work gang %s with %d threads", + name(), + total_workers()); } - _gang_workers = NEW_C_HEAP_ARRAY(GangWorker*, workers); + _gang_workers = NEW_C_HEAP_ARRAY(GangWorker*, total_workers()); if (gang_workers() == NULL) { vm_exit_out_of_memory(0, "Cannot create GangWorker array."); + return false; + } + os::ThreadType worker_type; + if (are_ConcurrentGC_threads()) { + worker_type = os::cgc_thread; + } else { + worker_type = os::pgc_thread; } for (int worker = 0; worker < total_workers(); worker += 1) { - GangWorker* new_worker = new GangWorker(this, worker); + GangWorker* new_worker = allocate_worker(worker); assert(new_worker != NULL, "Failed to allocate GangWorker"); _gang_workers[worker] = new_worker; - if (new_worker == NULL || !os::create_thread(new_worker, os::pgc_thread)) + if (new_worker == NULL || !os::create_thread(new_worker, worker_type)) { vm_exit_out_of_memory(0, "Cannot create worker GC thread. Out of system resources."); + return false; + } if (!DisableStartThread) { os::start_thread(new_worker); } } + return true; } AbstractWorkGang::~AbstractWorkGang() { @@ -383,7 +407,7 @@ return _tasks != NULL; } -void SubTasksDone::set_par_threads(int t) { +void SubTasksDone::set_n_threads(int t) { #ifdef ASSERT assert(_claimed == 0 || _threads_completed == _n_threads, "should not be called while tasks are being processed!"); diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/utilities/workgroup.hpp --- a/hotspot/src/share/vm/utilities/workgroup.hpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/utilities/workgroup.hpp Mon Sep 20 14:38:38 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -29,6 +29,7 @@ class YieldingFlexibleGangWorker; class YieldingFlexibleGangTask; class WorkData; +class AbstractWorkGang; // An abstract task to be worked on by a gang. // You subclass this to supply your own work() method @@ -38,6 +39,13 @@ // The argument tells you which member of the gang you are. virtual void work(int i) = 0; + // This method configures the task for proper termination. + // Some tasks do not have any requirements on termination + // and may inherit this method that does nothing. Some + // tasks do some coordination on termination and override + // this method to implement that coordination. + virtual void set_for_termination(int active_workers) {}; + // Debugging accessor for the name. const char* name() const PRODUCT_RETURN_(return NULL;); int counter() { return _counter; } @@ -64,6 +72,18 @@ virtual ~AbstractGangTask() { } }; +class AbstractGangTaskWOopQueues : public AbstractGangTask { + OopTaskQueueSet* _queues; + ParallelTaskTerminator _terminator; + public: + AbstractGangTaskWOopQueues(const char* name, OopTaskQueueSet* queues) : + AbstractGangTask(name), _queues(queues), _terminator(0, _queues) {} + ParallelTaskTerminator* terminator() { return &_terminator; } + virtual void set_for_termination(int active_workers) { + terminator()->reset_for_reuse(active_workers); + } + OopTaskQueueSet* queues() { return _queues; } +}; // Class AbstractWorkGang: // An abstract class representing a gang of workers. @@ -114,6 +134,9 @@ int total_workers() const { return _total_workers; } + virtual int active_workers() const { + return _total_workers; + } bool terminate() const { return _terminate; } @@ -199,6 +222,13 @@ bool are_GC_task_threads, bool are_ConcurrentGC_threads); // Run a task, returns when the task is done (or terminated). virtual void run_task(AbstractGangTask* task); + void run_task(AbstractGangTask* task, uint no_of_parallel_workers); + // Allocate a worker and return a pointer to it. + virtual GangWorker* allocate_worker(int which); + // Initialize workers in the gang. Return true if initialization + // succeeded. The type of the worker can be overridden in a derived + // class with the appropriate implementation of allocate_worker(). + bool initialize_workers(); }; // Class GangWorker: @@ -226,6 +256,34 @@ AbstractWorkGang* gang() const { return _gang; } }; +class FlexibleWorkGang: public WorkGang { + protected: + int _active_workers; + public: + // Constructor and destructor. + FlexibleWorkGang(const char* name, int workers, + bool are_GC_task_threads, + bool are_ConcurrentGC_threads) : + WorkGang(name, workers, are_GC_task_threads, are_ConcurrentGC_threads) { + _active_workers = ParallelGCThreads; + }; + // Accessors for fields + virtual int active_workers() const { return _active_workers; } + void set_active_workers(int v) { _active_workers = v; } +}; + +// Work gangs in garbage collectors: 2009-06-10 +// +// SharedHeap - work gang for stop-the-world parallel collection. +// Used by +// ParNewGeneration +// CMSParRemarkTask +// CMSRefProcTaskExecutor +// G1CollectedHeap +// G1ParFinalCountTask +// ConcurrentMark +// CMSCollector + // A class that acts as a synchronisation barrier. Workers enter // the barrier and must wait until all other workers have entered // before any of them may leave. @@ -271,7 +329,7 @@ int _n_threads; jint _threads_completed; #ifdef ASSERT - jint _claimed; + volatile jint _claimed; #endif // Set all tasks to unclaimed. @@ -286,9 +344,10 @@ // True iff the object is in a valid state. bool valid(); - // Set the number of parallel threads doing the tasks to "t". Can only + // Get/set the number of parallel threads doing the tasks to "t". Can only // be called before tasks start or after they are complete. - void set_par_threads(int t); + int n_threads() { return _n_threads; } + void set_n_threads(int t); // Returns "false" if the task "t" is unclaimed, and ensures that task is // claimed. The task "t" is required to be within the range of "this". @@ -315,13 +374,17 @@ protected: jint _n_tasks; // Total number of tasks available. jint _n_claimed; // Number of tasks claimed. + // _n_threads is used to determine when a sub task is done. + // See comments on SubTasksDone::_n_threads jint _n_threads; // Total number of parallel threads. jint _n_completed; // Number of completed threads. void clear(); public: - SequentialSubTasksDone() { clear(); } + SequentialSubTasksDone() { + clear(); + } ~SequentialSubTasksDone() {} // True iff the object is in a valid state. @@ -330,11 +393,12 @@ // number of tasks jint n_tasks() const { return _n_tasks; } - // Set the number of parallel threads doing the tasks to t. + // Get/set the number of parallel threads doing the tasks to t. // Should be called before the task starts but it is safe // to call this once a task is running provided that all // threads agree on the number of threads. - void set_par_threads(int t) { _n_threads = t; } + int n_threads() { return _n_threads; } + void set_n_threads(int t) { _n_threads = t; } // Set the number of tasks to be claimed to t. As above, // should be called before the tasks start but it is safe diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/utilities/yieldingWorkgroup.cpp --- a/hotspot/src/share/vm/utilities/yieldingWorkgroup.cpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/utilities/yieldingWorkgroup.cpp Mon Sep 20 14:38:38 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2010 Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,29 +32,13 @@ YieldingFlexibleWorkGang::YieldingFlexibleWorkGang( const char* name, int workers, bool are_GC_task_threads) : - AbstractWorkGang(name, are_GC_task_threads, false) { - // Save arguments. - _total_workers = workers; - assert(_total_workers > 0, "Must have more than 1 worker"); - - _yielded_workers = 0; + FlexibleWorkGang(name, workers, are_GC_task_threads, false), + _yielded_workers(0) {} - if (TraceWorkGang) { - tty->print_cr("Constructing work gang %s with %d threads", name, workers); - } - _gang_workers = NEW_C_HEAP_ARRAY(GangWorker*, workers); - assert(gang_workers() != NULL, "Failed to allocate gang workers"); - for (int worker = 0; worker < total_workers(); worker += 1) { - YieldingFlexibleGangWorker* new_worker = - new YieldingFlexibleGangWorker(this, worker); - assert(new_worker != NULL, "Failed to allocate YieldingFlexibleGangWorker"); - _gang_workers[worker] = new_worker; - if (new_worker == NULL || !os::create_thread(new_worker, os::pgc_thread)) - vm_exit_out_of_memory(0, "Cannot create worker GC thread. Out of system resources."); - if (!DisableStartThread) { - os::start_thread(new_worker); - } - } +GangWorker* YieldingFlexibleWorkGang::allocate_worker(int which) { + YieldingFlexibleGangWorker* new_member = + new YieldingFlexibleGangWorker(this, which); + return (YieldingFlexibleGangWorker*) new_member; } // Run a task; returns when the task is done, or the workers yield, @@ -142,6 +126,7 @@ _active_workers = total_workers(); } new_task->set_actual_size(_active_workers); + new_task->set_for_termination(_active_workers); assert(_started_workers == 0, "Tabula rasa non"); assert(_finished_workers == 0, "Tabula rasa non"); @@ -161,22 +146,22 @@ for (Status status = yielding_task()->status(); status != COMPLETED && status != YIELDED && status != ABORTED; status = yielding_task()->status()) { - assert(started_workers() <= active_workers(), "invariant"); - assert(finished_workers() <= active_workers(), "invariant"); - assert(yielded_workers() <= active_workers(), "invariant"); + assert(started_workers() <= total_workers(), "invariant"); + assert(finished_workers() <= total_workers(), "invariant"); + assert(yielded_workers() <= total_workers(), "invariant"); monitor()->wait(Mutex::_no_safepoint_check_flag); } switch (yielding_task()->status()) { case COMPLETED: case ABORTED: { - assert(finished_workers() == active_workers(), "Inconsistent status"); + assert(finished_workers() == total_workers(), "Inconsistent status"); assert(yielded_workers() == 0, "Invariant"); reset(); // for next task; gang<->task binding released break; } case YIELDED: { assert(yielded_workers() > 0, "Invariant"); - assert(yielded_workers() + finished_workers() == active_workers(), + assert(yielded_workers() + finished_workers() == total_workers(), "Inconsistent counts"); break; } @@ -208,7 +193,6 @@ void YieldingFlexibleWorkGang::reset() { _started_workers = 0; _finished_workers = 0; - _active_workers = 0; yielding_task()->set_gang(NULL); _task = NULL; // unbind gang from task } @@ -216,7 +200,7 @@ void YieldingFlexibleWorkGang::yield() { assert(task() != NULL, "Inconsistency; should have task binding"); MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag); - assert(yielded_workers() < active_workers(), "Consistency check"); + assert(yielded_workers() < total_workers(), "Consistency check"); if (yielding_task()->status() == ABORTING) { // Do not yield; we need to abort as soon as possible // XXX NOTE: This can cause a performance pathology in the @@ -227,7 +211,7 @@ // us to return at each potential yield point. return; } - if (++_yielded_workers + finished_workers() == active_workers()) { + if (++_yielded_workers + finished_workers() == total_workers()) { yielding_task()->set_status(YIELDED); monitor()->notify_all(); } else { diff -r 516540f1f076 -r 67b1a69ef5aa hotspot/src/share/vm/utilities/yieldingWorkgroup.hpp --- a/hotspot/src/share/vm/utilities/yieldingWorkgroup.hpp Thu Sep 16 13:45:55 2010 -0700 +++ b/hotspot/src/share/vm/utilities/yieldingWorkgroup.hpp Mon Sep 20 14:38:38 2010 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2010 Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -54,6 +54,25 @@ virtual void loop(); }; +class FlexibleGangTask: public AbstractGangTask { + int _actual_size; // size of gang obtained +protected: + int _requested_size; // size of gang requested +public: + FlexibleGangTask(const char* name): AbstractGangTask(name), + _requested_size(0) {} + + // The abstract work method. + // The argument tells you which member of the gang you are. + virtual void work(int i) = 0; + + int requested_size() const { return _requested_size; } + int actual_size() const { return _actual_size; } + + void set_requested_size(int sz) { _requested_size = sz; } + void set_actual_size(int sz) { _actual_size = sz; } +}; + // An abstract task to be worked on by a flexible work gang, // and where the workers will periodically yield, usually // in response to some condition that is signalled by means @@ -70,19 +89,15 @@ // maximum) in response to task requests at certain points. // The last part (the flexible part) has not yet been fully // fleshed out and is a work in progress. -class YieldingFlexibleGangTask: public AbstractGangTask { +class YieldingFlexibleGangTask: public FlexibleGangTask { Status _status; YieldingFlexibleWorkGang* _gang; - int _actual_size; // size of gang obtained protected: - int _requested_size; // size of gang requested - // Constructor and desctructor: only construct subclasses. - YieldingFlexibleGangTask(const char* name): AbstractGangTask(name), + YieldingFlexibleGangTask(const char* name): FlexibleGangTask(name), _status(INACTIVE), - _gang(NULL), - _requested_size(0) { } + _gang(NULL) { } virtual ~YieldingFlexibleGangTask() { } @@ -126,20 +141,13 @@ bool completed() const { return _status == COMPLETED; } bool aborted() const { return _status == ABORTED; } bool active() const { return _status == ACTIVE; } - - int requested_size() const { return _requested_size; } - int actual_size() const { return _actual_size; } - - void set_requested_size(int sz) { _requested_size = sz; } - void set_actual_size(int sz) { _actual_size = sz; } }; - // Class YieldingWorkGang: A subclass of WorkGang. // In particular, a YieldingWorkGang is made up of // YieldingGangWorkers, and provides infrastructure // supporting yielding to the "GangOverseer", // being the thread that orchestrates the WorkGang via run_task(). -class YieldingFlexibleWorkGang: public AbstractWorkGang { +class YieldingFlexibleWorkGang: public FlexibleWorkGang { // Here's the public interface to this class. public: // Constructor and destructor. @@ -151,6 +159,9 @@ "Incorrect cast"); return (YieldingFlexibleGangTask*)task(); } + // Allocate a worker and return a pointer to it. + GangWorker* allocate_worker(int which); + // Run a task; returns when the task is done, or the workers yield, // or the task is aborted, or the work gang is terminated via stop(). // A task that has been yielded can be continued via this same interface @@ -180,10 +191,6 @@ void abort(); private: - // The currently active workers in this gang. - // This is a number that is dynamically adjusted by - // the run_task() method at each subsequent invocation, - // using data in the YieldingFlexibleGangTask. int _active_workers; int _yielded_workers; void wait_for_gang(); @@ -194,6 +201,7 @@ return _active_workers; } + // Accessors for fields int yielded_workers() const { return _yielded_workers; }