# HG changeset patch # User jcoomes # Date 1322867478 28800 # Node ID 3ba6557b91f70251c3a880a8a21d1e686a8514cc # Parent f0eccb2946986fb9626efde7d8ed9c8192623f5c# Parent d7066920d582f9320209f874e6fccdf9182a6ebd Merge diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/make/hotspot_version --- a/hotspot/make/hotspot_version Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/make/hotspot_version Fri Dec 02 15:11:18 2011 -0800 @@ -35,7 +35,7 @@ HS_MAJOR_VER=23 HS_MINOR_VER=0 -HS_BUILD_NUMBER=06 +HS_BUILD_NUMBER=07 JDK_MAJOR_VER=1 JDK_MINOR_VER=8 diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/make/linux/makefiles/mapfile-vers-debug --- a/hotspot/make/linux/makefiles/mapfile-vers-debug Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/make/linux/makefiles/mapfile-vers-debug Fri Dec 02 15:11:18 2011 -0800 @@ -1,7 +1,3 @@ -# -# @(#)mapfile-vers-debug 1.18 07/10/25 16:47:35 -# - # # Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -221,6 +217,7 @@ JVM_SetArrayElement; JVM_SetClassSigners; JVM_SetLength; + JVM_SetNativeThreadName; JVM_SetPrimitiveArrayElement; JVM_SetProtectionDomain; JVM_SetSockOpt; diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/make/linux/makefiles/mapfile-vers-product --- a/hotspot/make/linux/makefiles/mapfile-vers-product Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/make/linux/makefiles/mapfile-vers-product Fri Dec 02 15:11:18 2011 -0800 @@ -1,7 +1,3 @@ -# -# @(#)mapfile-vers-product 1.19 08/02/12 10:56:37 -# - # # Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -221,6 +217,7 @@ JVM_SetArrayElement; JVM_SetClassSigners; JVM_SetLength; + JVM_SetNativeThreadName; JVM_SetPrimitiveArrayElement; JVM_SetProtectionDomain; JVM_SetSockOpt; diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/make/solaris/makefiles/mapfile-vers --- a/hotspot/make/solaris/makefiles/mapfile-vers Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/make/solaris/makefiles/mapfile-vers Fri Dec 02 15:11:18 2011 -0800 @@ -1,7 +1,3 @@ -# -# @(#)mapfile-vers 1.32 07/10/25 16:47:36 -# - # # Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -221,6 +217,7 @@ JVM_SetArrayElement; JVM_SetClassSigners; JVM_SetLength; + JVM_SetNativeThreadName; JVM_SetPrimitiveArrayElement; JVM_SetProtectionDomain; JVM_SetSockOpt; diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/make/windows/makefiles/projectcreator.make --- a/hotspot/make/windows/makefiles/projectcreator.make Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/make/windows/makefiles/projectcreator.make Fri Dec 02 15:11:18 2011 -0800 @@ -50,6 +50,7 @@ -relativeInclude src\closed\os_cpu\windows_$(Platform_arch)\vm \ -relativeInclude src\closed\cpu\$(Platform_arch)\vm \ -relativeInclude src\share\vm \ + -relativeInclude src\share\vm\precompiled \ -relativeInclude src\share\vm\prims \ -relativeInclude src\os\windows\vm \ -relativeInclude src\os_cpu\windows_$(Platform_arch)\vm \ diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/os/bsd/vm/os_bsd.cpp --- a/hotspot/src/os/bsd/vm/os_bsd.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/os/bsd/vm/os_bsd.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -5778,15 +5778,18 @@ // is_headless_jre() // -// Test for the existence of libmawt in motif21 or xawt directories +// Test for the existence of xawt/libmawt.so or libawt_xawt.so // in order to report if we are running in a headless jre // +// Since JDK8 xawt/libmawt.so was moved into the same directory +// as libawt.so, and renamed libawt_xawt.so +// bool os::is_headless_jre() { struct stat statbuf; char buf[MAXPATHLEN]; char libmawtpath[MAXPATHLEN]; const char *xawtstr = "/xawt/libmawt" JNI_LIB_SUFFIX; - const char *motifstr = "/motif21/libmawt" JNI_LIB_SUFFIX; + const char *new_xawtstr = "/libawt_xawt" JNI_LIB_SUFFIX; char *p; // Get path to libjvm.so @@ -5807,9 +5810,9 @@ strcat(libmawtpath, xawtstr); if (::stat(libmawtpath, &statbuf) == 0) return false; - // check motif21/libmawt.so + // check libawt_xawt.so strcpy(libmawtpath, buf); - strcat(libmawtpath, motifstr); + strcat(libmawtpath, new_xawtstr); if (::stat(libmawtpath, &statbuf) == 0) return false; return true; diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/os/linux/vm/os_linux.cpp --- a/hotspot/src/os/linux/vm/os_linux.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/os/linux/vm/os_linux.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -5425,15 +5425,18 @@ // is_headless_jre() // -// Test for the existence of libmawt in motif21 or xawt directories +// Test for the existence of xawt/libmawt.so or libawt_xawt.so // in order to report if we are running in a headless jre // +// Since JDK8 xawt/libmawt.so was moved into the same directory +// as libawt.so, and renamed libawt_xawt.so +// bool os::is_headless_jre() { struct stat statbuf; char buf[MAXPATHLEN]; char libmawtpath[MAXPATHLEN]; const char *xawtstr = "/xawt/libmawt.so"; - const char *motifstr = "/motif21/libmawt.so"; + const char *new_xawtstr = "/libawt_xawt.so"; char *p; // Get path to libjvm.so @@ -5454,9 +5457,9 @@ strcat(libmawtpath, xawtstr); if (::stat(libmawtpath, &statbuf) == 0) return false; - // check motif21/libmawt.so + // check libawt_xawt.so strcpy(libmawtpath, buf); - strcat(libmawtpath, motifstr); + strcat(libmawtpath, new_xawtstr); if (::stat(libmawtpath, &statbuf) == 0) return false; return true; diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/os/solaris/vm/os_solaris.cpp --- a/hotspot/src/os/solaris/vm/os_solaris.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/os/solaris/vm/os_solaris.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -6311,15 +6311,18 @@ // is_headless_jre() // -// Test for the existence of libmawt in motif21 or xawt directories +// Test for the existence of xawt/libmawt.so or libawt_xawt.so // in order to report if we are running in a headless jre // +// Since JDK8 xawt/libmawt.so was moved into the same directory +// as libawt.so, and renamed libawt_xawt.so +// bool os::is_headless_jre() { struct stat statbuf; char buf[MAXPATHLEN]; char libmawtpath[MAXPATHLEN]; const char *xawtstr = "/xawt/libmawt.so"; - const char *motifstr = "/motif21/libmawt.so"; + const char *new_xawtstr = "/libawt_xawt.so"; char *p; // Get path to libjvm.so @@ -6340,9 +6343,9 @@ strcat(libmawtpath, xawtstr); if (::stat(libmawtpath, &statbuf) == 0) return false; - // check motif21/libmawt.so + // check libawt_xawt.so strcpy(libmawtpath, buf); - strcat(libmawtpath, motifstr); + strcat(libmawtpath, new_xawtstr); if (::stat(libmawtpath, &statbuf) == 0) return false; return true; diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -668,12 +668,16 @@ // We de-virtualize the block-related calls below, since we know that our // space is a CompactibleFreeListSpace. + #define FreeListSpace_DCTOC__walk_mem_region_with_cl_DEFN(ClosureType) \ void FreeListSpace_DCTOC::walk_mem_region_with_cl(MemRegion mr, \ HeapWord* bottom, \ HeapWord* top, \ ClosureType* cl) { \ - if (SharedHeap::heap()->n_par_threads() > 0) { \ + bool is_par = SharedHeap::heap()->n_par_threads() > 0; \ + if (is_par) { \ + assert(SharedHeap::heap()->n_par_threads() == \ + SharedHeap::heap()->workers()->active_workers(), "Mismatch"); \ walk_mem_region_with_cl_par(mr, bottom, top, cl); \ } else { \ walk_mem_region_with_cl_nopar(mr, bottom, top, cl); \ @@ -1925,6 +1929,9 @@ if (rem_size < SmallForDictionary) { bool is_par = (SharedHeap::heap()->n_par_threads() > 0); if (is_par) _indexedFreeListParLocks[rem_size]->lock(); + assert(!is_par || + (SharedHeap::heap()->n_par_threads() == + SharedHeap::heap()->workers()->active_workers()), "Mismatch"); returnChunkToFreeList(ffc); split(size, rem_size); if (is_par) _indexedFreeListParLocks[rem_size]->unlock(); diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -3582,16 +3582,6 @@ " or no bits are set in the gc_prologue before the start of the next " "subsequent marking phase."); - // Temporarily disabled, since pre/post-consumption closures don't - // care about precleaned cards - #if 0 - { - MemRegion mr = MemRegion((HeapWord*)_virtual_space.low(), - (HeapWord*)_virtual_space.high()); - _ct->ct_bs()->preclean_dirty_cards(mr); - } - #endif - // Save the end of the used_region of the constituent generations // to be used to limit the extent of sweep in each generation. save_sweep_limits(); @@ -4244,9 +4234,11 @@ bool CMSCollector::do_marking_mt(bool asynch) { assert(ConcGCThreads > 0 && conc_workers() != NULL, "precondition"); - // In the future this would be determined ergonomically, based - // on #cpu's, # active mutator threads (and load), and mutation rate. - int num_workers = ConcGCThreads; + int num_workers = AdaptiveSizePolicy::calc_active_conc_workers( + conc_workers()->total_workers(), + conc_workers()->active_workers(), + Threads::number_of_non_daemon_threads()); + conc_workers()->set_active_workers(num_workers); CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace(); CompactibleFreeListSpace* perm_space = _permGen->cmsSpace(); @@ -5062,6 +5054,8 @@ ParallelTaskTerminator _term; public: + // A value of 0 passed to n_workers will cause the number of + // workers to be taken from the active workers in the work gang. CMSParRemarkTask(CMSCollector* collector, CompactibleFreeListSpace* cms_space, CompactibleFreeListSpace* perm_space, @@ -5544,7 +5538,15 @@ GenCollectedHeap* gch = GenCollectedHeap::heap(); FlexibleWorkGang* workers = gch->workers(); assert(workers != NULL, "Need parallel worker threads."); - int n_workers = workers->total_workers(); + // Choose to use the number of GC workers most recently set + // into "active_workers". If active_workers is not set, set it + // to ParallelGCThreads. + int n_workers = workers->active_workers(); + if (n_workers == 0) { + assert(n_workers > 0, "Should have been set during scavenge"); + n_workers = ParallelGCThreads; + workers->set_active_workers(n_workers); + } CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace(); CompactibleFreeListSpace* perm_space = _permGen->cmsSpace(); @@ -5884,8 +5886,17 @@ // and a different number of discovered lists may have Ref objects. // That is OK as long as the Reference lists are balanced (see // balance_all_queues() and balance_queues()). - - rp->set_active_mt_degree(ParallelGCThreads); + GenCollectedHeap* gch = GenCollectedHeap::heap(); + int active_workers = ParallelGCThreads; + FlexibleWorkGang* workers = gch->workers(); + if (workers != NULL) { + active_workers = workers->active_workers(); + // The expectation is that active_workers will have already + // been set to a reasonable value. If it has not been set, + // investigate. + assert(active_workers > 0, "Should have been set during scavenge"); + } + rp->set_active_mt_degree(active_workers); CMSRefProcTaskExecutor task_executor(*this); rp->process_discovered_references(&_is_alive_closure, &cmsKeepAliveClosure, diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp --- a/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -255,7 +255,18 @@ CollectionSetChooser:: prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize) { _first_par_unreserved_idx = 0; - size_t max_waste = ParallelGCThreads * chunkSize; + int n_threads = ParallelGCThreads; + if (UseDynamicNumberOfGCThreads) { + assert(G1CollectedHeap::heap()->workers()->active_workers() > 0, + "Should have been set earlier"); + // This is defensive code. As the assertion above says, the number + // of active threads should be > 0, but in case there is some path + // or some improperly initialized variable with leads to no + // active threads, protect against that in a product build. + n_threads = MAX2(G1CollectedHeap::heap()->workers()->active_workers(), + 1); + } + size_t max_waste = n_threads * chunkSize; // it should be aligned with respect to chunkSize size_t aligned_n_regions = (n_regions + (chunkSize - 1)) / chunkSize * chunkSize; @@ -265,6 +276,11 @@ jint CollectionSetChooser::getParMarkedHeapRegionChunk(jint n_regions) { + // Don't do this assert because this can be called at a point + // where the loop up stream will not execute again but might + // try to claim more chunks (loop test has not been done yet). + // assert(_markedRegions.length() > _first_par_unreserved_idx, + // "Striding beyond the marked regions"); jint res = Atomic::add(n_regions, &_first_par_unreserved_idx); assert(_markedRegions.length() > res + n_regions - 1, "Should already have been expanded"); diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp --- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -44,7 +44,7 @@ // // CMS Bit Map Wrapper -CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter): +CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) : _bm((uintptr_t*)NULL,0), _shifter(shifter) { _bmStartWord = (HeapWord*)(rs.base()); @@ -458,12 +458,17 @@ #pragma warning( disable:4355 ) // 'this' : used in base member initializer list #endif // _MSC_VER +size_t ConcurrentMark::scale_parallel_threads(size_t n_par_threads) { + return MAX2((n_par_threads + 2) / 4, (size_t)1); +} + ConcurrentMark::ConcurrentMark(ReservedSpace rs, int max_regions) : _markBitMap1(rs, MinObjAlignment - 1), _markBitMap2(rs, MinObjAlignment - 1), _parallel_marking_threads(0), + _max_parallel_marking_threads(0), _sleep_factor(0.0), _marking_task_overhead(1.0), _cleanup_sleep_factor(0.0), @@ -554,15 +559,17 @@ if (ParallelGCThreads == 0) { // if we are not running with any parallel GC threads we will not // spawn any marking threads either - _parallel_marking_threads = 0; - _sleep_factor = 0.0; - _marking_task_overhead = 1.0; + _parallel_marking_threads = 0; + _max_parallel_marking_threads = 0; + _sleep_factor = 0.0; + _marking_task_overhead = 1.0; } else { if (ConcGCThreads > 0) { // notice that ConcGCThreads overwrites G1MarkingOverheadPercent // if both are set _parallel_marking_threads = ConcGCThreads; + _max_parallel_marking_threads = _parallel_marking_threads; _sleep_factor = 0.0; _marking_task_overhead = 1.0; } else if (G1MarkingOverheadPercent > 0) { @@ -583,10 +590,12 @@ (1.0 - marking_task_overhead) / marking_task_overhead; _parallel_marking_threads = (size_t) marking_thread_num; + _max_parallel_marking_threads = _parallel_marking_threads; _sleep_factor = sleep_factor; _marking_task_overhead = marking_task_overhead; } else { - _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1); + _parallel_marking_threads = scale_parallel_threads(ParallelGCThreads); + _max_parallel_marking_threads = _parallel_marking_threads; _sleep_factor = 0.0; _marking_task_overhead = 1.0; } @@ -609,7 +618,7 @@ guarantee(parallel_marking_threads() > 0, "peace of mind"); _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads", - (int) _parallel_marking_threads, false, true); + (int) _max_parallel_marking_threads, false, true); if (_parallel_workers == NULL) { vm_exit_during_initialization("Failed necessary allocation."); } else { @@ -1106,6 +1115,33 @@ ~CMConcurrentMarkingTask() { } }; +// Calculates the number of active workers for a concurrent +// phase. +int ConcurrentMark::calc_parallel_marking_threads() { + + size_t n_conc_workers; + if (!G1CollectedHeap::use_parallel_gc_threads()) { + n_conc_workers = 1; + } else { + if (!UseDynamicNumberOfGCThreads || + (!FLAG_IS_DEFAULT(ConcGCThreads) && + !ForceDynamicNumberOfGCThreads)) { + n_conc_workers = max_parallel_marking_threads(); + } else { + n_conc_workers = + AdaptiveSizePolicy::calc_default_active_workers( + max_parallel_marking_threads(), + 1, /* Minimum workers */ + parallel_marking_threads(), + Threads::number_of_non_daemon_threads()); + // Don't scale down "n_conc_workers" by scale_parallel_threads() because + // that scaling has already gone into "_max_parallel_marking_threads". + } + } + assert(n_conc_workers > 0, "Always need at least 1"); + return (int) MAX2(n_conc_workers, (size_t) 1); +} + void ConcurrentMark::markFromRoots() { // we might be tempted to assert that: // assert(asynch == !SafepointSynchronize::is_at_safepoint(), @@ -1116,9 +1152,20 @@ _restart_for_overflow = false; - size_t active_workers = MAX2((size_t) 1, parallel_marking_threads()); + // Parallel task terminator is set in "set_phase()". force_overflow_conc()->init(); - set_phase(active_workers, true /* concurrent */); + + // _g1h has _n_par_threads + + _parallel_marking_threads = calc_parallel_marking_threads(); + assert(parallel_marking_threads() <= max_parallel_marking_threads(), + "Maximum number of marking threads exceeded"); + _parallel_workers->set_active_workers((int)_parallel_marking_threads); + // Don't set _n_par_threads because it affects MT in proceess_strong_roots() + // and the decisions on that MT processing is made elsewhere. + + assert( _parallel_workers->active_workers() > 0, "Should have been set"); + set_phase(_parallel_workers->active_workers(), true /* concurrent */); CMConcurrentMarkingTask markingTask(this, cmThread()); if (parallel_marking_threads() > 0) { @@ -1181,6 +1228,7 @@ true /* expected_active */); if (VerifyDuringGC) { + HandleMark hm; // handle scope gclog_or_tty->print(" VerifyDuringGC:(after)"); Universe::heap()->prepare_for_verify(); @@ -1463,12 +1511,20 @@ G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm, BitMap* region_bm, BitMap* card_bm) : AbstractGangTask("G1 final counting"), _g1h(g1h), - _bm(bm), _region_bm(region_bm), _card_bm(card_bm) { - if (ParallelGCThreads > 0) { - _n_workers = _g1h->workers()->total_workers(); + _bm(bm), _region_bm(region_bm), _card_bm(card_bm), + _n_workers(0) + { + // Use the value already set as the number of active threads + // in the call to run_task(). Needed for the allocation of + // _live_bytes and _used_bytes. + if (G1CollectedHeap::use_parallel_gc_threads()) { + assert( _g1h->workers()->active_workers() > 0, + "Should have been previously set"); + _n_workers = _g1h->workers()->active_workers(); } else { _n_workers = 1; } + _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers); _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers); } @@ -1485,6 +1541,7 @@ calccl.no_yield(); if (G1CollectedHeap::use_parallel_gc_threads()) { _g1h->heap_region_par_iterate_chunked(&calccl, i, + (int) _n_workers, HeapRegion::FinalCountClaimValue); } else { _g1h->heap_region_iterate(&calccl); @@ -1530,10 +1587,42 @@ FreeRegionList* local_cleanup_list, OldRegionSet* old_proxy_set, HumongousRegionSet* humongous_proxy_set, - HRRSCleanupTask* hrrs_cleanup_task); + HRRSCleanupTask* hrrs_cleanup_task) : + _g1(g1), _worker_num(worker_num), + _max_live_bytes(0), _regions_claimed(0), + _freed_bytes(0), + _claimed_region_time(0.0), _max_region_time(0.0), + _local_cleanup_list(local_cleanup_list), + _old_proxy_set(old_proxy_set), + _humongous_proxy_set(humongous_proxy_set), + _hrrs_cleanup_task(hrrs_cleanup_task) { } + size_t freed_bytes() { return _freed_bytes; } - bool doHeapRegion(HeapRegion *r); + bool doHeapRegion(HeapRegion *hr) { + // We use a claim value of zero here because all regions + // were claimed with value 1 in the FinalCount task. + hr->reset_gc_time_stamp(); + if (!hr->continuesHumongous()) { + double start = os::elapsedTime(); + _regions_claimed++; + hr->note_end_of_marking(); + _max_live_bytes += hr->max_live_bytes(); + _g1->free_region_if_empty(hr, + &_freed_bytes, + _local_cleanup_list, + _old_proxy_set, + _humongous_proxy_set, + _hrrs_cleanup_task, + true /* par */); + double region_time = (os::elapsedTime() - start); + _claimed_region_time += region_time; + if (region_time > _max_region_time) { + _max_region_time = region_time; + } + } + return false; + } size_t max_live_bytes() { return _max_live_bytes; } size_t regions_claimed() { return _regions_claimed; } @@ -1568,6 +1657,7 @@ &hrrs_cleanup_task); if (G1CollectedHeap::use_parallel_gc_threads()) { _g1h->heap_region_par_iterate_chunked(&g1_note_end, i, + _g1h->workers()->active_workers(), HeapRegion::NoteEndClaimValue); } else { _g1h->heap_region_iterate(&g1_note_end); @@ -1644,47 +1734,6 @@ }; -G1NoteEndOfConcMarkClosure:: -G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, - int worker_num, - FreeRegionList* local_cleanup_list, - OldRegionSet* old_proxy_set, - HumongousRegionSet* humongous_proxy_set, - HRRSCleanupTask* hrrs_cleanup_task) - : _g1(g1), _worker_num(worker_num), - _max_live_bytes(0), _regions_claimed(0), - _freed_bytes(0), - _claimed_region_time(0.0), _max_region_time(0.0), - _local_cleanup_list(local_cleanup_list), - _old_proxy_set(old_proxy_set), - _humongous_proxy_set(humongous_proxy_set), - _hrrs_cleanup_task(hrrs_cleanup_task) { } - -bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) { - // We use a claim value of zero here because all regions - // were claimed with value 1 in the FinalCount task. - hr->reset_gc_time_stamp(); - if (!hr->continuesHumongous()) { - double start = os::elapsedTime(); - _regions_claimed++; - hr->note_end_of_marking(); - _max_live_bytes += hr->max_live_bytes(); - _g1->free_region_if_empty(hr, - &_freed_bytes, - _local_cleanup_list, - _old_proxy_set, - _humongous_proxy_set, - _hrrs_cleanup_task, - true /* par */); - double region_time = (os::elapsedTime() - start); - _claimed_region_time += region_time; - if (region_time > _max_region_time) { - _max_region_time = region_time; - } - } - return false; -} - void ConcurrentMark::cleanup() { // world is stopped at this checkpoint assert(SafepointSynchronize::is_at_safepoint(), @@ -1716,6 +1765,9 @@ HeapRegionRemSet::reset_for_cleanup_tasks(); + g1h->set_par_threads(); + size_t n_workers = g1h->n_par_threads(); + // Do counting once more with the world stopped for good measure. G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(), &_region_bm, &_card_bm); @@ -1724,9 +1776,10 @@ HeapRegion::InitialClaimValue), "sanity check"); - int n_workers = g1h->workers()->total_workers(); - g1h->set_par_threads(n_workers); + assert(g1h->n_par_threads() == (int) n_workers, + "Should not have been reset"); g1h->workers()->run_task(&g1_par_count_task); + // Done with the parallel phase so reset to 0. g1h->set_par_threads(0); assert(g1h->check_heap_region_claim_values( @@ -1776,8 +1829,7 @@ double note_end_start = os::elapsedTime(); G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list); if (G1CollectedHeap::use_parallel_gc_threads()) { - int n_workers = g1h->workers()->total_workers(); - g1h->set_par_threads(n_workers); + g1h->set_par_threads((int)n_workers); g1h->workers()->run_task(&g1_par_note_end_task); g1h->set_par_threads(0); @@ -1806,8 +1858,7 @@ double rs_scrub_start = os::elapsedTime(); G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); if (G1CollectedHeap::use_parallel_gc_threads()) { - int n_workers = g1h->workers()->total_workers(); - g1h->set_par_threads(n_workers); + g1h->set_par_threads((int)n_workers); g1h->workers()->run_task(&g1_par_scrub_rs_task); g1h->set_par_threads(0); @@ -1825,7 +1876,7 @@ // this will also free any regions totally full of garbage objects, // and sort the regions. - g1h->g1_policy()->record_concurrent_mark_cleanup_end(); + g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers); // Statistics. double end = os::elapsedTime(); @@ -1991,16 +2042,12 @@ class G1CMParKeepAliveAndDrainClosure: public OopClosure { ConcurrentMark* _cm; CMTask* _task; - CMBitMap* _bitMap; int _ref_counter_limit; int _ref_counter; public: - G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, - CMTask* task, - CMBitMap* bitMap) : - _cm(cm), _task(task), _bitMap(bitMap), - _ref_counter_limit(G1RefProcDrainInterval) - { + G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) : + _cm(cm), _task(task), + _ref_counter_limit(G1RefProcDrainInterval) { assert(_ref_counter_limit > 0, "sanity"); _ref_counter = _ref_counter_limit; } @@ -2091,19 +2138,16 @@ private: G1CollectedHeap* _g1h; ConcurrentMark* _cm; - CMBitMap* _bitmap; WorkGang* _workers; int _active_workers; public: G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, ConcurrentMark* cm, - CMBitMap* bitmap, WorkGang* workers, int n_workers) : - _g1h(g1h), _cm(cm), _bitmap(bitmap), - _workers(workers), _active_workers(n_workers) - { } + _g1h(g1h), _cm(cm), + _workers(workers), _active_workers(n_workers) { } // Executes the given task using concurrent marking worker threads. virtual void execute(ProcessTask& task); @@ -2115,21 +2159,18 @@ ProcessTask& _proc_task; G1CollectedHeap* _g1h; ConcurrentMark* _cm; - CMBitMap* _bitmap; public: G1CMRefProcTaskProxy(ProcessTask& proc_task, G1CollectedHeap* g1h, - ConcurrentMark* cm, - CMBitMap* bitmap) : + ConcurrentMark* cm) : AbstractGangTask("Process reference objects in parallel"), - _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap) - {} + _proc_task(proc_task), _g1h(g1h), _cm(cm) { } virtual void work(int i) { CMTask* marking_task = _cm->task(i); G1CMIsAliveClosure g1_is_alive(_g1h); - G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap); + G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task); G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task); _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain); @@ -2139,7 +2180,7 @@ void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { assert(_workers != NULL, "Need parallel worker threads."); - G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap); + G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); // We need to reset the phase for each task execution so that // the termination protocol of CMTask::do_marking_step works. @@ -2156,8 +2197,7 @@ public: G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : AbstractGangTask("Enqueue reference objects in parallel"), - _enq_task(enq_task) - { } + _enq_task(enq_task) { } virtual void work(int i) { _enq_task.work(i); @@ -2207,10 +2247,10 @@ // We use the work gang from the G1CollectedHeap and we utilize all // the worker threads. - int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1; + int active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1; active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1); - G1CMRefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(), + G1CMRefProcTaskExecutor par_task_executor(g1h, this, g1h->workers(), active_workers); if (rp->processing_is_mt()) { @@ -2290,7 +2330,9 @@ } CMRemarkTask(ConcurrentMark* cm) : - AbstractGangTask("Par Remark"), _cm(cm) { } + AbstractGangTask("Par Remark"), _cm(cm) { + _cm->terminator()->reset_for_reuse(cm->_g1h->workers()->active_workers()); + } }; void ConcurrentMark::checkpointRootsFinalWork() { @@ -2302,16 +2344,21 @@ if (G1CollectedHeap::use_parallel_gc_threads()) { G1CollectedHeap::StrongRootsScope srs(g1h); - // this is remark, so we'll use up all available threads - int active_workers = ParallelGCThreads; + // this is remark, so we'll use up all active threads + int active_workers = g1h->workers()->active_workers(); + if (active_workers == 0) { + assert(active_workers > 0, "Should have been set earlier"); + active_workers = ParallelGCThreads; + g1h->workers()->set_active_workers(active_workers); + } set_phase(active_workers, false /* concurrent */); + // Leave _parallel_marking_threads at it's + // value originally calculated in the ConcurrentMark + // constructor and pass values of the active workers + // through the gang in the task. CMRemarkTask remarkTask(this); - // We will start all available threads, even if we decide that the - // active_workers will be fewer. The extra ones will just bail out - // immediately. - int n_workers = g1h->workers()->total_workers(); - g1h->set_par_threads(n_workers); + g1h->set_par_threads(active_workers); g1h->workers()->run_task(&remarkTask); g1h->set_par_threads(0); } else { @@ -2859,8 +2906,10 @@ } } -class CSMarkOopClosure: public OopClosure { - friend class CSMarkBitMapClosure; +// Closures used by ConcurrentMark::complete_marking_in_collection_set(). + +class CSetMarkOopClosure: public OopClosure { + friend class CSetMarkBitMapClosure; G1CollectedHeap* _g1h; CMBitMap* _bm; @@ -2870,6 +2919,7 @@ int _ms_size; int _ms_ind; int _array_increment; + int _worker_i; bool push(oop obj, int arr_ind = 0) { if (_ms_ind == _ms_size) { @@ -2910,7 +2960,6 @@ for (int j = arr_ind; j < lim; j++) { do_oop(aobj->objArrayOopDesc::obj_at_addr(j)); } - } else { obj->oop_iterate(this); } @@ -2920,17 +2969,17 @@ } public: - CSMarkOopClosure(ConcurrentMark* cm, int ms_size) : + CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, int worker_i) : _g1h(G1CollectedHeap::heap()), _cm(cm), _bm(cm->nextMarkBitMap()), _ms_size(ms_size), _ms_ind(0), _ms(NEW_C_HEAP_ARRAY(oop, ms_size)), _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)), - _array_increment(MAX2(ms_size/8, 16)) - {} - - ~CSMarkOopClosure() { + _array_increment(MAX2(ms_size/8, 16)), + _worker_i(worker_i) { } + + ~CSetMarkOopClosure() { FREE_C_HEAP_ARRAY(oop, _ms); FREE_C_HEAP_ARRAY(jint, _array_ind_stack); } @@ -2953,10 +3002,11 @@ if (hr != NULL) { if (hr->in_collection_set()) { if (_g1h->is_obj_ill(obj)) { - _bm->mark((HeapWord*)obj); - if (!push(obj)) { - gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed."); - set_abort(); + if (_bm->parMark((HeapWord*)obj)) { + if (!push(obj)) { + gclog_or_tty->print_cr("Setting abort in CSetMarkOopClosure because push failed."); + set_abort(); + } } } } else { @@ -2967,19 +3017,19 @@ } }; -class CSMarkBitMapClosure: public BitMapClosure { - G1CollectedHeap* _g1h; - CMBitMap* _bitMap; - ConcurrentMark* _cm; - CSMarkOopClosure _oop_cl; +class CSetMarkBitMapClosure: public BitMapClosure { + G1CollectedHeap* _g1h; + CMBitMap* _bitMap; + ConcurrentMark* _cm; + CSetMarkOopClosure _oop_cl; + int _worker_i; + public: - CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) : + CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_i) : _g1h(G1CollectedHeap::heap()), _bitMap(cm->nextMarkBitMap()), - _oop_cl(cm, ms_size) - {} - - ~CSMarkBitMapClosure() {} + _oop_cl(cm, ms_size, worker_i), + _worker_i(worker_i) { } bool do_bit(size_t offset) { // convert offset into a HeapWord* @@ -3001,53 +3051,69 @@ } }; - -class CompleteMarkingInCSHRClosure: public HeapRegionClosure { - CMBitMap* _bm; - CSMarkBitMapClosure _bit_cl; +class CompleteMarkingInCSetHRClosure: public HeapRegionClosure { + CMBitMap* _bm; + CSetMarkBitMapClosure _bit_cl; + int _worker_i; + enum SomePrivateConstants { MSSize = 1000 }; - bool _completed; + public: - CompleteMarkingInCSHRClosure(ConcurrentMark* cm) : + CompleteMarkingInCSetHRClosure(ConcurrentMark* cm, int worker_i) : _bm(cm->nextMarkBitMap()), - _bit_cl(cm, MSSize), - _completed(true) - {} - - ~CompleteMarkingInCSHRClosure() {} - - bool doHeapRegion(HeapRegion* r) { - if (!r->evacuation_failed()) { - MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start()); - if (!mr.is_empty()) { - if (!_bm->iterate(&_bit_cl, mr)) { - _completed = false; - return true; + _bit_cl(cm, MSSize, worker_i), + _worker_i(worker_i) { } + + bool doHeapRegion(HeapRegion* hr) { + if (hr->claimHeapRegion(HeapRegion::CompleteMarkCSetClaimValue)) { + // The current worker has successfully claimed the region. + if (!hr->evacuation_failed()) { + MemRegion mr = MemRegion(hr->bottom(), hr->next_top_at_mark_start()); + if (!mr.is_empty()) { + bool done = false; + while (!done) { + done = _bm->iterate(&_bit_cl, mr); + } } } } return false; } - - bool completed() { return _completed; } }; -class ClearMarksInHRClosure: public HeapRegionClosure { - CMBitMap* _bm; +class SetClaimValuesInCSetHRClosure: public HeapRegionClosure { + jint _claim_value; + public: - ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { } - - bool doHeapRegion(HeapRegion* r) { - if (!r->used_region().is_empty() && !r->evacuation_failed()) { - MemRegion usedMR = r->used_region(); - _bm->clearRange(r->used_region()); - } + SetClaimValuesInCSetHRClosure(jint claim_value) : + _claim_value(claim_value) { } + + bool doHeapRegion(HeapRegion* hr) { + hr->set_claim_value(_claim_value); return false; } }; +class G1ParCompleteMarkInCSetTask: public AbstractGangTask { +protected: + G1CollectedHeap* _g1h; + ConcurrentMark* _cm; + +public: + G1ParCompleteMarkInCSetTask(G1CollectedHeap* g1h, + ConcurrentMark* cm) : + AbstractGangTask("Complete Mark in CSet"), + _g1h(g1h), _cm(cm) { } + + void work(int worker_i) { + CompleteMarkingInCSetHRClosure cmplt(_cm, worker_i); + HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_i); + _g1h->collection_set_iterate_from(hr, &cmplt); + } +}; + void ConcurrentMark::complete_marking_in_collection_set() { G1CollectedHeap* g1h = G1CollectedHeap::heap(); @@ -3056,20 +3122,32 @@ return; } - int i = 1; double start = os::elapsedTime(); - while (true) { - i++; - CompleteMarkingInCSHRClosure cmplt(this); - g1h->collection_set_iterate(&cmplt); - if (cmplt.completed()) break; + int n_workers = g1h->workers()->total_workers(); + + G1ParCompleteMarkInCSetTask complete_mark_task(g1h, this); + + assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity"); + + if (G1CollectedHeap::use_parallel_gc_threads()) { + g1h->set_par_threads(n_workers); + g1h->workers()->run_task(&complete_mark_task); + g1h->set_par_threads(0); + } else { + complete_mark_task.work(0); } + + assert(g1h->check_cset_heap_region_claim_values(HeapRegion::CompleteMarkCSetClaimValue), "sanity"); + + // Now reset the claim values in the regions in the collection set. + SetClaimValuesInCSetHRClosure set_cv_cl(HeapRegion::InitialClaimValue); + g1h->collection_set_iterate(&set_cv_cl); + + assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity"); + double end_time = os::elapsedTime(); double elapsed_time_ms = (end_time - start) * 1000.0; g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms); - - ClearMarksInHRClosure clr(nextMarkBitMap()); - g1h->collection_set_iterate(&clr); } // The next two methods deal with the following optimisation. Some diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp --- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -360,7 +360,7 @@ friend class ConcurrentMarkThread; friend class CMTask; friend class CMBitMapClosure; - friend class CSMarkOopClosure; + friend class CSetMarkOopClosure; friend class CMGlobalObjectClosure; friend class CMRemarkTask; friend class CMConcurrentMarkingTask; @@ -375,7 +375,9 @@ ConcurrentMarkThread* _cmThread; // the thread doing the work G1CollectedHeap* _g1h; // the heap. size_t _parallel_marking_threads; // the number of marking - // threads we'll use + // threads we're use + size_t _max_parallel_marking_threads; // max number of marking + // threads we'll ever use double _sleep_factor; // how much we have to sleep, with // respect to the work we just did, to // meet the marking overhead goal @@ -473,7 +475,7 @@ double* _accum_task_vtime; // accumulated task vtime - WorkGang* _parallel_workers; + FlexibleWorkGang* _parallel_workers; ForceOverflowSettings _force_overflow_conc; ForceOverflowSettings _force_overflow_stw; @@ -504,6 +506,7 @@ // accessor methods size_t parallel_marking_threads() { return _parallel_marking_threads; } + size_t max_parallel_marking_threads() { return _max_parallel_marking_threads;} double sleep_factor() { return _sleep_factor; } double marking_task_overhead() { return _marking_task_overhead;} double cleanup_sleep_factor() { return _cleanup_sleep_factor; } @@ -709,6 +712,14 @@ CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; } CMBitMap* nextMarkBitMap() const { return _nextMarkBitMap; } + // Returns the number of GC threads to be used in a concurrent + // phase based on the number of GC threads being used in a STW + // phase. + size_t scale_parallel_threads(size_t n_par_threads); + + // Calculates the number of GC threads to be used in a concurrent phase. + int calc_parallel_marking_threads(); + // The following three are interaction between CM and // G1CollectedHeap diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp --- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -191,7 +191,11 @@ VM_CGC_Operation op(&cl_cl, verbose_str); VMThread::execute(&op); } else { + // We don't want to update the marking status if a GC pause + // is already underway. + _sts.join(); g1h->set_marking_complete(); + _sts.leave(); } // Check if cleanup set the free_regions_coming flag. If it diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -66,6 +66,18 @@ // apply to TLAB allocation, which is not part of this interface: it // is done by clients of this interface.) +// Notes on implementation of parallelism in different tasks. +// +// G1ParVerifyTask uses heap_region_par_iterate_chunked() for parallelism. +// The number of GC workers is passed to heap_region_par_iterate_chunked(). +// It does use run_task() which sets _n_workers in the task. +// G1ParTask executes g1_process_strong_roots() -> +// SharedHeap::process_strong_roots() which calls eventuall to +// CardTableModRefBS::par_non_clean_card_iterate_work() which uses +// SequentialSubTasksDone. SharedHeap::process_strong_roots() also +// directly uses SubTasksDone (_process_strong_tasks field in SharedHeap). +// + // Local to this file. class RefineCardTableEntryClosure: public CardTableEntryClosure { @@ -176,8 +188,7 @@ hr->set_next_young_region(_head); _head = hr; - hr->set_young(); - double yg_surv_rate = _g1h->g1_policy()->predict_yg_surv_rate((int)_length); + _g1h->g1_policy()->set_region_eden(hr, (int) _length); ++_length; } @@ -190,7 +201,6 @@ _survivor_tail = hr; } _survivor_head = hr; - ++_survivor_length; } @@ -315,16 +325,20 @@ _g1h->g1_policy()->note_start_adding_survivor_regions(); _g1h->g1_policy()->finished_recalculating_age_indexes(true /* is_survivors */); + int young_index_in_cset = 0; for (HeapRegion* curr = _survivor_head; curr != NULL; curr = curr->get_next_young_region()) { - _g1h->g1_policy()->set_region_survivors(curr); + _g1h->g1_policy()->set_region_survivor(curr, young_index_in_cset); // The region is a non-empty survivor so let's add it to // the incremental collection set for the next evacuation // pause. _g1h->g1_policy()->add_region_to_incremental_cset_rhs(curr); - } + young_index_in_cset += 1; + } + assert((size_t) young_index_in_cset == _survivor_length, + "post-condition"); _g1h->g1_policy()->note_stop_adding_survivor_regions(); _head = _survivor_head; @@ -1154,6 +1168,7 @@ void work(int i) { RebuildRSOutOfRegionClosure rebuild_rs(_g1, i); _g1->heap_region_par_iterate_chunked(&rebuild_rs, i, + _g1->workers()->active_workers(), HeapRegion::RebuildRSClaimValue); } }; @@ -1358,12 +1373,32 @@ } // Rebuild remembered sets of all regions. - if (G1CollectedHeap::use_parallel_gc_threads()) { + int n_workers = + AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(), + workers()->active_workers(), + Threads::number_of_non_daemon_threads()); + assert(UseDynamicNumberOfGCThreads || + n_workers == workers()->total_workers(), + "If not dynamic should be using all the workers"); + workers()->set_active_workers(n_workers); + // Set parallel threads in the heap (_n_par_threads) only + // before a parallel phase and always reset it to 0 after + // the phase so that the number of parallel threads does + // no get carried forward to a serial phase where there + // may be code that is "possibly_parallel". + set_par_threads(n_workers); + ParRebuildRSTask rebuild_rs_task(this); assert(check_heap_region_claim_values( HeapRegion::InitialClaimValue), "sanity check"); - set_par_threads(workers()->total_workers()); + assert(UseDynamicNumberOfGCThreads || + workers()->active_workers() == workers()->total_workers(), + "Unless dynamic should use total workers"); + // Use the most recent number of active workers + assert(workers()->active_workers() > 0, + "Active workers not properly set"); + set_par_threads(workers()->active_workers()); workers()->run_task(&rebuild_rs_task); set_par_threads(0); assert(check_heap_region_claim_values( @@ -2475,11 +2510,17 @@ void G1CollectedHeap::heap_region_par_iterate_chunked(HeapRegionClosure* cl, int worker, + int no_of_par_workers, jint claim_value) { const size_t regions = n_regions(); - const size_t worker_num = (G1CollectedHeap::use_parallel_gc_threads() ? ParallelGCThreads : 1); + const size_t max_workers = (G1CollectedHeap::use_parallel_gc_threads() ? + no_of_par_workers : + 1); + assert(UseDynamicNumberOfGCThreads || + no_of_par_workers == workers()->total_workers(), + "Non dynamic should use fixed number of workers"); // try to spread out the starting points of the workers - const size_t start_index = regions / worker_num * (size_t) worker; + const size_t start_index = regions / max_workers * (size_t) worker; // each worker will actually look at all regions for (size_t count = 0; count < regions; ++count) { @@ -2576,10 +2617,10 @@ _claim_value(claim_value), _failures(0), _sh_region(NULL) { } bool doHeapRegion(HeapRegion* r) { if (r->claim_value() != _claim_value) { - gclog_or_tty->print_cr("Region ["PTR_FORMAT","PTR_FORMAT"), " + gclog_or_tty->print_cr("Region " HR_FORMAT ", " "claim value = %d, should be %d", - r->bottom(), r->end(), r->claim_value(), - _claim_value); + HR_FORMAT_PARAMS(r), + r->claim_value(), _claim_value); ++_failures; } if (!r->isHumongous()) { @@ -2588,9 +2629,9 @@ _sh_region = r; } else if (r->continuesHumongous()) { if (r->humongous_start_region() != _sh_region) { - gclog_or_tty->print_cr("Region ["PTR_FORMAT","PTR_FORMAT"), " + gclog_or_tty->print_cr("Region " HR_FORMAT ", " "HS = "PTR_FORMAT", should be "PTR_FORMAT, - r->bottom(), r->end(), + HR_FORMAT_PARAMS(r), r->humongous_start_region(), _sh_region); ++_failures; @@ -2608,8 +2649,63 @@ heap_region_iterate(&cl); return cl.failures() == 0; } + +class CheckClaimValuesInCSetHRClosure: public HeapRegionClosure { + jint _claim_value; + size_t _failures; + +public: + CheckClaimValuesInCSetHRClosure(jint claim_value) : + _claim_value(claim_value), + _failures(0) { } + + size_t failures() { + return _failures; + } + + bool doHeapRegion(HeapRegion* hr) { + assert(hr->in_collection_set(), "how?"); + assert(!hr->isHumongous(), "H-region in CSet"); + if (hr->claim_value() != _claim_value) { + gclog_or_tty->print_cr("CSet Region " HR_FORMAT ", " + "claim value = %d, should be %d", + HR_FORMAT_PARAMS(hr), + hr->claim_value(), _claim_value); + _failures += 1; + } + return false; + } +}; + +bool G1CollectedHeap::check_cset_heap_region_claim_values(jint claim_value) { + CheckClaimValuesInCSetHRClosure cl(claim_value); + collection_set_iterate(&cl); + return cl.failures() == 0; +} #endif // ASSERT +// We want the parallel threads to start their collection +// set iteration at different collection set regions to +// avoid contention. +// If we have: +// n collection set regions +// p threads +// Then thread t will start at region t * floor (n/p) + +HeapRegion* G1CollectedHeap::start_cset_region_for_worker(int worker_i) { + HeapRegion* result = g1_policy()->collection_set(); + if (G1CollectedHeap::use_parallel_gc_threads()) { + size_t cs_size = g1_policy()->cset_region_length(); + int n_workers = workers()->total_workers(); + size_t cs_spans = cs_size / n_workers; + size_t ind = cs_spans * worker_i; + for (size_t i = 0; i < ind; i++) { + result = result->next_in_collection_set(); + } + } + return result; +} + void G1CollectedHeap::collection_set_iterate(HeapRegionClosure* cl) { HeapRegion* r = g1_policy()->collection_set(); while (r != NULL) { @@ -2918,6 +3014,7 @@ HandleMark hm; VerifyRegionClosure blk(_allow_dirty, true, _vo); _g1h->heap_region_par_iterate_chunked(&blk, worker_i, + _g1h->workers()->active_workers(), HeapRegion::ParVerifyClaimValue); if (blk.failures()) { _failures = true; @@ -2935,6 +3032,10 @@ if (SafepointSynchronize::is_at_safepoint() || ! UseTLAB) { if (!silent) { gclog_or_tty->print("Roots (excluding permgen) "); } VerifyRootsClosure rootsCl(vo); + + assert(Thread::current()->is_VM_thread(), + "Expected to be executed serially by the VM thread at this point"); + CodeBlobToOopClosure blobsCl(&rootsCl, /*do_marking=*/ false); // We apply the relevant closures to all the oops in the @@ -2979,7 +3080,10 @@ "sanity check"); G1ParVerifyTask task(this, allow_dirty, vo); - int n_workers = workers()->total_workers(); + assert(UseDynamicNumberOfGCThreads || + workers()->active_workers() == workers()->total_workers(), + "If not dynamic should be using all the workers"); + int n_workers = workers()->active_workers(); set_par_threads(n_workers); workers()->run_task(&task); set_par_threads(0); @@ -2987,6 +3091,8 @@ failures = true; } + // Checks that the expected amount of parallel work was done. + // The implication is that n_workers is > 0. assert(check_heap_region_claim_values(HeapRegion::ParVerifyClaimValue), "sanity check"); @@ -3210,8 +3316,6 @@ } } -// - double G1CollectedHeap::predict_region_elapsed_time_ms(HeapRegion *hr, bool young) { return _g1_policy->predict_region_elapsed_time_ms(hr, young); @@ -3251,7 +3355,7 @@ void G1CollectedHeap::setup_surviving_young_words() { guarantee( _surviving_young_words == NULL, "pre-condition" ); - size_t array_length = g1_policy()->young_cset_length(); + size_t array_length = g1_policy()->young_cset_region_length(); _surviving_young_words = NEW_C_HEAP_ARRAY(size_t, array_length); if (_surviving_young_words == NULL) { vm_exit_out_of_memory(sizeof(size_t) * array_length, @@ -3268,7 +3372,7 @@ void G1CollectedHeap::update_surviving_young_words(size_t* surv_young_words) { MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); - size_t array_length = g1_policy()->young_cset_length(); + size_t array_length = g1_policy()->young_cset_region_length(); for (size_t i = 0; i < array_length; ++i) _surviving_young_words[i] += surv_young_words[i]; } @@ -3280,8 +3384,6 @@ _surviving_young_words = NULL; } -// - #ifdef ASSERT class VerifyCSetClosure: public HeapRegionClosure { public: @@ -3404,6 +3506,10 @@ assert(check_young_list_well_formed(), "young list should be well formed"); + // Don't dynamically change the number of GC threads this early. A value of + // 0 is used to indicate serial work. When parallel work is done, + // it will be set. + { // Call to jvmpi::post_class_unload_events must occur outside of active GC IsGCActiveMark x; @@ -3617,7 +3723,8 @@ double end_time_sec = os::elapsedTime(); double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS; g1_policy()->record_pause_time_ms(pause_time_ms); - g1_policy()->record_collection_pause_end(); + int active_gc_threads = workers()->active_workers(); + g1_policy()->record_collection_pause_end(active_gc_threads); MemoryService::track_memory_usage(); @@ -4158,7 +4265,7 @@ // non-young regions (where the age is -1) // We also add a few elements at the beginning and at the end in // an attempt to eliminate cache contention - size_t real_length = 1 + _g1h->g1_policy()->young_cset_length(); + size_t real_length = 1 + _g1h->g1_policy()->young_cset_region_length(); size_t array_length = PADDING_ELEM_NUM + real_length + PADDING_ELEM_NUM; @@ -4564,13 +4671,13 @@ } public: - G1ParTask(G1CollectedHeap* g1h, int workers, RefToScanQueueSet *task_queues) + G1ParTask(G1CollectedHeap* g1h, + RefToScanQueueSet *task_queues) : AbstractGangTask("G1 collection"), _g1h(g1h), _queues(task_queues), - _terminator(workers, _queues), - _stats_lock(Mutex::leaf, "parallel G1 stats lock", true), - _n_workers(workers) + _terminator(0, _queues), + _stats_lock(Mutex::leaf, "parallel G1 stats lock", true) {} RefToScanQueueSet* queues() { return _queues; } @@ -4579,6 +4686,20 @@ return queues()->queue(i); } + ParallelTaskTerminator* terminator() { return &_terminator; } + + virtual void set_for_termination(int active_workers) { + // This task calls set_n_termination() in par_non_clean_card_iterate_work() + // in the young space (_par_seq_tasks) in the G1 heap + // for SequentialSubTasksDone. + // This task also uses SubTasksDone in SharedHeap and G1CollectedHeap + // both of which need setting by set_n_termination(). + _g1h->SharedHeap::set_n_termination(active_workers); + _g1h->set_n_termination(active_workers); + terminator()->reset_for_reuse(active_workers); + _n_workers = active_workers; + } + void work(int i) { if (i >= _n_workers) return; // no work needed this round @@ -4863,12 +4984,12 @@ private: G1CollectedHeap* _g1h; RefToScanQueueSet* _queues; - WorkGang* _workers; + FlexibleWorkGang* _workers; int _active_workers; public: G1STWRefProcTaskExecutor(G1CollectedHeap* g1h, - WorkGang* workers, + FlexibleWorkGang* workers, RefToScanQueueSet *task_queues, int n_workers) : _g1h(g1h), @@ -5124,11 +5245,13 @@ // referents points to another object which is also referenced by an // object discovered by the STW ref processor. - int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? - workers()->total_workers() : 1); - - set_par_threads(n_workers); - G1ParPreserveCMReferentsTask keep_cm_referents(this, n_workers, _task_queues); + int active_workers = (G1CollectedHeap::use_parallel_gc_threads() ? + workers()->active_workers() : 1); + + assert(active_workers == workers()->active_workers(), + "Need to reset active_workers"); + set_par_threads(active_workers); + G1ParPreserveCMReferentsTask keep_cm_referents(this, active_workers, _task_queues); if (G1CollectedHeap::use_parallel_gc_threads()) { workers()->run_task(&keep_cm_referents); @@ -5194,7 +5317,6 @@ NULL); } else { // Parallel reference processing - int active_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1); assert(rp->num_q() == active_workers, "sanity"); assert(active_workers <= rp->max_num_q(), "sanity"); @@ -5227,7 +5349,9 @@ } else { // Parallel reference enqueuing - int active_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1); + int active_workers = (ParallelGCThreads > 0 ? workers()->active_workers() : 1); + assert(active_workers == workers()->active_workers(), + "Need to reset active_workers"); assert(rp->num_q() == active_workers, "sanity"); assert(active_workers <= rp->max_num_q(), "sanity"); @@ -5254,9 +5378,24 @@ concurrent_g1_refine()->set_use_cache(false); concurrent_g1_refine()->clear_hot_cache_claimed_index(); - int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1); - set_par_threads(n_workers); - G1ParTask g1_par_task(this, n_workers, _task_queues); + int n_workers; + if (G1CollectedHeap::use_parallel_gc_threads()) { + n_workers = + AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(), + workers()->active_workers(), + Threads::number_of_non_daemon_threads()); + assert(UseDynamicNumberOfGCThreads || + n_workers == workers()->total_workers(), + "If not dynamic should be using all the workers"); + set_par_threads(n_workers); + } else { + assert(n_par_threads() == 0, + "Should be the original non-parallel value"); + n_workers = 1; + } + workers()->set_active_workers(n_workers); + + G1ParTask g1_par_task(this, _task_queues); init_for_evac_failure(NULL); @@ -5269,6 +5408,10 @@ // The individual threads will set their evac-failure closures. StrongRootsScope srs(this); if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr(); + // These tasks use ShareHeap::_process_strong_tasks + assert(UseDynamicNumberOfGCThreads || + workers()->active_workers() == workers()->total_workers(), + "If not dynamic should be using all the workers"); workers()->run_task(&g1_par_task); } else { StrongRootsScope srs(this); @@ -5277,6 +5420,7 @@ double par_time = (os::elapsedTime() - start_par) * 1000.0; g1_policy()->record_par_time(par_time); + set_par_threads(0); // Process any discovered reference objects - we have @@ -5304,8 +5448,11 @@ finalize_for_evac_failure(); - // Must do this before removing self-forwarding pointers, which clears - // the per-region evac-failure flags. + // Must do this before clearing the per-region evac-failure flags + // (which is currently done when we free the collection set). + // We also only do this if marking is actually in progress and so + // have to do this before we set the mark_in_progress flag at the + // end of an initial mark pause. concurrent_mark()->complete_marking_in_collection_set(); if (evacuation_failed()) { @@ -5567,7 +5714,6 @@ while (cur != NULL) { assert(!is_on_master_free_list(cur), "sanity"); - if (non_young) { if (cur->is_young()) { double end_sec = os::elapsedTime(); @@ -5578,12 +5724,14 @@ non_young = false; } } else { - double end_sec = os::elapsedTime(); - double elapsed_ms = (end_sec - start_sec) * 1000.0; - young_time_ms += elapsed_ms; - - start_sec = os::elapsedTime(); - non_young = true; + if (!cur->is_young()) { + double end_sec = os::elapsedTime(); + double elapsed_ms = (end_sec - start_sec) * 1000.0; + young_time_ms += elapsed_ms; + + start_sec = os::elapsedTime(); + non_young = true; + } } rs_lengths += cur->rem_set()->occupied(); @@ -5595,8 +5743,8 @@ if (cur->is_young()) { int index = cur->young_index_in_cset(); - guarantee( index != -1, "invariant" ); - guarantee( (size_t)index < policy->young_cset_length(), "invariant" ); + assert(index != -1, "invariant"); + assert((size_t) index < policy->young_cset_region_length(), "invariant"); size_t words_survived = _surviving_young_words[index]; cur->record_surv_words_in_group(words_survived); @@ -5607,7 +5755,7 @@ cur->set_next_young_region(NULL); } else { int index = cur->young_index_in_cset(); - guarantee( index == -1, "invariant" ); + assert(index == -1, "invariant"); } assert( (cur->is_young() && cur->young_index_in_cset() > -1) || @@ -5615,13 +5763,26 @@ "invariant" ); if (!cur->evacuation_failed()) { + MemRegion used_mr = cur->used_region(); + // And the region is empty. - assert(!cur->is_empty(), "Should not have empty regions in a CS."); + assert(!used_mr.is_empty(), "Should not have empty regions in a CS."); + + // If marking is in progress then clear any objects marked in + // the current region. Note mark_in_progress() returns false, + // even during an initial mark pause, until the set_marking_started() + // call which takes place later in the pause. + if (mark_in_progress()) { + assert(!g1_policy()->during_initial_mark_pause(), "sanity"); + _cm->nextMarkBitMap()->clearRange(used_mr); + } + free_region(cur, &pre_used, &local_free_list, false /* par */); } else { cur->uninstall_surv_rate_group(); - if (cur->is_young()) + if (cur->is_young()) { cur->set_young_index_in_cset(-1); + } cur->set_not_young(); cur->set_evacuation_failed(false); // The region is now considered to be old. @@ -5635,10 +5796,12 @@ double end_sec = os::elapsedTime(); double elapsed_ms = (end_sec - start_sec) * 1000.0; - if (non_young) + + if (non_young) { non_young_time_ms += elapsed_ms; - else + } else { young_time_ms += elapsed_ms; + } update_sets_after_freeing_regions(pre_used, &local_free_list, NULL /* old_proxy_set */, @@ -5722,7 +5885,6 @@ assert(heap_lock_held_for_gc(), "the heap lock should already be held by or for this thread"); _young_list->push_region(hr); - g1_policy()->set_region_short_lived(hr); } class NoYoungRegionsClosure: public HeapRegionClosure { @@ -5880,7 +6042,6 @@ HeapRegion* new_alloc_region = new_region(word_size, false /* do_expand */); if (new_alloc_region != NULL) { - g1_policy()->update_region_num(true /* next_is_young */); set_region_short_lived_locked(new_alloc_region); _hr_printer.alloc(new_alloc_region, G1HRPrinter::Eden, young_list_full); return new_alloc_region; @@ -5908,6 +6069,21 @@ return _g1h->new_mutator_alloc_region(word_size, force); } +void G1CollectedHeap::set_par_threads() { + // Don't change the number of workers. Use the value previously set + // in the workgroup. + int n_workers = workers()->active_workers(); + assert(UseDynamicNumberOfGCThreads || + n_workers == workers()->total_workers(), + "Otherwise should be using the total number of workers"); + if (n_workers == 0) { + assert(false, "Should have been set in prior evacuation pause."); + n_workers = ParallelGCThreads; + workers()->set_active_workers(n_workers); + } + set_par_threads(n_workers); +} + void MutatorAllocRegion::retire_region(HeapRegion* alloc_region, size_t allocated_bytes) { _g1h->retire_mutator_alloc_region(alloc_region, allocated_bytes); diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -987,6 +987,16 @@ void set_par_threads(int t) { SharedHeap::set_par_threads(t); + // Done in SharedHeap but oddly there are + // two _process_strong_tasks's in a G1CollectedHeap + // so do it here too. + _process_strong_tasks->set_n_threads(t); + } + + // Set _n_par_threads according to a policy TBD. + void set_par_threads(); + + void set_n_termination(int t) { _process_strong_tasks->set_n_threads(t); } @@ -1276,6 +1286,7 @@ // i.e., that a closure never attempt to abort a traversal. void heap_region_par_iterate_chunked(HeapRegionClosure* blk, int worker, + int no_of_par_workers, jint claim_value); // It resets all the region claim values to the default. @@ -1283,8 +1294,17 @@ #ifdef ASSERT bool check_heap_region_claim_values(jint claim_value); + + // Same as the routine above but only checks regions in the + // current collection set. + bool check_cset_heap_region_claim_values(jint claim_value); #endif // ASSERT + // Given the id of a worker, calculate a suitable + // starting region for iterating over the current + // collection set. + HeapRegion* start_cset_region_for_worker(int worker_i); + // Iterate over the regions (if any) in the current collection set. void collection_set_iterate(HeapRegionClosure* blk); @@ -1610,16 +1630,12 @@ public: void stop_conc_gc_threads(); - // - double predict_region_elapsed_time_ms(HeapRegion* hr, bool young); void check_if_region_is_too_expensive(double predicted_time_ms); size_t pending_card_num(); size_t max_pending_card_num(); size_t cards_scanned(); - // - protected: size_t _max_heap_capacity; }; diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -36,10 +36,6 @@ #include "runtime/mutexLocker.hpp" #include "utilities/debug.hpp" -#define PREDICTIONS_VERBOSE 0 - -// - // Different defaults for different number of GC threads // They were chosen by running GCOld and SPECjbb on debris with different // numbers of GC threads and choosing them based on the results @@ -80,8 +76,6 @@ 1.0, 0.7, 0.7, 0.5, 0.5, 0.42, 0.42, 0.30 }; -// - // Help class for avoiding interleaved logging class LineBuffer: public StackObj { @@ -137,10 +131,6 @@ _parallel_gc_threads(G1CollectedHeap::use_parallel_gc_threads() ? ParallelGCThreads : 1), - _n_pauses(0), - _recent_rs_scan_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), - _recent_pause_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), - _recent_rs_sizes(new TruncatedSeq(NumPrevPausesForHeuristics)), _recent_gc_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), _all_pause_times_ms(new NumberSeq()), _stop_world_start(0.0), @@ -148,11 +138,10 @@ _all_yield_times_ms(new NumberSeq()), _using_new_ratio_calculations(false), - _all_mod_union_times_ms(new NumberSeq()), - _summary(new Summary()), _cur_clear_ct_time_ms(0.0), + _mark_closure_time_ms(0.0), _cur_ref_proc_time_ms(0.0), _cur_ref_enq_time_ms(0.0), @@ -165,11 +154,6 @@ _num_cc_clears(0L), #endif - _region_num_young(0), - _region_num_tenured(0), - _prev_region_num_young(0), - _prev_region_num_tenured(0), - _aux_num(10), _all_aux_times_ms(new NumberSeq[_aux_num]), _cur_aux_start_times_ms(new double[_aux_num]), @@ -179,8 +163,6 @@ _concurrent_mark_remark_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), _concurrent_mark_cleanup_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), - // - _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _prev_collection_pause_end_ms(0.0), _pending_card_diff_seq(new TruncatedSeq(TruncatedSeqLength)), @@ -199,13 +181,10 @@ new TruncatedSeq(TruncatedSeqLength)), _pending_cards_seq(new TruncatedSeq(TruncatedSeqLength)), - _scanned_cards_seq(new TruncatedSeq(TruncatedSeqLength)), _rs_lengths_seq(new TruncatedSeq(TruncatedSeqLength)), _pause_time_target_ms((double) MaxGCPauseMillis), - // - _full_young_gcs(true), _full_young_pause_num(0), _partial_young_pause_num(0), @@ -221,16 +200,10 @@ _recent_prev_end_times_for_all_gcs_sec(new TruncatedSeq(NumPrevPausesForHeuristics)), - _recent_CS_bytes_used_before(new TruncatedSeq(NumPrevPausesForHeuristics)), - _recent_CS_bytes_surviving(new TruncatedSeq(NumPrevPausesForHeuristics)), - _recent_avg_pause_time_ratio(0.0), _all_full_gc_times_ms(new NumberSeq()), - // G1PausesBtwnConcMark defaults to -1 - // so the hack is to do the cast QQQ FIXME - _pauses_btwn_concurrent_mark((size_t)G1PausesBtwnConcMark), _initiate_conc_mark_if_possible(false), _during_initial_mark_pause(false), _should_revert_to_full_young_gcs(false), @@ -242,22 +215,21 @@ _prev_collection_pause_used_at_end_bytes(0), + _eden_cset_region_length(0), + _survivor_cset_region_length(0), + _old_cset_region_length(0), + _collection_set(NULL), - _collection_set_size(0), _collection_set_bytes_used_before(0), // Incremental CSet attributes _inc_cset_build_state(Inactive), _inc_cset_head(NULL), _inc_cset_tail(NULL), - _inc_cset_size(0), - _inc_cset_young_index(0), _inc_cset_bytes_used_before(0), _inc_cset_max_finger(NULL), - _inc_cset_recorded_young_bytes(0), _inc_cset_recorded_rs_lengths(0), _inc_cset_predicted_elapsed_time_ms(0.0), - _inc_cset_predicted_bytes_to_copy(0), #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away #pragma warning( disable:4355 ) // 'this' : used in base member initializer list @@ -325,8 +297,6 @@ // start conservatively _expensive_region_limit_ms = 0.5 * (double) MaxGCPauseMillis; - // - int index; if (ParallelGCThreads == 0) index = 0; @@ -348,8 +318,6 @@ _non_young_other_cost_per_region_ms_seq->add( non_young_other_cost_per_region_ms_defaults[index]); - // - // Below, we might need to calculate the pause time target based on // the pause interval. When we do so we are going to give G1 maximum // flexibility and allow it to do pauses when it needs to. So, we'll @@ -908,9 +876,6 @@ record_survivor_regions(0, NULL, NULL); - _prev_region_num_young = _region_num_young; - _prev_region_num_tenured = _region_num_tenured; - _free_regions_at_end_of_collection = _g1->free_regions(); // Reset survivors SurvRateGroup. _survivor_surv_rate_group->reset(); @@ -982,10 +947,9 @@ _cur_aux_times_set[i] = false; } - // These are initialized to zero here and they are set during + // This is initialized to zero here and is set during // the evacuation pause if marking is in progress. _cur_satb_drain_time_ms = 0.0; - _last_satb_drain_processed_buffers = 0; _last_young_gc_full = false; @@ -996,10 +960,6 @@ assert( verify_young_ages(), "region age verification" ); } -void G1CollectorPolicy::record_mark_closure_time(double mark_closure_time_ms) { - _mark_closure_time_ms = mark_closure_time_ms; -} - void G1CollectorPolicy::record_concurrent_mark_init_end(double mark_init_elapsed_time_ms) { _during_marking = true; @@ -1060,7 +1020,7 @@ double total = 0.0; LineBuffer buf(level); buf.append("[%s (ms):", str); - for (uint i = 0; i < ParallelGCThreads; ++i) { + for (uint i = 0; i < no_of_gc_threads(); ++i) { double val = data[i]; if (val < min) min = val; @@ -1070,7 +1030,7 @@ buf.append(" %3.1lf", val); } buf.append_and_print_cr(""); - double avg = total / (double) ParallelGCThreads; + double avg = total / (double) no_of_gc_threads(); buf.append_and_print_cr(" Avg: %5.1lf, Min: %5.1lf, Max: %5.1lf, Diff: %5.1lf]", avg, min, max, max - min); } @@ -1082,7 +1042,7 @@ double total = 0.0; LineBuffer buf(level); buf.append("[%s :", str); - for (uint i = 0; i < ParallelGCThreads; ++i) { + for (uint i = 0; i < no_of_gc_threads(); ++i) { double val = data[i]; if (val < min) min = val; @@ -1092,7 +1052,7 @@ buf.append(" %d", (int) val); } buf.append_and_print_cr(""); - double avg = total / (double) ParallelGCThreads; + double avg = total / (double) no_of_gc_threads(); buf.append_and_print_cr(" Sum: %d, Avg: %d, Min: %d, Max: %d, Diff: %d]", (int)total, (int)avg, (int)min, (int)max, (int)max - (int)min); } @@ -1112,10 +1072,10 @@ double G1CollectorPolicy::avg_value(double* data) { if (G1CollectedHeap::use_parallel_gc_threads()) { double ret = 0.0; - for (uint i = 0; i < ParallelGCThreads; ++i) { + for (uint i = 0; i < no_of_gc_threads(); ++i) { ret += data[i]; } - return ret / (double) ParallelGCThreads; + return ret / (double) no_of_gc_threads(); } else { return data[0]; } @@ -1124,7 +1084,7 @@ double G1CollectorPolicy::max_value(double* data) { if (G1CollectedHeap::use_parallel_gc_threads()) { double ret = data[0]; - for (uint i = 1; i < ParallelGCThreads; ++i) { + for (uint i = 1; i < no_of_gc_threads(); ++i) { if (data[i] > ret) { ret = data[i]; } @@ -1138,7 +1098,7 @@ double G1CollectorPolicy::sum_of_values(double* data) { if (G1CollectedHeap::use_parallel_gc_threads()) { double sum = 0.0; - for (uint i = 0; i < ParallelGCThreads; i++) { + for (uint i = 0; i < no_of_gc_threads(); i++) { sum += data[i]; } return sum; @@ -1151,7 +1111,7 @@ double ret = data1[0] + data2[0]; if (G1CollectedHeap::use_parallel_gc_threads()) { - for (uint i = 1; i < ParallelGCThreads; ++i) { + for (uint i = 1; i < no_of_gc_threads(); ++i) { double data = data1[i] + data2[i]; if (data > ret) { ret = data; @@ -1164,16 +1124,19 @@ // Anything below that is considered to be zero #define MIN_TIMER_GRANULARITY 0.0000001 -void G1CollectorPolicy::record_collection_pause_end() { +void G1CollectorPolicy::record_collection_pause_end(int no_of_gc_threads) { double end_time_sec = os::elapsedTime(); double elapsed_ms = _last_pause_time_ms; bool parallel = G1CollectedHeap::use_parallel_gc_threads(); + assert(_cur_collection_pause_used_regions_at_start >= cset_region_length(), + "otherwise, the subtraction below does not make sense"); size_t rs_size = - _cur_collection_pause_used_regions_at_start - collection_set_size(); + _cur_collection_pause_used_regions_at_start - cset_region_length(); size_t cur_used_bytes = _g1->used(); assert(cur_used_bytes == _g1->recalculate_used(), "It should!"); bool last_pause_included_initial_mark = false; bool update_stats = !_g1->evacuation_failed(); + set_no_of_gc_threads(no_of_gc_threads); #ifndef PRODUCT if (G1YoungSurvRateVerbose) { @@ -1226,10 +1189,6 @@ _mmu_tracker->add_pause(end_time_sec - elapsed_ms/1000.0, end_time_sec, false); - guarantee(_cur_collection_pause_used_regions_at_start >= - collection_set_size(), - "Negative RS size?"); - // This assert is exempted when we're doing parallel collection pauses, // because the fragmentation caused by the parallel GC allocation buffers // can lead to more memory being used during collection than was used @@ -1253,8 +1212,6 @@ (double)surviving_bytes/ (double)_collection_set_bytes_used_before; - _n_pauses++; - // These values are used to update the summary information that is // displayed when TraceGen0Time is enabled, and are output as part // of the PrintGCDetails output, in the non-parallel case. @@ -1291,14 +1248,15 @@ // current value of "other time" other_time_ms -= _cur_clear_ct_time_ms; + // Subtract the time spent completing marking in the collection + // set. Note if marking is not in progress during the pause + // the value of _mark_closure_time_ms will be zero. + other_time_ms -= _mark_closure_time_ms; + // TraceGen0Time and TraceGen1Time summary info updating. _all_pause_times_ms->add(elapsed_ms); if (update_stats) { - _recent_rs_scan_times_ms->add(scan_rs_time); - _recent_pause_times_ms->add(elapsed_ms); - _recent_rs_sizes->add(rs_size); - _summary->record_total_time_ms(elapsed_ms); _summary->record_other_time_ms(other_time_ms); @@ -1342,9 +1300,6 @@ || surviving_bytes <= _collection_set_bytes_used_before, "Or else negative collection!"); - _recent_CS_bytes_used_before->add(_collection_set_bytes_used_before); - _recent_CS_bytes_surviving->add(surviving_bytes); - // this is where we update the allocation rate of the application double app_time_ms = (_cur_collection_start_sec * 1000.0 - _prev_collection_pause_end_ms); @@ -1354,13 +1309,17 @@ // We'll just set it to something (arbitrarily) small. app_time_ms = 1.0; } - size_t regions_allocated = - (_region_num_young - _prev_region_num_young) + - (_region_num_tenured - _prev_region_num_tenured); + // We maintain the invariant that all objects allocated by mutator + // threads will be allocated out of eden regions. So, we can use + // the eden region number allocated since the previous GC to + // calculate the application's allocate rate. The only exception + // to that is humongous objects that are allocated separately. But + // given that humongous object allocations do not really affect + // either the pause's duration nor when the next pause will take + // place we can safely ignore them here. + size_t regions_allocated = eden_cset_region_length(); double alloc_rate_ms = (double) regions_allocated / app_time_ms; _alloc_rate_ms_seq->add(alloc_rate_ms); - _prev_region_num_young = _region_num_young; - _prev_region_num_tenured = _region_num_tenured; double interval_ms = (end_time_sec - _recent_prev_end_times_for_all_gcs_sec->oldest()) * 1000.0; @@ -1398,33 +1357,6 @@ } } - - if (G1PolicyVerbose > 1) { - gclog_or_tty->print_cr(" Recording collection pause(%d)", _n_pauses); - } - - if (G1PolicyVerbose > 1) { - gclog_or_tty->print_cr(" ET: %10.6f ms (avg: %10.6f ms)\n" - " ET-RS: %10.6f ms (avg: %10.6f ms)\n" - " |RS|: " SIZE_FORMAT, - elapsed_ms, recent_avg_time_for_pauses_ms(), - scan_rs_time, recent_avg_time_for_rs_scan_ms(), - rs_size); - - gclog_or_tty->print_cr(" Used at start: " SIZE_FORMAT"K" - " At end " SIZE_FORMAT "K\n" - " garbage : " SIZE_FORMAT "K" - " of " SIZE_FORMAT "K\n" - " survival : %6.2f%% (%6.2f%% avg)", - _cur_collection_pause_used_at_start_bytes/K, - _g1->used()/K, freed_bytes/K, - _collection_set_bytes_used_before/K, - survival_fraction*100.0, - recent_avg_survival_fraction()*100.0); - gclog_or_tty->print_cr(" Recent %% gc pause time: %6.2f", - recent_avg_pause_time_ratio() * 100.0); - } - // PrintGCDetails output if (PrintGCDetails) { bool print_marking_info = @@ -1436,7 +1368,6 @@ if (print_marking_info) { print_stats(1, "SATB Drain Time", _cur_satb_drain_time_ms); - print_stats(2, "Processed Buffers", _last_satb_drain_processed_buffers); } if (parallel) { @@ -1478,6 +1409,9 @@ print_stats(1, "Scan RS", scan_rs_time); print_stats(1, "Object Copying", obj_copy_time); } + if (print_marking_info) { + print_stats(1, "Complete CSet Marking", _mark_closure_time_ms); + } print_stats(1, "Clear CT", _cur_clear_ct_time_ms); #ifndef PRODUCT print_stats(1, "Cur Clear CC", _cur_clear_cc_time_ms); @@ -1489,9 +1423,14 @@ } #endif print_stats(1, "Other", other_time_ms); - print_stats(2, "Choose CSet", _recorded_young_cset_choice_time_ms); + print_stats(2, "Choose CSet", + (_recorded_young_cset_choice_time_ms + + _recorded_non_young_cset_choice_time_ms)); print_stats(2, "Ref Proc", _cur_ref_proc_time_ms); print_stats(2, "Ref Enq", _cur_ref_enq_time_ms); + print_stats(2, "Free CSet", + (_recorded_young_free_cset_time_ms + + _recorded_non_young_free_cset_time_ms)); for (int i = 0; i < _aux_num; ++i) { if (_cur_aux_times_set[i]) { @@ -1576,8 +1515,6 @@ _short_lived_surv_rate_group->start_adding_regions(); // do that for any other surv rate groupsx - // - if (update_stats) { double pause_time_ms = elapsed_ms; @@ -1631,21 +1568,21 @@ _mark_closure_time_ms + termination_time); double young_other_time_ms = 0.0; - if (_recorded_young_regions > 0) { + if (young_cset_region_length() > 0) { young_other_time_ms = _recorded_young_cset_choice_time_ms + _recorded_young_free_cset_time_ms; _young_other_cost_per_region_ms_seq->add(young_other_time_ms / - (double) _recorded_young_regions); + (double) young_cset_region_length()); } double non_young_other_time_ms = 0.0; - if (_recorded_non_young_regions > 0) { + if (old_cset_region_length() > 0) { non_young_other_time_ms = _recorded_non_young_cset_choice_time_ms + _recorded_non_young_free_cset_time_ms; _non_young_other_cost_per_region_ms_seq->add(non_young_other_time_ms / - (double) _recorded_non_young_regions); + (double) old_cset_region_length()); } double constant_other_time_ms = all_other_time_ms - @@ -1659,7 +1596,6 @@ } _pending_cards_seq->add((double) _pending_cards); - _scanned_cards_seq->add((double) cards_scanned); _rs_lengths_seq->add((double) _max_rs_lengths); double expensive_region_limit_ms = @@ -1670,49 +1606,6 @@ expensive_region_limit_ms = (double) MaxGCPauseMillis; } _expensive_region_limit_ms = expensive_region_limit_ms; - - if (PREDICTIONS_VERBOSE) { - gclog_or_tty->print_cr(""); - gclog_or_tty->print_cr("PREDICTIONS %1.4lf %d " - "REGIONS %d %d %d " - "PENDING_CARDS %d %d " - "CARDS_SCANNED %d %d " - "RS_LENGTHS %d %d " - "RS_UPDATE %1.6lf %1.6lf RS_SCAN %1.6lf %1.6lf " - "SURVIVAL_RATIO %1.6lf %1.6lf " - "OBJECT_COPY %1.6lf %1.6lf OTHER_CONSTANT %1.6lf %1.6lf " - "OTHER_YOUNG %1.6lf %1.6lf " - "OTHER_NON_YOUNG %1.6lf %1.6lf " - "VTIME_DIFF %1.6lf TERMINATION %1.6lf " - "ELAPSED %1.6lf %1.6lf ", - _cur_collection_start_sec, - (!_last_young_gc_full) ? 2 : - (last_pause_included_initial_mark) ? 1 : 0, - _recorded_region_num, - _recorded_young_regions, - _recorded_non_young_regions, - _predicted_pending_cards, _pending_cards, - _predicted_cards_scanned, cards_scanned, - _predicted_rs_lengths, _max_rs_lengths, - _predicted_rs_update_time_ms, update_rs_time, - _predicted_rs_scan_time_ms, scan_rs_time, - _predicted_survival_ratio, survival_ratio, - _predicted_object_copy_time_ms, obj_copy_time, - _predicted_constant_other_time_ms, constant_other_time_ms, - _predicted_young_other_time_ms, young_other_time_ms, - _predicted_non_young_other_time_ms, - non_young_other_time_ms, - _vtime_diff_ms, termination_time, - _predicted_pause_time_ms, elapsed_ms); - } - - if (G1PolicyVerbose > 0) { - gclog_or_tty->print_cr("Pause Time, predicted: %1.4lfms (predicted %s), actual: %1.4lfms", - _predicted_pause_time_ms, - (_within_target) ? "within" : "outside", - elapsed_ms); - } - } _in_marking_window = new_in_marking_window; @@ -1723,7 +1616,6 @@ // Note that _mmu_tracker->max_gc_time() returns the time in seconds. double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0; adjust_concurrent_refinement(update_rs_time, update_rs_processed_buffers, update_rs_time_goal_ms); - // assert(assertMarkedBytesDataOK(), "Marked regions not OK at pause end."); } @@ -1768,8 +1660,6 @@ } } -// - void G1CollectorPolicy::adjust_concurrent_refinement(double update_rs_time, double update_rs_processed_buffers, double goal_ms) { @@ -1905,98 +1795,17 @@ } void -G1CollectorPolicy::start_recording_regions() { - _recorded_rs_lengths = 0; - _recorded_young_regions = 0; - _recorded_non_young_regions = 0; - -#if PREDICTIONS_VERBOSE - _recorded_marked_bytes = 0; - _recorded_young_bytes = 0; - _predicted_bytes_to_copy = 0; - _predicted_rs_lengths = 0; - _predicted_cards_scanned = 0; -#endif // PREDICTIONS_VERBOSE -} - -void -G1CollectorPolicy::record_cset_region_info(HeapRegion* hr, bool young) { -#if PREDICTIONS_VERBOSE - if (!young) { - _recorded_marked_bytes += hr->max_live_bytes(); - } - _predicted_bytes_to_copy += predict_bytes_to_copy(hr); -#endif // PREDICTIONS_VERBOSE - - size_t rs_length = hr->rem_set()->occupied(); - _recorded_rs_lengths += rs_length; -} - -void -G1CollectorPolicy::record_non_young_cset_region(HeapRegion* hr) { - assert(!hr->is_young(), "should not call this"); - ++_recorded_non_young_regions; - record_cset_region_info(hr, false); -} - -void -G1CollectorPolicy::set_recorded_young_regions(size_t n_regions) { - _recorded_young_regions = n_regions; -} - -void G1CollectorPolicy::set_recorded_young_bytes(size_t bytes) { -#if PREDICTIONS_VERBOSE - _recorded_young_bytes = bytes; -#endif // PREDICTIONS_VERBOSE +G1CollectorPolicy::init_cset_region_lengths(size_t eden_cset_region_length, + size_t survivor_cset_region_length) { + _eden_cset_region_length = eden_cset_region_length; + _survivor_cset_region_length = survivor_cset_region_length; + _old_cset_region_length = 0; } void G1CollectorPolicy::set_recorded_rs_lengths(size_t rs_lengths) { _recorded_rs_lengths = rs_lengths; } -void G1CollectorPolicy::set_predicted_bytes_to_copy(size_t bytes) { - _predicted_bytes_to_copy = bytes; -} - -void -G1CollectorPolicy::end_recording_regions() { - // The _predicted_pause_time_ms field is referenced in code - // not under PREDICTIONS_VERBOSE. Let's initialize it. - _predicted_pause_time_ms = -1.0; - -#if PREDICTIONS_VERBOSE - _predicted_pending_cards = predict_pending_cards(); - _predicted_rs_lengths = _recorded_rs_lengths + predict_rs_length_diff(); - if (full_young_gcs()) - _predicted_cards_scanned += predict_young_card_num(_predicted_rs_lengths); - else - _predicted_cards_scanned += - predict_non_young_card_num(_predicted_rs_lengths); - _recorded_region_num = _recorded_young_regions + _recorded_non_young_regions; - - _predicted_rs_update_time_ms = - predict_rs_update_time_ms(_g1->pending_card_num()); - _predicted_rs_scan_time_ms = - predict_rs_scan_time_ms(_predicted_cards_scanned); - _predicted_object_copy_time_ms = - predict_object_copy_time_ms(_predicted_bytes_to_copy); - _predicted_constant_other_time_ms = - predict_constant_other_time_ms(); - _predicted_young_other_time_ms = - predict_young_other_time_ms(_recorded_young_regions); - _predicted_non_young_other_time_ms = - predict_non_young_other_time_ms(_recorded_non_young_regions); - - _predicted_pause_time_ms = - _predicted_rs_update_time_ms + - _predicted_rs_scan_time_ms + - _predicted_object_copy_time_ms + - _predicted_constant_other_time_ms + - _predicted_young_other_time_ms + - _predicted_non_young_other_time_ms; -#endif // PREDICTIONS_VERBOSE -} - void G1CollectorPolicy::check_if_region_is_too_expensive(double predicted_time_ms) { // I don't think we need to do this when in young GC mode since @@ -2013,9 +1822,6 @@ } } -// - - void G1CollectorPolicy::update_recent_gc_times(double end_time_sec, double elapsed_ms) { _recent_gc_times_ms->add(elapsed_ms); @@ -2023,99 +1829,6 @@ _prev_collection_pause_end_ms = end_time_sec * 1000.0; } -double G1CollectorPolicy::recent_avg_time_for_pauses_ms() { - if (_recent_pause_times_ms->num() == 0) { - return (double) MaxGCPauseMillis; - } - return _recent_pause_times_ms->avg(); -} - -double G1CollectorPolicy::recent_avg_time_for_rs_scan_ms() { - if (_recent_rs_scan_times_ms->num() == 0) { - return (double)MaxGCPauseMillis/3.0; - } - return _recent_rs_scan_times_ms->avg(); -} - -int G1CollectorPolicy::number_of_recent_gcs() { - assert(_recent_rs_scan_times_ms->num() == - _recent_pause_times_ms->num(), "Sequence out of sync"); - assert(_recent_pause_times_ms->num() == - _recent_CS_bytes_used_before->num(), "Sequence out of sync"); - assert(_recent_CS_bytes_used_before->num() == - _recent_CS_bytes_surviving->num(), "Sequence out of sync"); - - return _recent_pause_times_ms->num(); -} - -double G1CollectorPolicy::recent_avg_survival_fraction() { - return recent_avg_survival_fraction_work(_recent_CS_bytes_surviving, - _recent_CS_bytes_used_before); -} - -double G1CollectorPolicy::last_survival_fraction() { - return last_survival_fraction_work(_recent_CS_bytes_surviving, - _recent_CS_bytes_used_before); -} - -double -G1CollectorPolicy::recent_avg_survival_fraction_work(TruncatedSeq* surviving, - TruncatedSeq* before) { - assert(surviving->num() == before->num(), "Sequence out of sync"); - if (before->sum() > 0.0) { - double recent_survival_rate = surviving->sum() / before->sum(); - // We exempt parallel collection from this check because Alloc Buffer - // fragmentation can produce negative collections. - // Further, we're now always doing parallel collection. But I'm still - // leaving this here as a placeholder for a more precise assertion later. - // (DLD, 10/05.) - assert((true || G1CollectedHeap::use_parallel_gc_threads()) || - _g1->evacuation_failed() || - recent_survival_rate <= 1.0, "Or bad frac"); - return recent_survival_rate; - } else { - return 1.0; // Be conservative. - } -} - -double -G1CollectorPolicy::last_survival_fraction_work(TruncatedSeq* surviving, - TruncatedSeq* before) { - assert(surviving->num() == before->num(), "Sequence out of sync"); - if (surviving->num() > 0 && before->last() > 0.0) { - double last_survival_rate = surviving->last() / before->last(); - // We exempt parallel collection from this check because Alloc Buffer - // fragmentation can produce negative collections. - // Further, we're now always doing parallel collection. But I'm still - // leaving this here as a placeholder for a more precise assertion later. - // (DLD, 10/05.) - assert((true || G1CollectedHeap::use_parallel_gc_threads()) || - last_survival_rate <= 1.0, "Or bad frac"); - return last_survival_rate; - } else { - return 1.0; - } -} - -static const int survival_min_obs = 5; -static double survival_min_obs_limits[] = { 0.9, 0.7, 0.5, 0.3, 0.1 }; -static const double min_survival_rate = 0.1; - -double -G1CollectorPolicy::conservative_avg_survival_fraction_work(double avg, - double latest) { - double res = avg; - if (number_of_recent_gcs() < survival_min_obs) { - res = MAX2(res, survival_min_obs_limits[number_of_recent_gcs()]); - } - res = MAX2(res, latest); - res = MAX2(res, min_survival_rate); - // In the parallel case, LAB fragmentation can produce "negative - // collections"; so can evac failure. Cap at 1.0 - res = MIN2(res, 1.0); - return res; -} - size_t G1CollectorPolicy::expansion_amount() { double recent_gc_overhead = recent_avg_pause_time_ratio() * 100.0; double threshold = _gc_overhead_perc; @@ -2331,15 +2044,6 @@ print_summary_sd(0, buffer, &_all_aux_times_ms[i]); } } - - size_t all_region_num = _region_num_young + _region_num_tenured; - gclog_or_tty->print_cr(" New Regions %8d, Young %8d (%6.2lf%%), " - "Tenured %8d (%6.2lf%%)", - all_region_num, - _region_num_young, - (double) _region_num_young / (double) all_region_num * 100.0, - _region_num_tenured, - (double) _region_num_tenured / (double) all_region_num * 100.0); } if (TraceGen1Time) { if (_all_full_gc_times_ms->num() > 0) { @@ -2361,14 +2065,6 @@ #endif // PRODUCT } -void G1CollectorPolicy::update_region_num(bool young) { - if (young) { - ++_region_num_young; - } else { - ++_region_num_tenured; - } -} - #ifndef PRODUCT // for debugging, bit of a hack... static char* @@ -2617,6 +2313,7 @@ ParKnownGarbageHRClosure parKnownGarbageCl(_hrSorted, _chunk_size, i); // Back to zero for the claim value. _g1->heap_region_par_iterate_chunked(&parKnownGarbageCl, i, + _g1->workers()->active_workers(), HeapRegion::InitialClaimValue); jint regions_added = parKnownGarbageCl.marked_regions_added(); _hrSorted->incNumMarkedHeapRegions(regions_added); @@ -2628,7 +2325,7 @@ }; void -G1CollectorPolicy::record_concurrent_mark_cleanup_end() { +G1CollectorPolicy::record_concurrent_mark_cleanup_end(int no_of_gc_threads) { double start_sec; if (G1PrintParCleanupStats) { start_sec = os::elapsedTime(); @@ -2644,10 +2341,27 @@ if (G1CollectedHeap::use_parallel_gc_threads()) { const size_t OverpartitionFactor = 4; - const size_t MinWorkUnit = 8; - const size_t WorkUnit = - MAX2(_g1->n_regions() / (ParallelGCThreads * OverpartitionFactor), - MinWorkUnit); + size_t WorkUnit; + // The use of MinChunkSize = 8 in the original code + // causes some assertion failures when the total number of + // region is less than 8. The code here tries to fix that. + // Should the original code also be fixed? + if (no_of_gc_threads > 0) { + const size_t MinWorkUnit = + MAX2(_g1->n_regions() / no_of_gc_threads, (size_t) 1U); + WorkUnit = + MAX2(_g1->n_regions() / (no_of_gc_threads * OverpartitionFactor), + MinWorkUnit); + } else { + assert(no_of_gc_threads > 0, + "The active gc workers should be greater than 0"); + // In a product build do something reasonable to avoid a crash. + const size_t MinWorkUnit = + MAX2(_g1->n_regions() / ParallelGCThreads, (size_t) 1U); + WorkUnit = + MAX2(_g1->n_regions() / (ParallelGCThreads * OverpartitionFactor), + MinWorkUnit); + } _collectionSetChooser->prepareForAddMarkedHeapRegionsPar(_g1->n_regions(), WorkUnit); ParKnownGarbageTask parKnownGarbageTask(_collectionSetChooser, @@ -2682,8 +2396,7 @@ } // Add the heap region at the head of the non-incremental collection set -void G1CollectorPolicy:: -add_to_collection_set(HeapRegion* hr) { +void G1CollectorPolicy::add_old_region_to_cset(HeapRegion* hr) { assert(_inc_cset_build_state == Active, "Precondition"); assert(!hr->is_young(), "non-incremental add of young region"); @@ -2694,9 +2407,11 @@ hr->set_in_collection_set(true); hr->set_next_in_collection_set(_collection_set); _collection_set = hr; - _collection_set_size++; _collection_set_bytes_used_before += hr->used(); _g1->register_region_with_in_cset_fast_test(hr); + size_t rs_length = hr->rem_set()->occupied(); + _recorded_rs_lengths += rs_length; + _old_cset_region_length += 1; } // Initialize the per-collection-set information @@ -2705,16 +2420,11 @@ _inc_cset_head = NULL; _inc_cset_tail = NULL; - _inc_cset_size = 0; _inc_cset_bytes_used_before = 0; - _inc_cset_young_index = 0; - _inc_cset_max_finger = 0; - _inc_cset_recorded_young_bytes = 0; _inc_cset_recorded_rs_lengths = 0; _inc_cset_predicted_elapsed_time_ms = 0; - _inc_cset_predicted_bytes_to_copy = 0; _inc_cset_build_state = Active; } @@ -2745,20 +2455,6 @@ // rset sampling code hr->set_recorded_rs_length(rs_length); hr->set_predicted_elapsed_time_ms(region_elapsed_time_ms); - -#if PREDICTIONS_VERBOSE - size_t bytes_to_copy = predict_bytes_to_copy(hr); - _inc_cset_predicted_bytes_to_copy += bytes_to_copy; - - // Record the number of bytes used in this region - _inc_cset_recorded_young_bytes += used_bytes; - - // Cache the values we have added to the aggregated informtion - // in the heap region in case we have to remove this region from - // the incremental collection set, or it is updated by the - // rset sampling code - hr->set_predicted_bytes_to_copy(bytes_to_copy); -#endif // PREDICTIONS_VERBOSE } void G1CollectorPolicy::remove_from_incremental_cset_info(HeapRegion* hr) { @@ -2784,17 +2480,6 @@ // Clear the values cached in the heap region hr->set_recorded_rs_length(0); hr->set_predicted_elapsed_time_ms(0); - -#if PREDICTIONS_VERBOSE - size_t old_predicted_bytes_to_copy = hr->predicted_bytes_to_copy(); - _inc_cset_predicted_bytes_to_copy -= old_predicted_bytes_to_copy; - - // Subtract the number of bytes used in this region - _inc_cset_recorded_young_bytes -= used_bytes; - - // Clear the values cached in the heap region - hr->set_predicted_bytes_to_copy(0); -#endif // PREDICTIONS_VERBOSE } void G1CollectorPolicy::update_incremental_cset_info(HeapRegion* hr, size_t new_rs_length) { @@ -2806,8 +2491,8 @@ } void G1CollectorPolicy::add_region_to_incremental_cset_common(HeapRegion* hr) { - assert( hr->is_young(), "invariant"); - assert( hr->young_index_in_cset() == -1, "invariant" ); + assert(hr->is_young(), "invariant"); + assert(hr->young_index_in_cset() > -1, "should have already been set"); assert(_inc_cset_build_state == Active, "Precondition"); // We need to clear and set the cached recorded/cached collection set @@ -2827,11 +2512,7 @@ hr->set_in_collection_set(true); assert( hr->next_in_collection_set() == NULL, "invariant"); - _inc_cset_size++; _g1->register_region_with_in_cset_fast_test(hr); - - hr->set_young_index_in_cset((int) _inc_cset_young_index); - ++_inc_cset_young_index; } // Add the region at the RHS of the incremental cset @@ -2899,8 +2580,6 @@ YoungList* young_list = _g1->young_list(); - start_recording_regions(); - guarantee(target_pause_time_ms > 0.0, err_msg("target_pause_time_ms = %1.6lf should be positive", target_pause_time_ms)); @@ -2923,7 +2602,6 @@ if (time_remaining_ms < threshold) { double prev_time_remaining_ms = time_remaining_ms; time_remaining_ms = 0.50 * target_pause_time_ms; - _within_target = false; ergo_verbose3(ErgoCSetConstruction, "adjust remaining time", ergo_format_reason("remaining time lower than threshold") @@ -2931,8 +2609,6 @@ ergo_format_ms("threshold") ergo_format_ms("adjusted remaining time"), prev_time_remaining_ms, threshold, time_remaining_ms); - } else { - _within_target = true; } size_t expansion_bytes = _g1->expansion_regions() * HeapRegion::GrainBytes; @@ -2941,8 +2617,6 @@ double young_start_time_sec = os::elapsedTime(); _collection_set_bytes_used_before = 0; - _collection_set_size = 0; - _young_cset_length = 0; _last_young_gc_full = full_young_gcs() ? true : false; if (_last_young_gc_full) { @@ -2955,9 +2629,9 @@ // pause are appended to the RHS of the young list, i.e. // [Newly Young Regions ++ Survivors from last pause]. - size_t survivor_region_num = young_list->survivor_length(); - size_t eden_region_num = young_list->length() - survivor_region_num; - size_t old_region_num = 0; + size_t survivor_region_length = young_list->survivor_length(); + size_t eden_region_length = young_list->length() - survivor_region_length; + init_cset_region_lengths(eden_region_length, survivor_region_length); hr = young_list->first_survivor_region(); while (hr != NULL) { assert(hr->is_survivor(), "badly formed young list"); @@ -2971,9 +2645,7 @@ if (_g1->mark_in_progress()) _g1->concurrent_mark()->register_collection_set_finger(_inc_cset_max_finger); - _young_cset_length = _inc_cset_young_index; _collection_set = _inc_cset_head; - _collection_set_size = _inc_cset_size; _collection_set_bytes_used_before = _inc_cset_bytes_used_before; time_remaining_ms -= _inc_cset_predicted_elapsed_time_ms; predicted_pause_time_ms += _inc_cset_predicted_elapsed_time_ms; @@ -2983,19 +2655,12 @@ ergo_format_region("eden") ergo_format_region("survivors") ergo_format_ms("predicted young region time"), - eden_region_num, survivor_region_num, + eden_region_length, survivor_region_length, _inc_cset_predicted_elapsed_time_ms); // The number of recorded young regions is the incremental // collection set's current size - set_recorded_young_regions(_inc_cset_size); set_recorded_rs_lengths(_inc_cset_recorded_rs_lengths); - set_recorded_young_bytes(_inc_cset_recorded_young_bytes); -#if PREDICTIONS_VERBOSE - set_predicted_bytes_to_copy(_inc_cset_predicted_bytes_to_copy); -#endif // PREDICTIONS_VERBOSE - - assert(_inc_cset_size == young_list->length(), "Invariant"); double young_end_time_sec = os::elapsedTime(); _recorded_young_cset_choice_time_ms = @@ -3009,9 +2674,16 @@ NumberSeq seq; double avg_prediction = 100000000000000000.0; // something very large - size_t prev_collection_set_size = _collection_set_size; double prev_predicted_pause_time_ms = predicted_pause_time_ms; do { + // Note that add_old_region_to_cset() increments the + // _old_cset_region_length field and cset_region_length() returns the + // sum of _eden_cset_region_length, _survivor_cset_region_length, and + // _old_cset_region_length. So, as old regions are added to the + // CSet, _old_cset_region_length will be incremented and + // cset_region_length(), which is used below, will always reflect + // the the total number of regions added up to this point to the CSet. + hr = _collectionSetChooser->getNextMarkedRegion(time_remaining_ms, avg_prediction); if (hr != NULL) { @@ -3019,8 +2691,7 @@ double predicted_time_ms = predict_region_elapsed_time_ms(hr, false); time_remaining_ms -= predicted_time_ms; predicted_pause_time_ms += predicted_time_ms; - add_to_collection_set(hr); - record_non_young_cset_region(hr); + add_old_region_to_cset(hr); seq.add(predicted_time_ms); avg_prediction = seq.avg() + seq.sd(); } @@ -3041,13 +2712,13 @@ should_continue = false; } } else { - if (_collection_set_size >= _young_list_fixed_length) { + if (cset_region_length() >= _young_list_fixed_length) { ergo_verbose2(ErgoCSetConstruction, "stop adding old regions to CSet", ergo_format_reason("CSet length reached target") ergo_format_region("CSet") ergo_format_region("young target"), - _collection_set_size, _young_list_fixed_length); + cset_region_length(), _young_list_fixed_length); should_continue = false; } } @@ -3055,23 +2726,21 @@ } while (should_continue); if (!adaptive_young_list_length() && - _collection_set_size < _young_list_fixed_length) { + cset_region_length() < _young_list_fixed_length) { ergo_verbose2(ErgoCSetConstruction, "request partially-young GCs end", ergo_format_reason("CSet length lower than target") ergo_format_region("CSet") ergo_format_region("young target"), - _collection_set_size, _young_list_fixed_length); + cset_region_length(), _young_list_fixed_length); _should_revert_to_full_young_gcs = true; } - old_region_num = _collection_set_size - prev_collection_set_size; - ergo_verbose2(ErgoCSetConstruction | ErgoHigh, "add old regions to CSet", ergo_format_region("old") ergo_format_ms("predicted old region time"), - old_region_num, + old_cset_region_length(), predicted_pause_time_ms - prev_predicted_pause_time_ms); } @@ -3079,8 +2748,6 @@ count_CS_bytes_used(); - end_recording_regions(); - ergo_verbose5(ErgoCSetConstruction, "finish choosing CSet", ergo_format_region("eden") @@ -3088,7 +2755,8 @@ ergo_format_region("old") ergo_format_ms("predicted pause time") ergo_format_ms("target pause time"), - eden_region_num, survivor_region_num, old_region_num, + eden_region_length, survivor_region_length, + old_cset_region_length(), predicted_pause_time_ms, target_pause_time_ms); double non_young_end_time_sec = os::elapsedTime(); diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -85,13 +85,13 @@ class G1CollectorPolicy: public CollectorPolicy { private: - // The number of pauses during the execution. - long _n_pauses; - // either equal to the number of parallel threads, if ParallelGCThreads // has been set, or 1 otherwise int _parallel_gc_threads; + // The number of GC threads currently active. + uintx _no_of_gc_threads; + enum SomePrivateConstants { NumPrevPausesForHeuristics = 10 }; @@ -127,18 +127,9 @@ jlong _num_cc_clears; // number of times the card count cache has been cleared #endif - // Statistics for recent GC pauses. See below for how indexed. - TruncatedSeq* _recent_rs_scan_times_ms; - // These exclude marking times. - TruncatedSeq* _recent_pause_times_ms; TruncatedSeq* _recent_gc_times_ms; - TruncatedSeq* _recent_CS_bytes_used_before; - TruncatedSeq* _recent_CS_bytes_surviving; - - TruncatedSeq* _recent_rs_sizes; - TruncatedSeq* _concurrent_mark_remark_times_ms; TruncatedSeq* _concurrent_mark_cleanup_times_ms; @@ -150,13 +141,6 @@ NumberSeq* _all_stop_world_times_ms; NumberSeq* _all_yield_times_ms; - size_t _region_num_young; - size_t _region_num_tenured; - size_t _prev_region_num_young; - size_t _prev_region_num_tenured; - - NumberSeq* _all_mod_union_times_ms; - int _aux_num; NumberSeq* _all_aux_times_ms; double* _cur_aux_start_times_ms; @@ -194,7 +178,6 @@ // locker is active. This should be >= _young_list_target_length; size_t _young_list_max_length; - size_t _young_cset_length; bool _last_young_gc_full; unsigned _full_young_pause_num; @@ -217,8 +200,6 @@ return _during_marking; } - // - private: enum PredictionConstants { TruncatedSeqLength = 10 @@ -240,47 +221,32 @@ TruncatedSeq* _non_young_other_cost_per_region_ms_seq; TruncatedSeq* _pending_cards_seq; - TruncatedSeq* _scanned_cards_seq; TruncatedSeq* _rs_lengths_seq; TruncatedSeq* _cost_per_byte_ms_during_cm_seq; TruncatedSeq* _young_gc_eff_seq; - TruncatedSeq* _max_conc_overhead_seq; - bool _using_new_ratio_calculations; size_t _min_desired_young_length; // as set on the command line or default calculations size_t _max_desired_young_length; // as set on the command line or default calculations - size_t _recorded_young_regions; - size_t _recorded_non_young_regions; - size_t _recorded_region_num; + size_t _eden_cset_region_length; + size_t _survivor_cset_region_length; + size_t _old_cset_region_length; + + void init_cset_region_lengths(size_t eden_cset_region_length, + size_t survivor_cset_region_length); + + size_t eden_cset_region_length() { return _eden_cset_region_length; } + size_t survivor_cset_region_length() { return _survivor_cset_region_length; } + size_t old_cset_region_length() { return _old_cset_region_length; } size_t _free_regions_at_end_of_collection; size_t _recorded_rs_lengths; size_t _max_rs_lengths; - size_t _recorded_marked_bytes; - size_t _recorded_young_bytes; - - size_t _predicted_pending_cards; - size_t _predicted_cards_scanned; - size_t _predicted_rs_lengths; - size_t _predicted_bytes_to_copy; - - double _predicted_survival_ratio; - double _predicted_rs_update_time_ms; - double _predicted_rs_scan_time_ms; - double _predicted_object_copy_time_ms; - double _predicted_constant_other_time_ms; - double _predicted_young_other_time_ms; - double _predicted_non_young_other_time_ms; - double _predicted_pause_time_ms; - - double _vtime_diff_ms; - double _recorded_young_free_cset_time_ms; double _recorded_non_young_free_cset_time_ms; @@ -317,21 +283,28 @@ double update_rs_processed_buffers, double goal_ms); + uintx no_of_gc_threads() { return _no_of_gc_threads; } + void set_no_of_gc_threads(uintx v) { _no_of_gc_threads = v; } + double _pause_time_target_ms; double _recorded_young_cset_choice_time_ms; double _recorded_non_young_cset_choice_time_ms; - bool _within_target; size_t _pending_cards; size_t _max_pending_cards; public: + // Accessors - void set_region_short_lived(HeapRegion* hr) { + void set_region_eden(HeapRegion* hr, int young_index_in_cset) { + hr->set_young(); hr->install_surv_rate_group(_short_lived_surv_rate_group); + hr->set_young_index_in_cset(young_index_in_cset); } - void set_region_survivors(HeapRegion* hr) { + void set_region_survivor(HeapRegion* hr, int young_index_in_cset) { + assert(hr->is_young() && hr->is_survivor(), "pre-condition"); hr->install_surv_rate_group(_survivor_surv_rate_group); + hr->set_young_index_in_cset(young_index_in_cset); } #ifndef PRODUCT @@ -343,10 +316,6 @@ seq->davg() * confidence_factor(seq->num())); } - size_t young_cset_length() { - return _young_cset_length; - } - void record_max_rs_lengths(size_t rs_lengths) { _max_rs_lengths = rs_lengths; } @@ -465,20 +434,12 @@ size_t predict_bytes_to_copy(HeapRegion* hr); double predict_region_elapsed_time_ms(HeapRegion* hr, bool young); - void start_recording_regions(); - void record_cset_region_info(HeapRegion* hr, bool young); - void record_non_young_cset_region(HeapRegion* hr); + void set_recorded_rs_lengths(size_t rs_lengths); - void set_recorded_young_regions(size_t n_regions); - void set_recorded_young_bytes(size_t bytes); - void set_recorded_rs_lengths(size_t rs_lengths); - void set_predicted_bytes_to_copy(size_t bytes); - - void end_recording_regions(); - - void record_vtime_diff_ms(double vtime_diff_ms) { - _vtime_diff_ms = vtime_diff_ms; - } + size_t cset_region_length() { return young_cset_region_length() + + old_cset_region_length(); } + size_t young_cset_region_length() { return eden_cset_region_length() + + survivor_cset_region_length(); } void record_young_free_cset_time_ms(double time_ms) { _recorded_young_free_cset_time_ms = time_ms; @@ -494,8 +455,6 @@ double predict_survivor_regions_evac_time(); - // - void cset_regions_freed() { bool propagate = _last_young_gc_full && !_in_marking_window; _short_lived_surv_rate_group->all_surviving_words_recorded(propagate); @@ -575,8 +534,6 @@ double sum_of_values (double* data); double max_sum (double* data1, double* data2); - int _last_satb_drain_processed_buffers; - int _last_update_rs_processed_buffers; double _last_pause_time_ms; size_t _bytes_in_collection_set_before_gc; @@ -596,10 +553,6 @@ // set at the start of the pause. HeapRegion* _collection_set; - // The number of regions in the collection set. Set from the incrementally - // built collection set at the start of an evacuation pause. - size_t _collection_set_size; - // The number of bytes in the collection set before the pause. Set from // the incrementally built collection set at the start of an evacuation // pause. @@ -622,16 +575,6 @@ // The tail of the incrementally built collection set. HeapRegion* _inc_cset_tail; - // The number of regions in the incrementally built collection set. - // Used to set _collection_set_size at the start of an evacuation - // pause. - size_t _inc_cset_size; - - // Used as the index in the surving young words structure - // which tracks the amount of space, for each young region, - // that survives the pause. - size_t _inc_cset_young_index; - // The number of bytes in the incrementally built collection set. // Used to set _collection_set_bytes_used_before at the start of // an evacuation pause. @@ -640,11 +583,6 @@ // Used to record the highest end of heap region in collection set HeapWord* _inc_cset_max_finger; - // The number of recorded used bytes in the young regions - // of the collection set. This is the sum of the used() bytes - // of retired young regions in the collection set. - size_t _inc_cset_recorded_young_bytes; - // The RSet lengths recorded for regions in the collection set // (updated by the periodic sampling of the regions in the // young list/collection set). @@ -655,68 +593,9 @@ // regions in the young list/collection set). double _inc_cset_predicted_elapsed_time_ms; - // The predicted bytes to copy for the regions in the collection - // set (updated by the periodic sampling of the regions in the - // young list/collection set). - size_t _inc_cset_predicted_bytes_to_copy; - // Stash a pointer to the g1 heap. G1CollectedHeap* _g1; - // The average time in ms per collection pause, averaged over recent pauses. - double recent_avg_time_for_pauses_ms(); - - // The average time in ms for RS scanning, per pause, averaged - // over recent pauses. (Note the RS scanning time for a pause - // is itself an average of the RS scanning time for each worker - // thread.) - double recent_avg_time_for_rs_scan_ms(); - - // The number of "recent" GCs recorded in the number sequences - int number_of_recent_gcs(); - - // The average survival ratio, computed by the total number of bytes - // suriviving / total number of bytes before collection over the last - // several recent pauses. - double recent_avg_survival_fraction(); - // The survival fraction of the most recent pause; if there have been no - // pauses, returns 1.0. - double last_survival_fraction(); - - // Returns a "conservative" estimate of the recent survival rate, i.e., - // one that may be higher than "recent_avg_survival_fraction". - // This is conservative in several ways: - // If there have been few pauses, it will assume a potential high - // variance, and err on the side of caution. - // It puts a lower bound (currently 0.1) on the value it will return. - // To try to detect phase changes, if the most recent pause ("latest") has a - // higher-than average ("avg") survival rate, it returns that rate. - // "work" version is a utility function; young is restricted to young regions. - double conservative_avg_survival_fraction_work(double avg, - double latest); - - // The arguments are the two sequences that keep track of the number of bytes - // surviving and the total number of bytes before collection, resp., - // over the last evereal recent pauses - // Returns the survival rate for the category in the most recent pause. - // If there have been no pauses, returns 1.0. - double last_survival_fraction_work(TruncatedSeq* surviving, - TruncatedSeq* before); - - // The arguments are the two sequences that keep track of the number of bytes - // surviving and the total number of bytes before collection, resp., - // over the last several recent pauses - // Returns the average survival ration over the last several recent pauses - // If there have been no pauses, return 1.0 - double recent_avg_survival_fraction_work(TruncatedSeq* surviving, - TruncatedSeq* before); - - double conservative_avg_survival_fraction() { - double avg = recent_avg_survival_fraction(); - double latest = last_survival_fraction(); - return conservative_avg_survival_fraction_work(avg, latest); - } - // The ratio of gc time to elapsed time, computed over recent pauses. double _recent_avg_pause_time_ratio; @@ -724,9 +603,6 @@ return _recent_avg_pause_time_ratio; } - // Number of pauses between concurrent marking. - size_t _pauses_btwn_concurrent_mark; - // At the end of a pause we check the heap occupancy and we decide // whether we will start a marking cycle during the next pause. If // we decide that we want to do that, we will set this parameter to @@ -849,9 +725,6 @@ GenRemSet::Name rem_set_name() { return GenRemSet::CardTable; } - // The number of collection pauses so far. - long n_pauses() const { return _n_pauses; } - // Update the heuristic info to record a collection pause of the given // start time, where the given number of bytes were used at the start. // This may involve changing the desired size of a collection set. @@ -864,19 +737,21 @@ void record_concurrent_mark_init_end(double mark_init_elapsed_time_ms); - void record_mark_closure_time(double mark_closure_time_ms); + void record_mark_closure_time(double mark_closure_time_ms) { + _mark_closure_time_ms = mark_closure_time_ms; + } void record_concurrent_mark_remark_start(); void record_concurrent_mark_remark_end(); void record_concurrent_mark_cleanup_start(); - void record_concurrent_mark_cleanup_end(); + void record_concurrent_mark_cleanup_end(int no_of_gc_threads); void record_concurrent_mark_cleanup_completed(); void record_concurrent_pause(); void record_concurrent_pause_end(); - void record_collection_pause_end(); + void record_collection_pause_end(int no_of_gc_threads); void print_heap_transition(); // Record the fact that a full collection occurred. @@ -900,15 +775,6 @@ _cur_satb_drain_time_ms = ms; } - void record_satb_drain_processed_buffers(int processed_buffers) { - assert(_g1->mark_in_progress(), "shouldn't be here otherwise"); - _last_satb_drain_processed_buffers = processed_buffers; - } - - void record_mod_union_time(double ms) { - _all_mod_union_times_ms->add(ms); - } - void record_update_rs_time(int thread, double ms) { _par_last_update_rs_times_ms[thread] = ms; } @@ -1009,11 +875,8 @@ void clear_collection_set() { _collection_set = NULL; } - // The number of elements in the current collection set. - size_t collection_set_size() { return _collection_set_size; } - - // Add "hr" to the CS. - void add_to_collection_set(HeapRegion* hr); + // Add old region "hr" to the CSet. + void add_old_region_to_cset(HeapRegion* hr); // Incremental CSet Support @@ -1023,9 +886,6 @@ // The tail of the incrementally built collection set. HeapRegion* inc_set_tail() { return _inc_cset_tail; } - // The number of elements in the incrementally built collection set. - size_t inc_cset_size() { return _inc_cset_size; } - // Initialize incremental collection set info. void start_incremental_cset_building(); @@ -1125,8 +985,6 @@ return _young_list_max_length; } - void update_region_num(bool young); - bool full_young_gcs() { return _full_young_gcs; } diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp --- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -209,29 +209,9 @@ size_t cards_looked_up() { return _cards;} }; -// We want the parallel threads to start their scanning at -// different collection set regions to avoid contention. -// If we have: -// n collection set regions -// p threads -// Then thread t will start at region t * floor (n/p) - -HeapRegion* G1RemSet::calculateStartRegion(int worker_i) { - HeapRegion* result = _g1p->collection_set(); - if (ParallelGCThreads > 0) { - size_t cs_size = _g1p->collection_set_size(); - int n_workers = _g1->workers()->total_workers(); - size_t cs_spans = cs_size / n_workers; - size_t ind = cs_spans * worker_i; - for (size_t i = 0; i < ind; i++) - result = result->next_in_collection_set(); - } - return result; -} - void G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) { double rs_time_start = os::elapsedTime(); - HeapRegion *startRegion = calculateStartRegion(worker_i); + HeapRegion *startRegion = _g1->start_cset_region_for_worker(worker_i); ScanRSClosure scanRScl(oc, worker_i); @@ -430,8 +410,10 @@ DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); dcqs.concatenate_logs(); - if (ParallelGCThreads > 0) { - _seq_task->set_n_threads((int)n_workers()); + if (G1CollectedHeap::use_parallel_gc_threads()) { + // Don't set the number of workers here. It will be set + // when the task is run + // _seq_task->set_n_termination((int)n_workers()); } guarantee( _cards_scanned == NULL, "invariant" ); _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers()); @@ -578,7 +560,10 @@ void G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm, int worker_num, int claim_val) { ScrubRSClosure scrub_cl(region_bm, card_bm); - _g1->heap_region_par_iterate_chunked(&scrub_cl, worker_num, claim_val); + _g1->heap_region_par_iterate_chunked(&scrub_cl, + worker_num, + (int) n_workers(), + claim_val); } diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp --- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -104,8 +104,6 @@ void scanRS(OopsInHeapRegionClosure* oc, int worker_i); void updateRS(DirtyCardQueue* into_cset_dcq, int worker_i); - HeapRegion* calculateStartRegion(int i); - CardTableModRefBS* ct_bs() { return _ct_bs; } size_t cardsScanned() { return _total_cards_scanned; } diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp --- a/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -39,10 +39,6 @@ develop(intx, G1MarkingOverheadPercent, 0, \ "Overhead of concurrent marking") \ \ - \ - develop(intx, G1PolicyVerbose, 0, \ - "The verbosity level on G1 policy decisions") \ - \ develop(intx, G1MarkingVerboseLevel, 0, \ "Level (0-4) of verboseness of the marking code") \ \ @@ -58,9 +54,6 @@ develop(bool, G1TraceMarkStackOverflow, false, \ "If true, extra debugging code for CM restart for ovflw.") \ \ - develop(intx, G1PausesBtwnConcMark, -1, \ - "If positive, fixed number of pauses between conc markings") \ - \ diagnostic(bool, G1SummarizeConcMark, false, \ "Summarize concurrent mark info") \ \ diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp --- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -367,12 +367,13 @@ static void setup_heap_region_size(uintx min_heap_size); enum ClaimValues { - InitialClaimValue = 0, - FinalCountClaimValue = 1, - NoteEndClaimValue = 2, - ScrubRemSetClaimValue = 3, - ParVerifyClaimValue = 4, - RebuildRSClaimValue = 5 + InitialClaimValue = 0, + FinalCountClaimValue = 1, + NoteEndClaimValue = 2, + ScrubRemSetClaimValue = 3, + ParVerifyClaimValue = 4, + RebuildRSClaimValue = 5, + CompleteMarkCSetClaimValue = 6 }; inline HeapWord* par_allocate_no_bot_updates(size_t word_size) { @@ -416,7 +417,7 @@ void add_to_marked_bytes(size_t incr_bytes) { _next_marked_bytes = _next_marked_bytes + incr_bytes; - guarantee( _next_marked_bytes <= used(), "invariant" ); + assert(_next_marked_bytes <= used(), "invariant" ); } void zero_marked_bytes() { diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp --- a/hotspot/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -33,6 +33,7 @@ #include "runtime/java.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/virtualspace.hpp" +#include "runtime/vmThread.hpp" void CardTableModRefBS::non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr, OopsInGenClosure* cl, @@ -42,6 +43,11 @@ assert((n_threads == 1 && ParallelGCThreads == 0) || n_threads <= (int)ParallelGCThreads, "# worker threads != # requested!"); + assert(!Thread::current()->is_VM_thread() || (n_threads == 1), "There is only 1 VM thread"); + assert(UseDynamicNumberOfGCThreads || + !FLAG_IS_DEFAULT(ParallelGCThreads) || + n_threads == (int)ParallelGCThreads, + "# worker threads != # requested!"); // Make sure the LNC array is valid for the space. jbyte** lowest_non_clean; uintptr_t lowest_non_clean_base_chunk_index; @@ -52,6 +58,8 @@ int n_strides = n_threads * ParGCStridesPerThread; SequentialSubTasksDone* pst = sp->par_seq_tasks(); + // Sets the condition for completion of the subtask (how many threads + // need to finish in order to be done). pst->set_n_threads(n_threads); pst->set_n_tasks(n_strides); diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp --- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -305,7 +305,7 @@ inline ParScanThreadState& thread_state(int i); - void reset(bool promotion_failed); + void reset(int active_workers, bool promotion_failed); void flush(); #if TASKQUEUE_STATS @@ -322,6 +322,9 @@ ParallelTaskTerminator& _term; ParNewGeneration& _gen; Generation& _next_gen; + public: + bool is_valid(int id) const { return id < length(); } + ParallelTaskTerminator* terminator() { return &_term; } }; @@ -351,9 +354,9 @@ } -void ParScanThreadStateSet::reset(bool promotion_failed) +void ParScanThreadStateSet::reset(int active_threads, bool promotion_failed) { - _term.reset_for_reuse(); + _term.reset_for_reuse(active_threads); if (promotion_failed) { for (int i = 0; i < length(); ++i) { thread_state(i).print_and_clear_promotion_failure_size(); @@ -569,6 +572,24 @@ _state_set(state_set) {} +// Reset the terminator for the given number of +// active threads. +void ParNewGenTask::set_for_termination(int active_workers) { + _state_set->reset(active_workers, _gen->promotion_failed()); + // Should the heap be passed in? There's only 1 for now so + // grab it instead. + GenCollectedHeap* gch = GenCollectedHeap::heap(); + gch->set_n_termination(active_workers); +} + +// The "i" passed to this method is the part of the work for +// this thread. It is not the worker ID. The "i" is derived +// from _started_workers which is incremented in internal_note_start() +// called in GangWorker loop() and which is called under the +// which is called under the protection of the gang monitor and is +// called after a task is started. So "i" is based on +// first-come-first-served. + void ParNewGenTask::work(int i) { GenCollectedHeap* gch = GenCollectedHeap::heap(); // Since this is being done in a separate thread, need new resource @@ -581,6 +602,8 @@ Generation* old_gen = gch->next_gen(_gen); ParScanThreadState& par_scan_state = _state_set->thread_state(i); + assert(_state_set->is_valid(i), "Should not have been called"); + par_scan_state.set_young_old_boundary(_young_old_boundary); par_scan_state.start_strong_roots(); @@ -733,7 +756,9 @@ private: virtual void work(int i); - + virtual void set_for_termination(int active_workers) { + _state_set.terminator()->reset_for_reuse(active_workers); + } private: ParNewGeneration& _gen; ProcessTask& _task; @@ -789,18 +814,20 @@ GenCollectedHeap* gch = GenCollectedHeap::heap(); assert(gch->kind() == CollectedHeap::GenCollectedHeap, "not a generational heap"); - WorkGang* workers = gch->workers(); + FlexibleWorkGang* workers = gch->workers(); assert(workers != NULL, "Need parallel worker threads."); + _state_set.reset(workers->active_workers(), _generation.promotion_failed()); ParNewRefProcTaskProxy rp_task(task, _generation, *_generation.next_gen(), _generation.reserved().end(), _state_set); workers->run_task(&rp_task); - _state_set.reset(_generation.promotion_failed()); + _state_set.reset(0 /* bad value in debug if not reset */, + _generation.promotion_failed()); } void ParNewRefProcTaskExecutor::execute(EnqueueTask& task) { GenCollectedHeap* gch = GenCollectedHeap::heap(); - WorkGang* workers = gch->workers(); + FlexibleWorkGang* workers = gch->workers(); assert(workers != NULL, "Need parallel worker threads."); ParNewRefEnqueueTaskProxy enq_task(task); workers->run_task(&enq_task); @@ -856,7 +883,13 @@ assert(gch->kind() == CollectedHeap::GenCollectedHeap, "not a CMS generational heap"); AdaptiveSizePolicy* size_policy = gch->gen_policy()->size_policy(); - WorkGang* workers = gch->workers(); + FlexibleWorkGang* workers = gch->workers(); + assert(workers != NULL, "Need workgang for parallel work"); + int active_workers = + AdaptiveSizePolicy::calc_active_workers(workers->total_workers(), + workers->active_workers(), + Threads::number_of_non_daemon_threads()); + workers->set_active_workers(active_workers); _next_gen = gch->next_gen(this); assert(_next_gen != NULL, "This must be the youngest gen, and not the only gen"); @@ -894,13 +927,19 @@ gch->save_marks(); assert(workers != NULL, "Need parallel worker threads."); - ParallelTaskTerminator _term(workers->total_workers(), task_queues()); - ParScanThreadStateSet thread_state_set(workers->total_workers(), + int n_workers = active_workers; + + // Set the correct parallelism (number of queues) in the reference processor + ref_processor()->set_active_mt_degree(n_workers); + + // Always set the terminator for the active number of workers + // because only those workers go through the termination protocol. + ParallelTaskTerminator _term(n_workers, task_queues()); + ParScanThreadStateSet thread_state_set(workers->active_workers(), *to(), *this, *_next_gen, *task_queues(), _overflow_stacks, desired_plab_sz(), _term); ParNewGenTask tsk(this, _next_gen, reserved().end(), &thread_state_set); - int n_workers = workers->total_workers(); gch->set_par_threads(n_workers); gch->rem_set()->prepare_for_younger_refs_iterate(true); // It turns out that even when we're using 1 thread, doing the work in a @@ -914,7 +953,8 @@ GenCollectedHeap::StrongRootsScope srs(gch); tsk.work(0); } - thread_state_set.reset(promotion_failed()); + thread_state_set.reset(0 /* Bad value in debug if not reset */, + promotion_failed()); // Process (weak) reference objects found during scavenge. ReferenceProcessor* rp = ref_processor(); @@ -927,6 +967,8 @@ EvacuateFollowersClosureGeneral evacuate_followers(gch, _level, &scan_without_gc_barrier, &scan_with_gc_barrier); rp->setup_policy(clear_all_soft_refs); + // Can the mt_degree be set later (at run_task() time would be best)? + rp->set_active_mt_degree(active_workers); if (rp->processing_is_mt()) { ParNewRefProcTaskExecutor task_executor(*this, thread_state_set); rp->process_discovered_references(&is_alive, &keep_alive, diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp --- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -240,6 +240,10 @@ HeapWord* young_old_boundary() { return _young_old_boundary; } void work(int i); + + // Reset the terminator in ParScanThreadStateSet for + // "active_workers" threads. + virtual void set_for_termination(int active_workers); }; class KeepAliveClosure: public DefNewGeneration::KeepAliveClosure { diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -223,7 +223,8 @@ MutableSpace* sp, HeapWord* space_top, PSPromotionManager* pm, - uint stripe_number) { + uint stripe_number, + uint stripe_total) { int ssize = 128; // Naked constant! Work unit = 64k. int dirty_card_count = 0; @@ -231,7 +232,11 @@ jbyte* start_card = byte_for(sp->bottom()); jbyte* end_card = byte_for(sp_top - 1) + 1; oop* last_scanned = NULL; // Prevent scanning objects more than once - for (jbyte* slice = start_card; slice < end_card; slice += ssize*ParallelGCThreads) { + // The width of the stripe ssize*stripe_total must be + // consistent with the number of stripes so that the complete slice + // is covered. + size_t slice_width = ssize * stripe_total; + for (jbyte* slice = start_card; slice < end_card; slice += slice_width) { jbyte* worker_start_card = slice + stripe_number * ssize; if (worker_start_card >= end_card) return; // We're done. diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -69,7 +69,8 @@ MutableSpace* sp, HeapWord* space_top, PSPromotionManager* pm, - uint stripe_number); + uint stripe_number, + uint stripe_total); // Verification static void verify_all_young_refs_imprecise(); diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.cpp --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "gc_implementation/parallelScavenge/gcTaskManager.hpp" #include "gc_implementation/parallelScavenge/gcTaskThread.hpp" +#include "gc_implementation/shared/adaptiveSizePolicy.hpp" #include "memory/allocation.hpp" #include "memory/allocation.inline.hpp" #include "runtime/mutex.hpp" @@ -181,6 +182,7 @@ } set_insert_end(task); increment_length(); + verify_length(); if (TraceGCTaskQueue) { print("after:"); } @@ -192,7 +194,7 @@ tty->print_cr("[" INTPTR_FORMAT "]" " GCTaskQueue::enqueue(list: " INTPTR_FORMAT ")", - this); + this, list); print("before:"); list->print("list:"); } @@ -211,14 +213,15 @@ list->remove_end()->set_older(insert_end()); insert_end()->set_newer(list->remove_end()); set_insert_end(list->insert_end()); + set_length(length() + list_length); // empty the argument list. } - set_length(length() + list_length); list->initialize(); if (TraceGCTaskQueue) { print("after:"); list->print("list:"); } + verify_length(); } // Dequeue one task. @@ -288,6 +291,7 @@ decrement_length(); assert(result->newer() == NULL, "shouldn't be on queue"); assert(result->older() == NULL, "shouldn't be on queue"); + verify_length(); return result; } @@ -311,22 +315,40 @@ result->set_newer(NULL); result->set_older(NULL); decrement_length(); + verify_length(); return result; } NOT_PRODUCT( +// Count the elements in the queue and verify the length against +// that count. +void GCTaskQueue::verify_length() const { + uint count = 0; + for (GCTask* element = insert_end(); + element != NULL; + element = element->older()) { + + count++; + } + assert(count == length(), "Length does not match queue"); +} + void GCTaskQueue::print(const char* message) const { tty->print_cr("[" INTPTR_FORMAT "] GCTaskQueue:" " insert_end: " INTPTR_FORMAT " remove_end: " INTPTR_FORMAT + " length: %d" " %s", - this, insert_end(), remove_end(), message); + this, insert_end(), remove_end(), length(), message); + uint count = 0; for (GCTask* element = insert_end(); element != NULL; element = element->older()) { element->print(" "); + count++; tty->cr(); } + tty->print("Total tasks: %d", count); } ) @@ -351,12 +373,16 @@ // GCTaskManager::GCTaskManager(uint workers) : _workers(workers), + _active_workers(0), + _idle_workers(0), _ndc(NULL) { initialize(); } GCTaskManager::GCTaskManager(uint workers, NotifyDoneClosure* ndc) : _workers(workers), + _active_workers(0), + _idle_workers(0), _ndc(ndc) { initialize(); } @@ -373,6 +399,7 @@ GCTaskQueue* unsynchronized_queue = GCTaskQueue::create_on_c_heap(); _queue = SynchronizedGCTaskQueue::create(unsynchronized_queue, lock()); _noop_task = NoopGCTask::create_on_c_heap(); + _idle_inactive_task = WaitForBarrierGCTask::create_on_c_heap(); _resource_flag = NEW_C_HEAP_ARRAY(bool, workers()); { // Set up worker threads. @@ -418,6 +445,8 @@ assert(queue()->is_empty(), "still have queued work"); NoopGCTask::destroy(_noop_task); _noop_task = NULL; + WaitForBarrierGCTask::destroy(_idle_inactive_task); + _idle_inactive_task = NULL; if (_thread != NULL) { for (uint i = 0; i < workers(); i += 1) { GCTaskThread::destroy(thread(i)); @@ -442,6 +471,86 @@ } } +void GCTaskManager::set_active_gang() { + _active_workers = + AdaptiveSizePolicy::calc_active_workers(workers(), + active_workers(), + Threads::number_of_non_daemon_threads()); + + assert(!all_workers_active() || active_workers() == ParallelGCThreads, + err_msg("all_workers_active() is incorrect: " + "active %d ParallelGCThreads %d", active_workers(), + ParallelGCThreads)); + if (TraceDynamicGCThreads) { + gclog_or_tty->print_cr("GCTaskManager::set_active_gang(): " + "all_workers_active() %d workers %d " + "active %d ParallelGCThreads %d ", + all_workers_active(), workers(), active_workers(), + ParallelGCThreads); + } +} + +// Create IdleGCTasks for inactive workers. +// Creates tasks in a ResourceArea and assumes +// an appropriate ResourceMark. +void GCTaskManager::task_idle_workers() { + { + int more_inactive_workers = 0; + { + // Stop any idle tasks from exiting their IdleGCTask's + // and get the count for additional IdleGCTask's under + // the GCTaskManager's monitor so that the "more_inactive_workers" + // count is correct. + MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag); + _idle_inactive_task->set_should_wait(true); + // active_workers are a number being requested. idle_workers + // are the number currently idle. If all the workers are being + // requested to be active but some are already idle, reduce + // the number of active_workers to be consistent with the + // number of idle_workers. The idle_workers are stuck in + // idle tasks and will no longer be release (since a new GC + // is starting). Try later to release enough idle_workers + // to allow the desired number of active_workers. + more_inactive_workers = + workers() - active_workers() - idle_workers(); + if (more_inactive_workers < 0) { + int reduced_active_workers = active_workers() + more_inactive_workers; + set_active_workers(reduced_active_workers); + more_inactive_workers = 0; + } + if (TraceDynamicGCThreads) { + gclog_or_tty->print_cr("JT: %d workers %d active %d " + "idle %d more %d", + Threads::number_of_non_daemon_threads(), + workers(), + active_workers(), + idle_workers(), + more_inactive_workers); + } + } + GCTaskQueue* q = GCTaskQueue::create(); + for(uint i = 0; i < (uint) more_inactive_workers; i++) { + q->enqueue(IdleGCTask::create_on_c_heap()); + increment_idle_workers(); + } + assert(workers() == active_workers() + idle_workers(), + "total workers should equal active + inactive"); + add_list(q); + // GCTaskQueue* q was created in a ResourceArea so a + // destroy() call is not needed. + } +} + +void GCTaskManager::release_idle_workers() { + { + MutexLockerEx ml(monitor(), + Mutex::_no_safepoint_check_flag); + _idle_inactive_task->set_should_wait(false); + monitor()->notify_all(); + // Release monitor + } +} + void GCTaskManager::print_task_time_stamps() { for(uint i=0; ikind())); tty->print_cr(" %s", result->name()); } - increment_busy_workers(); - increment_delivered_tasks(); + if (!result->is_idle_task()) { + increment_busy_workers(); + increment_delivered_tasks(); + } return result; // Release monitor(). } @@ -622,6 +740,7 @@ uint GCTaskManager::decrement_busy_workers() { assert(queue()->own_lock(), "don't own the lock"); + assert(_busy_workers > 0, "About to make a mistake"); _busy_workers -= 1; return _busy_workers; } @@ -643,11 +762,28 @@ set_resource_flag(which, false); } +// "list" contains tasks that are ready to execute. Those +// tasks are added to the GCTaskManager's queue of tasks and +// then the GC workers are notified that there is new work to +// do. +// +// Typically different types of tasks can be added to the "list". +// For example in PSScavenge OldToYoungRootsTask, SerialOldToYoungRootsTask, +// ScavengeRootsTask, and StealTask tasks are all added to the list +// and then the GC workers are notified of new work. The tasks are +// handed out in the order in which they are added to the list +// (although execution is not necessarily in that order). As long +// as any tasks are running the GCTaskManager will wait for execution +// to complete. GC workers that execute a stealing task remain in +// the stealing task until all stealing tasks have completed. The load +// balancing afforded by the stealing tasks work best if the stealing +// tasks are added last to the list. + void GCTaskManager::execute_and_wait(GCTaskQueue* list) { WaitForBarrierGCTask* fin = WaitForBarrierGCTask::create(); list->enqueue(fin); add_list(list); - fin->wait_for(); + fin->wait_for(true /* reset */); // We have to release the barrier tasks! WaitForBarrierGCTask::destroy(fin); } @@ -692,6 +828,72 @@ } // +// IdleGCTask +// + +IdleGCTask* IdleGCTask::create() { + IdleGCTask* result = new IdleGCTask(false); + return result; +} + +IdleGCTask* IdleGCTask::create_on_c_heap() { + IdleGCTask* result = new(ResourceObj::C_HEAP) IdleGCTask(true); + return result; +} + +void IdleGCTask::do_it(GCTaskManager* manager, uint which) { + WaitForBarrierGCTask* wait_for_task = manager->idle_inactive_task(); + if (TraceGCTaskManager) { + tty->print_cr("[" INTPTR_FORMAT "]" + " IdleGCTask:::do_it()" + " should_wait: %s", + this, wait_for_task->should_wait() ? "true" : "false"); + } + MutexLockerEx ml(manager->monitor(), Mutex::_no_safepoint_check_flag); + if (TraceDynamicGCThreads) { + gclog_or_tty->print_cr("--- idle %d", which); + } + // Increment has to be done when the idle tasks are created. + // manager->increment_idle_workers(); + manager->monitor()->notify_all(); + while (wait_for_task->should_wait()) { + if (TraceGCTaskManager) { + tty->print_cr("[" INTPTR_FORMAT "]" + " IdleGCTask::do_it()" + " [" INTPTR_FORMAT "] (%s)->wait()", + this, manager->monitor(), manager->monitor()->name()); + } + manager->monitor()->wait(Mutex::_no_safepoint_check_flag, 0); + } + manager->decrement_idle_workers(); + if (TraceDynamicGCThreads) { + gclog_or_tty->print_cr("--- release %d", which); + } + if (TraceGCTaskManager) { + tty->print_cr("[" INTPTR_FORMAT "]" + " IdleGCTask::do_it() returns" + " should_wait: %s", + this, wait_for_task->should_wait() ? "true" : "false"); + } + // Release monitor(). +} + +void IdleGCTask::destroy(IdleGCTask* that) { + if (that != NULL) { + that->destruct(); + if (that->is_c_heap_obj()) { + FreeHeap(that); + } + } +} + +void IdleGCTask::destruct() { + // This has to know it's superclass structure, just like the constructor. + this->GCTask::destruct(); + // Nothing else to do. +} + +// // BarrierGCTask // @@ -768,7 +970,8 @@ } WaitForBarrierGCTask* WaitForBarrierGCTask::create_on_c_heap() { - WaitForBarrierGCTask* result = new WaitForBarrierGCTask(true); + WaitForBarrierGCTask* result = + new (ResourceObj::C_HEAP) WaitForBarrierGCTask(true); return result; } @@ -849,7 +1052,7 @@ } } -void WaitForBarrierGCTask::wait_for() { +void WaitForBarrierGCTask::wait_for(bool reset) { if (TraceGCTaskManager) { tty->print_cr("[" INTPTR_FORMAT "]" " WaitForBarrierGCTask::wait_for()" @@ -869,7 +1072,9 @@ monitor()->wait(Mutex::_no_safepoint_check_flag, 0); } // Reset the flag in case someone reuses this task. - set_should_wait(true); + if (reset) { + set_should_wait(true); + } if (TraceGCTaskManager) { tty->print_cr("[" INTPTR_FORMAT "]" " WaitForBarrierGCTask::wait_for() returns" diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.hpp --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -45,6 +45,7 @@ class ReleasingBarrierGCTask; class NotifyingBarrierGCTask; class WaitForBarrierGCTask; +class IdleGCTask; // A free list of Monitor*'s. class MonitorSupply; @@ -64,7 +65,8 @@ unknown_task, ordinary_task, barrier_task, - noop_task + noop_task, + idle_task }; static const char* to_string(kind value); }; @@ -108,6 +110,9 @@ bool is_noop_task() const { return kind()==Kind::noop_task; } + bool is_idle_task() const { + return kind()==Kind::idle_task; + } void print(const char* message) const PRODUCT_RETURN; protected: // Constructors: Only create subclasses. @@ -153,6 +158,7 @@ assert(((insert_end() == NULL && remove_end() == NULL) || (insert_end() != NULL && remove_end() != NULL)), "insert_end and remove_end don't match"); + assert((insert_end() != NULL) || (_length == 0), "Not empty"); return insert_end() == NULL; } uint length() const { @@ -204,6 +210,8 @@ GCTask* remove(); // Remove from remove end. GCTask* remove(GCTask* task); // Remove from the middle. void print(const char* message) const PRODUCT_RETURN; + // Debug support + void verify_length() const PRODUCT_RETURN; }; // A GCTaskQueue that can be synchronized. @@ -285,12 +293,76 @@ } }; +// Dynamic number of GC threads +// +// GC threads wait in get_task() for work (i.e., a task) to perform. +// When the number of GC threads was static, the number of tasks +// created to do a job was equal to or greater than the maximum +// number of GC threads (ParallelGCThreads). The job might be divided +// into a number of tasks greater than the number of GC threads for +// load balancing (i.e., over partitioning). The last task to be +// executed by a GC thread in a job is a work stealing task. A +// GC thread that gets a work stealing task continues to execute +// that task until the job is done. In the static number of GC theads +// case, tasks are added to a queue (FIFO). The work stealing tasks are +// the last to be added. Once the tasks are added, the GC threads grab +// a task and go. A single thread can do all the non-work stealing tasks +// and then execute a work stealing and wait for all the other GC threads +// to execute their work stealing task. +// In the dynamic number of GC threads implementation, idle-tasks are +// created to occupy the non-participating or "inactive" threads. An +// idle-task makes the GC thread wait on a barrier that is part of the +// GCTaskManager. The GC threads that have been "idled" in a IdleGCTask +// are released once all the active GC threads have finished their work +// stealing tasks. The GCTaskManager does not wait for all the "idled" +// GC threads to resume execution. When those GC threads do resume +// execution in the course of the thread scheduling, they call get_tasks() +// as all the other GC threads do. Because all the "idled" threads are +// not required to execute in order to finish a job, it is possible for +// a GC thread to still be "idled" when the next job is started. Such +// a thread stays "idled" for the next job. This can result in a new +// job not having all the expected active workers. For example if on +// job requests 4 active workers out of a total of 10 workers so the +// remaining 6 are "idled", if the next job requests 6 active workers +// but all 6 of the "idled" workers are still idle, then the next job +// will only get 4 active workers. +// The implementation for the parallel old compaction phase has an +// added complication. In the static case parold partitions the chunks +// ready to be filled into stacks, one for each GC thread. A GC thread +// executing a draining task (drains the stack of ready chunks) +// claims a stack according to it's id (the unique ordinal value assigned +// to each GC thread). In the dynamic case not all GC threads will +// actively participate so stacks with ready to fill chunks can only be +// given to the active threads. An initial implementation chose stacks +// number 1-n to get the ready chunks and required that GC threads +// 1-n be the active workers. This was undesirable because it required +// certain threads to participate. In the final implementation a +// list of stacks equal in number to the active workers are filled +// with ready chunks. GC threads that participate get a stack from +// the task (DrainStacksCompactionTask), empty the stack, and then add it to a +// recycling list at the end of the task. If the same GC thread gets +// a second task, it gets a second stack to drain and returns it. The +// stacks are added to a recycling list so that later stealing tasks +// for this tasks can get a stack from the recycling list. Stealing tasks +// use the stacks in its work in a way similar to the draining tasks. +// A thread is not guaranteed to get anything but a stealing task and +// a thread that only gets a stealing task has to get a stack. A failed +// implementation tried to have the GC threads keep the stack they used +// during a draining task for later use in the stealing task but that didn't +// work because as noted a thread is not guaranteed to get a draining task. +// +// For PSScavenge and ParCompactionManager the GC threads are +// held in the GCTaskThread** _thread array in GCTaskManager. + + class GCTaskManager : public CHeapObj { friend class ParCompactionManager; friend class PSParallelCompact; friend class PSScavenge; friend class PSRefProcTaskExecutor; friend class RefProcTaskExecutor; + friend class GCTaskThread; + friend class IdleGCTask; private: // Instance state. NotifyDoneClosure* _ndc; // Notify on completion. @@ -298,6 +370,7 @@ Monitor* _monitor; // Notification of changes. SynchronizedGCTaskQueue* _queue; // Queue of tasks. GCTaskThread** _thread; // Array of worker threads. + uint _active_workers; // Number of active workers. uint _busy_workers; // Number of busy workers. uint _blocking_worker; // The worker that's blocking. bool* _resource_flag; // Array of flag per threads. @@ -307,6 +380,8 @@ uint _emptied_queue; // Times we emptied the queue. NoopGCTask* _noop_task; // The NoopGCTask instance. uint _noop_tasks; // Count of noop tasks. + WaitForBarrierGCTask* _idle_inactive_task;// Task for inactive workers + volatile uint _idle_workers; // Number of idled workers public: // Factory create and destroy methods. static GCTaskManager* create(uint workers) { @@ -324,6 +399,9 @@ uint busy_workers() const { return _busy_workers; } + volatile uint idle_workers() const { + return _idle_workers; + } // Pun between Monitor* and Mutex* Monitor* monitor() const { return _monitor; @@ -331,6 +409,9 @@ Monitor * lock() const { return _monitor; } + WaitForBarrierGCTask* idle_inactive_task() { + return _idle_inactive_task; + } // Methods. // Add the argument task to be run. void add_task(GCTask* task); @@ -350,6 +431,10 @@ bool should_release_resources(uint which); // Predicate. // Note the release of resources by the argument worker. void note_release(uint which); + // Create IdleGCTasks for inactive workers and start workers + void task_idle_workers(); + // Release the workers in IdleGCTasks + void release_idle_workers(); // Constants. // A sentinel worker identifier. static uint sentinel_worker() { @@ -375,6 +460,15 @@ uint workers() const { return _workers; } + void set_active_workers(uint v) { + assert(v <= _workers, "Trying to set more workers active than there are"); + _active_workers = MIN2(v, _workers); + assert(v != 0, "Trying to set active workers to 0"); + _active_workers = MAX2(1U, _active_workers); + } + // Sets the number of threads that will be used in a collection + void set_active_gang(); + NotifyDoneClosure* notify_done_closure() const { return _ndc; } @@ -457,8 +551,21 @@ void reset_noop_tasks() { _noop_tasks = 0; } + void increment_idle_workers() { + _idle_workers++; + } + void decrement_idle_workers() { + _idle_workers--; + } // Other methods. void initialize(); + + public: + // Return true if all workers are currently active. + bool all_workers_active() { return workers() == active_workers(); } + uint active_workers() const { + return _active_workers; + } }; // @@ -475,6 +582,8 @@ static NoopGCTask* create(); static NoopGCTask* create_on_c_heap(); static void destroy(NoopGCTask* that); + + virtual char* name() { return (char *)"noop task"; } // Methods from GCTask. void do_it(GCTaskManager* manager, uint which) { // Nothing to do. @@ -518,6 +627,8 @@ } // Destructor-like method. void destruct(); + + virtual char* name() { return (char *)"barrier task"; } // Methods. // Wait for this to be the only task running. void do_it_internal(GCTaskManager* manager, uint which); @@ -586,11 +697,13 @@ // the BarrierGCTask is done. // This may cover many of the uses of NotifyingBarrierGCTasks. class WaitForBarrierGCTask : public BarrierGCTask { + friend class GCTaskManager; + friend class IdleGCTask; private: // Instance state. - Monitor* _monitor; // Guard and notify changes. - bool _should_wait; // true=>wait, false=>proceed. - const bool _is_c_heap_obj; // Was allocated on the heap. + Monitor* _monitor; // Guard and notify changes. + volatile bool _should_wait; // true=>wait, false=>proceed. + const bool _is_c_heap_obj; // Was allocated on the heap. public: virtual char* name() { return (char *) "waitfor-barrier-task"; } @@ -600,7 +713,10 @@ static void destroy(WaitForBarrierGCTask* that); // Methods. void do_it(GCTaskManager* manager, uint which); - void wait_for(); + void wait_for(bool reset); + void set_should_wait(bool value) { + _should_wait = value; + } protected: // Constructor. Clients use factory, but there might be subclasses. WaitForBarrierGCTask(bool on_c_heap); @@ -613,12 +729,36 @@ bool should_wait() const { return _should_wait; } - void set_should_wait(bool value) { - _should_wait = value; + bool is_c_heap_obj() { + return _is_c_heap_obj; } +}; + +// Task that is used to idle a GC task when fewer than +// the maximum workers are wanted. +class IdleGCTask : public GCTask { + const bool _is_c_heap_obj; // Was allocated on the heap. + public: bool is_c_heap_obj() { return _is_c_heap_obj; } + // Factory create and destroy methods. + static IdleGCTask* create(); + static IdleGCTask* create_on_c_heap(); + static void destroy(IdleGCTask* that); + + virtual char* name() { return (char *)"idle task"; } + // Methods from GCTask. + virtual void do_it(GCTaskManager* manager, uint which); +protected: + // Constructor. + IdleGCTask(bool on_c_heap) : + GCTask(GCTask::Kind::idle_task), + _is_c_heap_obj(on_c_heap) { + // Nothing to do. + } + // Destructor-like method. + void destruct(); }; class MonitorSupply : public AllStatic { diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.cpp --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -93,6 +93,11 @@ st->cr(); } +// GC workers get tasks from the GCTaskManager and execute +// them in this method. If there are no tasks to execute, +// the GC workers wait in the GCTaskManager's get_task() +// for tasks to be enqueued for execution. + void GCTaskThread::run() { // Set up the thread for stack overflow support this->record_stack_base_and_size(); @@ -124,7 +129,6 @@ for (; /* break */; ) { // This will block until there is a task to be gotten. GCTask* task = manager()->get_task(which()); - // In case the update is costly if (PrintGCTaskTimeStamps) { timer.update(); @@ -134,18 +138,28 @@ char* name = task->name(); task->do_it(manager(), which()); - manager()->note_completion(which()); + + if (!task->is_idle_task()) { + manager()->note_completion(which()); - if (PrintGCTaskTimeStamps) { - assert(_time_stamps != NULL, "Sanity (PrintGCTaskTimeStamps set late?)"); + if (PrintGCTaskTimeStamps) { + assert(_time_stamps != NULL, + "Sanity (PrintGCTaskTimeStamps set late?)"); + + timer.update(); - timer.update(); - - GCTaskTimeStamp* time_stamp = time_stamp_at(_time_stamp_index++); + GCTaskTimeStamp* time_stamp = time_stamp_at(_time_stamp_index++); - time_stamp->set_name(name); - time_stamp->set_entry_time(entry_time); - time_stamp->set_exit_time(timer.ticks()); + time_stamp->set_name(name); + time_stamp->set_entry_time(entry_time); + time_stamp->set_exit_time(timer.ticks()); + } + } else { + // idle tasks complete outside the normal accounting + // so that a task can complete without waiting for idle tasks. + // They have to be terminated separately. + IdleGCTask::destroy((IdleGCTask*)task); + set_is_working(true); } // Check if we should release our inner resources. diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.hpp --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -35,6 +35,7 @@ class GCTaskManager; class GCTaskThread : public WorkerThread { + friend class GCTaskManager; private: // Instance state. GCTaskManager* _manager; // Manager for worker. @@ -45,6 +46,8 @@ GCTaskTimeStamp* time_stamp_at(uint index); + bool _is_working; // True if participating in GC tasks + public: // Factory create and destroy methods. static GCTaskThread* create(GCTaskManager* manager, @@ -84,6 +87,7 @@ uint processor_id() const { return _processor_id; } + void set_is_working(bool v) { _is_working = v; } }; class GCTaskTimeStamp : public CHeapObj diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -152,15 +152,16 @@ { ParallelScavengeHeap* heap = PSParallelCompact::gc_heap(); uint parallel_gc_threads = heap->gc_task_manager()->workers(); + uint active_gc_threads = heap->gc_task_manager()->active_workers(); RegionTaskQueueSet* qset = ParCompactionManager::region_array(); - ParallelTaskTerminator terminator(parallel_gc_threads, qset); + ParallelTaskTerminator terminator(active_gc_threads, qset); GCTaskQueue* q = GCTaskQueue::create(); for(uint i=0; ienqueue(new RefProcTaskProxy(task, i)); } if (task.marks_oops_alive()) { if (parallel_gc_threads>1) { - for (uint j=0; jenqueue(new StealMarkingTask(&terminator)); } } @@ -216,7 +217,6 @@ // StealRegionCompactionTask // - StealRegionCompactionTask::StealRegionCompactionTask(ParallelTaskTerminator* t): _terminator(t) {} @@ -229,6 +229,32 @@ ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(which); + + // If not all threads are active, get a draining stack + // from the list. Else, just use this threads draining stack. + uint which_stack_index; + bool use_all_workers = manager->all_workers_active(); + if (use_all_workers) { + which_stack_index = which; + assert(manager->active_workers() == ParallelGCThreads, + err_msg("all_workers_active has been incorrectly set: " + " active %d ParallelGCThreads %d", manager->active_workers(), + ParallelGCThreads)); + } else { + which_stack_index = ParCompactionManager::pop_recycled_stack_index(); + } + + cm->set_region_stack_index(which_stack_index); + cm->set_region_stack(ParCompactionManager::region_list(which_stack_index)); + if (TraceDynamicGCThreads) { + gclog_or_tty->print_cr("StealRegionCompactionTask::do_it " + "region_stack_index %d region_stack = 0x%x " + " empty (%d) use all workers %d", + which_stack_index, ParCompactionManager::region_list(which_stack_index), + cm->region_stack()->is_empty(), + use_all_workers); + } + // Has to drain stacks first because there may be regions on // preloaded onto the stack and this thread may never have // done a draining task. Are the draining tasks needed? @@ -285,6 +311,50 @@ ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(which); + uint which_stack_index; + bool use_all_workers = manager->all_workers_active(); + if (use_all_workers) { + which_stack_index = which; + assert(manager->active_workers() == ParallelGCThreads, + err_msg("all_workers_active has been incorrectly set: " + " active %d ParallelGCThreads %d", manager->active_workers(), + ParallelGCThreads)); + } else { + which_stack_index = stack_index(); + } + + cm->set_region_stack(ParCompactionManager::region_list(which_stack_index)); + if (TraceDynamicGCThreads) { + gclog_or_tty->print_cr("DrainStacksCompactionTask::do_it which = %d " + "which_stack_index = %d/empty(%d) " + "use all workers %d", + which, which_stack_index, + cm->region_stack()->is_empty(), + use_all_workers); + } + + cm->set_region_stack_index(which_stack_index); + // Process any regions already in the compaction managers stacks. cm->drain_region_stacks(); + + assert(cm->region_stack()->is_empty(), "Not empty"); + + if (!use_all_workers) { + // Always give up the region stack. + assert(cm->region_stack() == + ParCompactionManager::region_list(cm->region_stack_index()), + "region_stack and region_stack_index are inconsistent"); + ParCompactionManager::push_recycled_stack_index(cm->region_stack_index()); + + if (TraceDynamicGCThreads) { + void* old_region_stack = (void*) cm->region_stack(); + int old_region_stack_index = cm->region_stack_index(); + gclog_or_tty->print_cr("Pushing region stack 0x%x/%d", + old_region_stack, old_region_stack_index); + } + + cm->set_region_stack(NULL); + cm->set_region_stack_index((uint)max_uintx); + } } diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -39,6 +39,9 @@ PSOldGen* ParCompactionManager::_old_gen = NULL; ParCompactionManager** ParCompactionManager::_manager_array = NULL; + +RegionTaskQueue** ParCompactionManager::_region_list = NULL; + OopTaskQueueSet* ParCompactionManager::_stack_array = NULL; ParCompactionManager::ObjArrayTaskQueueSet* ParCompactionManager::_objarray_queues = NULL; @@ -46,8 +49,14 @@ ParMarkBitMap* ParCompactionManager::_mark_bitmap = NULL; RegionTaskQueueSet* ParCompactionManager::_region_array = NULL; +uint* ParCompactionManager::_recycled_stack_index = NULL; +int ParCompactionManager::_recycled_top = -1; +int ParCompactionManager::_recycled_bottom = -1; + ParCompactionManager::ParCompactionManager() : - _action(CopyAndUpdate) { + _action(CopyAndUpdate), + _region_stack(NULL), + _region_stack_index((uint)max_uintx) { ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity"); @@ -57,7 +66,10 @@ marking_stack()->initialize(); _objarray_stack.initialize(); - region_stack()->initialize(); +} + +ParCompactionManager::~ParCompactionManager() { + delete _recycled_stack_index; } void ParCompactionManager::initialize(ParMarkBitMap* mbm) { @@ -72,6 +84,19 @@ _manager_array = NEW_C_HEAP_ARRAY(ParCompactionManager*, parallel_gc_threads+1 ); guarantee(_manager_array != NULL, "Could not allocate manager_array"); + _region_list = NEW_C_HEAP_ARRAY(RegionTaskQueue*, + parallel_gc_threads+1); + guarantee(_region_list != NULL, "Could not initialize promotion manager"); + + _recycled_stack_index = NEW_C_HEAP_ARRAY(uint, parallel_gc_threads); + + // parallel_gc-threads + 1 to be consistent with the number of + // compaction managers. + for(uint i=0; iinitialize(); + } + _stack_array = new OopTaskQueueSet(parallel_gc_threads); guarantee(_stack_array != NULL, "Could not allocate stack_array"); _objarray_queues = new ObjArrayTaskQueueSet(parallel_gc_threads); @@ -85,7 +110,7 @@ guarantee(_manager_array[i] != NULL, "Could not create ParCompactionManager"); stack_array()->register_queue(i, _manager_array[i]->marking_stack()); _objarray_queues->register_queue(i, &_manager_array[i]->_objarray_stack); - region_array()->register_queue(i, _manager_array[i]->region_stack()); + region_array()->register_queue(i, region_list(i)); } // The VMThread gets its own ParCompactionManager, which is not available @@ -97,6 +122,29 @@ "Not initialized?"); } +int ParCompactionManager::pop_recycled_stack_index() { + assert(_recycled_bottom <= _recycled_top, "list is empty"); + // Get the next available index + if (_recycled_bottom < _recycled_top) { + uint cur, next, last; + do { + cur = _recycled_bottom; + next = cur + 1; + last = Atomic::cmpxchg(next, &_recycled_bottom, cur); + } while (cur != last); + return _recycled_stack_index[next]; + } else { + return -1; + } +} + +void ParCompactionManager::push_recycled_stack_index(uint v) { + // Get the next available index + int cur = Atomic::add(1, &_recycled_top); + _recycled_stack_index[cur] = v; + assert(_recycled_bottom <= _recycled_top, "list top and bottom are wrong"); +} + bool ParCompactionManager::should_update() { assert(action() != NotValid, "Action is not set"); return (action() == ParCompactionManager::Update) || @@ -111,14 +159,13 @@ (action() == ParCompactionManager::UpdateAndCopy); } -bool ParCompactionManager::should_verify_only() { - assert(action() != NotValid, "Action is not set"); - return action() == ParCompactionManager::VerifyUpdate; +void ParCompactionManager::region_list_push(uint list_index, + size_t region_index) { + region_list(list_index)->push(region_index); } -bool ParCompactionManager::should_reset_only() { - assert(action() != NotValid, "Action is not set"); - return action() == ParCompactionManager::ResetObjects; +void ParCompactionManager::verify_region_list_empty(uint list_index) { + assert(region_list(list_index)->is_empty(), "Not empty"); } ParCompactionManager* diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -48,6 +48,7 @@ friend class StealRegionCompactionTask; friend class UpdateAndFillClosure; friend class RefProcTaskExecutor; + friend class IdleGCTask; public: @@ -58,8 +59,6 @@ Copy, UpdateAndCopy, CopyAndUpdate, - VerifyUpdate, - ResetObjects, NotValid }; // ------------------------ End don't putback if not needed @@ -85,7 +84,31 @@ // Is there a way to reuse the _marking_stack for the // saving empty regions? For now just create a different // type of TaskQueue. - RegionTaskQueue _region_stack; + RegionTaskQueue* _region_stack; + + static RegionTaskQueue** _region_list; + // Index in _region_list for current _region_stack. + uint _region_stack_index; + + // Indexes of recycled region stacks/overflow stacks + // Stacks of regions to be compacted are embedded in the tasks doing + // the compaction. A thread that executes the task extracts the + // region stack and drains it. These threads keep these region + // stacks for use during compaction task stealing. If a thread + // gets a second draining task, it pushed its current region stack + // index into the array _recycled_stack_index and gets a new + // region stack from the task. A thread that is executing a + // compaction stealing task without ever having executing a + // draining task, will get a region stack from _recycled_stack_index. + // + // Array of indexes into the array of region stacks. + static uint* _recycled_stack_index; + // The index into _recycled_stack_index of the last region stack index + // pushed. If -1, there are no entries into _recycled_stack_index. + static int _recycled_top; + // The index into _recycled_stack_index of the last region stack index + // popped. If -1, there has not been any entry popped. + static int _recycled_bottom; Stack _revisit_klass_stack; Stack _revisit_mdo_stack; @@ -104,7 +127,6 @@ // Array of tasks. Needed by the ParallelTaskTerminator. static RegionTaskQueueSet* region_array() { return _region_array; } OverflowTaskQueue* marking_stack() { return &_marking_stack; } - RegionTaskQueue* region_stack() { return &_region_stack; } // Pushes onto the marking stack. If the marking stack is full, // pushes onto the overflow stack. @@ -116,10 +138,33 @@ Action action() { return _action; } void set_action(Action v) { _action = v; } + RegionTaskQueue* region_stack() { return _region_stack; } + void set_region_stack(RegionTaskQueue* v) { _region_stack = v; } + inline static ParCompactionManager* manager_array(int index); + inline static RegionTaskQueue* region_list(int index) { + return _region_list[index]; + } + + uint region_stack_index() { return _region_stack_index; } + void set_region_stack_index(uint v) { _region_stack_index = v; } + + // Pop and push unique reusable stack index + static int pop_recycled_stack_index(); + static void push_recycled_stack_index(uint v); + static void reset_recycled_stack_index() { + _recycled_bottom = _recycled_top = -1; + } + ParCompactionManager(); + ~ParCompactionManager(); + // Pushes onto the region stack at the given index. If the + // region stack is full, + // pushes onto the region overflow stack. + static void region_list_push(uint stack_index, size_t region_index); + static void verify_region_list_empty(uint stack_index); ParMarkBitMap* mark_bitmap() { return _mark_bitmap; } // Take actions in preparation for a compaction. @@ -129,8 +174,6 @@ bool should_update(); bool should_copy(); - bool should_verify_only(); - bool should_reset_only(); Stack* revisit_klass_stack() { return &_revisit_klass_stack; } Stack* revisit_mdo_stack() { return &_revisit_mdo_stack; } diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -96,7 +96,8 @@ * by the MarkSweepAlwaysCompactCount parameter. This is a significant * performance improvement! */ - bool skip_dead = ((PSMarkSweep::total_invocations() % MarkSweepAlwaysCompactCount) != 0); + bool skip_dead = (MarkSweepAlwaysCompactCount < 1) + || ((PSMarkSweep::total_invocations() % MarkSweepAlwaysCompactCount) != 0); size_t allowed_deadspace = 0; if (skip_dead) { diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -2045,6 +2045,11 @@ ResourceMark rm; HandleMark hm; + // Set the number of GC threads to be used in this collection + gc_task_manager()->set_active_gang(); + gc_task_manager()->task_idle_workers(); + heap->set_par_threads(gc_task_manager()->active_workers()); + const bool is_system_gc = gc_cause == GCCause::_java_lang_system_gc; // This is useful for debugging but don't change the output the @@ -2197,6 +2202,7 @@ // Track memory usage and detect low memory MemoryService::track_memory_usage(); heap->update_counters(); + gc_task_manager()->release_idle_workers(); } #ifdef ASSERT @@ -2204,7 +2210,7 @@ ParCompactionManager* const cm = ParCompactionManager::manager_array(int(i)); assert(cm->marking_stack()->is_empty(), "should be empty"); - assert(cm->region_stack()->is_empty(), "should be empty"); + assert(ParCompactionManager::region_list(int(i))->is_empty(), "should be empty"); assert(cm->revisit_klass_stack()->is_empty(), "should be empty"); } #endif // ASSERT @@ -2351,8 +2357,9 @@ ParallelScavengeHeap* heap = gc_heap(); uint parallel_gc_threads = heap->gc_task_manager()->workers(); + uint active_gc_threads = heap->gc_task_manager()->active_workers(); TaskQueueSetSuper* qset = ParCompactionManager::region_array(); - ParallelTaskTerminator terminator(parallel_gc_threads, qset); + ParallelTaskTerminator terminator(active_gc_threads, qset); PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm); PSParallelCompact::FollowStackClosure follow_stack_closure(cm); @@ -2374,21 +2381,13 @@ q->enqueue(new MarkFromRootsTask(MarkFromRootsTask::jvmti)); q->enqueue(new MarkFromRootsTask(MarkFromRootsTask::code_cache)); - if (parallel_gc_threads > 1) { - for (uint j = 0; j < parallel_gc_threads; j++) { + if (active_gc_threads > 1) { + for (uint j = 0; j < active_gc_threads; j++) { q->enqueue(new StealMarkingTask(&terminator)); } } - WaitForBarrierGCTask* fin = WaitForBarrierGCTask::create(); - q->enqueue(fin); - - gc_task_manager()->add_list(q); - - fin->wait_for(); - - // We have to release the barrier tasks! - WaitForBarrierGCTask::destroy(fin); + gc_task_manager()->execute_and_wait(q); } // Process reference objects found during marking @@ -2483,10 +2482,22 @@ { TraceTime tm("drain task setup", print_phases(), true, gclog_or_tty); - const unsigned int task_count = MAX2(parallel_gc_threads, 1U); - for (unsigned int j = 0; j < task_count; j++) { + // Find the threads that are active + unsigned int which = 0; + + const uint task_count = MAX2(parallel_gc_threads, 1U); + for (uint j = 0; j < task_count; j++) { q->enqueue(new DrainStacksCompactionTask(j)); + ParCompactionManager::verify_region_list_empty(j); + // Set the region stacks variables to "no" region stack values + // so that they will be recognized and needing a region stack + // in the stealing tasks if they do not get one by executing + // a draining stack. + ParCompactionManager* cm = ParCompactionManager::manager_array(j); + cm->set_region_stack(NULL); + cm->set_region_stack_index((uint)max_uintx); } + ParCompactionManager::reset_recycled_stack_index(); // Find all regions that are available (can be filled immediately) and // distribute them to the thread stacks. The iteration is done in reverse @@ -2495,8 +2506,10 @@ const ParallelCompactData& sd = PSParallelCompact::summary_data(); size_t fillable_regions = 0; // A count for diagnostic purposes. - unsigned int which = 0; // The worker thread number. - + // A region index which corresponds to the tasks created above. + // "which" must be 0 <= which < task_count + + which = 0; for (unsigned int id = to_space_id; id > perm_space_id; --id) { SpaceInfo* const space_info = _space_info + id; MutableSpace* const space = space_info->space(); @@ -2509,8 +2522,7 @@ for (size_t cur = end_region - 1; cur >= beg_region; --cur) { if (sd.region(cur)->claim_unsafe()) { - ParCompactionManager* cm = ParCompactionManager::manager_array(which); - cm->push_region(cur); + ParCompactionManager::region_list_push(which, cur); if (TraceParallelOldGCCompactionPhase && Verbose) { const size_t count_mod_8 = fillable_regions & 7; @@ -2521,8 +2533,10 @@ NOT_PRODUCT(++fillable_regions;) - // Assign regions to threads in round-robin fashion. + // Assign regions to tasks in round-robin fashion. if (++which == task_count) { + assert(which <= parallel_gc_threads, + "Inconsistent number of workers"); which = 0; } } @@ -2642,26 +2656,19 @@ PSOldGen* old_gen = heap->old_gen(); old_gen->start_array()->reset(); uint parallel_gc_threads = heap->gc_task_manager()->workers(); + uint active_gc_threads = heap->gc_task_manager()->active_workers(); TaskQueueSetSuper* qset = ParCompactionManager::region_array(); - ParallelTaskTerminator terminator(parallel_gc_threads, qset); + ParallelTaskTerminator terminator(active_gc_threads, qset); GCTaskQueue* q = GCTaskQueue::create(); - enqueue_region_draining_tasks(q, parallel_gc_threads); - enqueue_dense_prefix_tasks(q, parallel_gc_threads); - enqueue_region_stealing_tasks(q, &terminator, parallel_gc_threads); + enqueue_region_draining_tasks(q, active_gc_threads); + enqueue_dense_prefix_tasks(q, active_gc_threads); + enqueue_region_stealing_tasks(q, &terminator, active_gc_threads); { TraceTime tm_pc("par compact", print_phases(), true, gclog_or_tty); - WaitForBarrierGCTask* fin = WaitForBarrierGCTask::create(); - q->enqueue(fin); - - gc_task_manager()->add_list(q); - - fin->wait_for(); - - // We have to release the barrier tasks! - WaitForBarrierGCTask::destroy(fin); + gc_task_manager()->execute_and_wait(q); #ifdef ASSERT // Verify that all regions have been processed before the deferred updates. @@ -2729,6 +2736,9 @@ PSParallelCompact::follow_weak_klass_links() { // All klasses on the revisit stack are marked at this point. // Update and follow all subklass, sibling and implementor links. + // Check all the stacks here even if not all the workers are active. + // There is no accounting which indicates which stacks might have + // contents to be followed. if (PrintRevisitStats) { gclog_or_tty->print_cr("#classes in system dictionary = %d", SystemDictionary::number_of_classes()); @@ -3360,20 +3370,7 @@ HeapWord* beg_addr = sp->bottom(); HeapWord* end_addr = sp->top(); -#ifdef ASSERT assert(beg_addr <= dp_addr && dp_addr <= end_addr, "bad dense prefix"); - if (cm->should_verify_only()) { - VerifyUpdateClosure verify_update(cm, sp); - bitmap->iterate(&verify_update, beg_addr, end_addr); - return; - } - - if (cm->should_reset_only()) { - ResetObjectsClosure reset_objects(cm); - bitmap->iterate(&reset_objects, beg_addr, end_addr); - return; - } -#endif const size_t beg_region = sd.addr_to_region_idx(beg_addr); const size_t dp_region = sd.addr_to_region_idx(dp_addr); @@ -3492,35 +3489,6 @@ return ParMarkBitMap::incomplete; } -// Verify the new location using the forwarding pointer -// from MarkSweep::mark_sweep_phase2(). Set the mark_word -// to the initial value. -ParMarkBitMapClosure::IterationStatus -PSParallelCompact::VerifyUpdateClosure::do_addr(HeapWord* addr, size_t words) { - // The second arg (words) is not used. - oop obj = (oop) addr; - HeapWord* forwarding_ptr = (HeapWord*) obj->mark()->decode_pointer(); - HeapWord* new_pointer = summary_data().calc_new_pointer(obj); - if (forwarding_ptr == NULL) { - // The object is dead or not moving. - assert(bitmap()->is_unmarked(obj) || (new_pointer == (HeapWord*) obj), - "Object liveness is wrong."); - return ParMarkBitMap::incomplete; - } - assert(HeapMaximumCompactionInterval > 1 || MarkSweepAlwaysCompactCount > 1 || - forwarding_ptr == new_pointer, "new location is incorrect"); - return ParMarkBitMap::incomplete; -} - -// Reset objects modified for debug checking. -ParMarkBitMapClosure::IterationStatus -PSParallelCompact::ResetObjectsClosure::do_addr(HeapWord* addr, size_t words) { - // The second arg (words) is not used. - oop obj = (oop) addr; - obj->init_mark(); - return ParMarkBitMap::incomplete; -} - // Prepare for compaction. This method is executed once // (i.e., by a single thread) before compaction. // Save the updated location of the intArrayKlassObj for diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -832,31 +832,6 @@ virtual void do_code_blob(CodeBlob* cb) const { } }; - // Closure for verifying update of pointers. Does not - // have any side effects. - class VerifyUpdateClosure: public ParMarkBitMapClosure { - const MutableSpace* _space; // Is this ever used? - - public: - VerifyUpdateClosure(ParCompactionManager* cm, const MutableSpace* sp) : - ParMarkBitMapClosure(PSParallelCompact::mark_bitmap(), cm), _space(sp) - { } - - virtual IterationStatus do_addr(HeapWord* addr, size_t words); - - const MutableSpace* space() { return _space; } - }; - - // Closure for updating objects altered for debug checking - class ResetObjectsClosure: public ParMarkBitMapClosure { - public: - ResetObjectsClosure(ParCompactionManager* cm): - ParMarkBitMapClosure(PSParallelCompact::mark_bitmap(), cm) - { } - - virtual IterationStatus do_addr(HeapWord* addr, size_t words); - }; - friend class KeepAliveClosure; friend class FollowStackClosure; friend class AdjustPointerClosure; @@ -1183,10 +1158,6 @@ // Update the deferred objects in the space. static void update_deferred_objects(ParCompactionManager* cm, SpaceId id); - // Mark pointer and follow contents. - template - static inline void mark_and_follow(ParCompactionManager* cm, T* p); - static ParMarkBitMap* mark_bitmap() { return &_mark_bitmap; } static ParallelCompactData& summary_data() { return _summary_data; } @@ -1283,20 +1254,6 @@ } template -inline void PSParallelCompact::mark_and_follow(ParCompactionManager* cm, - T* p) { - T heap_oop = oopDesc::load_heap_oop(p); - if (!oopDesc::is_null(heap_oop)) { - oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); - if (mark_bitmap()->is_unmarked(obj)) { - if (mark_obj(obj)) { - obj->follow_contents(cm); - } - } - } -} - -template inline void PSParallelCompact::mark_and_push(ParCompactionManager* cm, T* p) { T heap_oop = oopDesc::load_heap_oop(p); if (!oopDesc::is_null(heap_oop)) { diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -181,28 +181,29 @@ void PSRefProcTaskExecutor::execute(ProcessTask& task) { GCTaskQueue* q = GCTaskQueue::create(); - for(uint i=0; iactive_workers(); i++) { q->enqueue(new PSRefProcTaskProxy(task, i)); } - ParallelTaskTerminator terminator( - ParallelScavengeHeap::gc_task_manager()->workers(), + ParallelTaskTerminator terminator(manager->active_workers(), (TaskQueueSetSuper*) PSPromotionManager::stack_array_depth()); - if (task.marks_oops_alive() && ParallelGCThreads > 1) { - for (uint j=0; jactive_workers() > 1) { + for (uint j = 0; j < manager->active_workers(); j++) { q->enqueue(new StealTask(&terminator)); } } - ParallelScavengeHeap::gc_task_manager()->execute_and_wait(q); + manager->execute_and_wait(q); } void PSRefProcTaskExecutor::execute(EnqueueTask& task) { GCTaskQueue* q = GCTaskQueue::create(); - for(uint i=0; iactive_workers(); i++) { q->enqueue(new PSRefEnqueueTaskProxy(task, i)); } - ParallelScavengeHeap::gc_task_manager()->execute_and_wait(q); + manager->execute_and_wait(q); } // This method contains all heap specific policy for invoking scavenge. @@ -375,6 +376,14 @@ // Release all previously held resources gc_task_manager()->release_all_resources(); + // Set the number of GC threads to be used in this collection + gc_task_manager()->set_active_gang(); + gc_task_manager()->task_idle_workers(); + // Get the active number of workers here and use that value + // throughout the methods. + uint active_workers = gc_task_manager()->active_workers(); + heap->set_par_threads(active_workers); + PSPromotionManager::pre_scavenge(); // We'll use the promotion manager again later. @@ -385,8 +394,9 @@ GCTaskQueue* q = GCTaskQueue::create(); - for(uint i=0; ienqueue(new OldToYoungRootsTask(old_gen, old_top, i)); + uint stripe_total = active_workers; + for(uint i=0; i < stripe_total; i++) { + q->enqueue(new OldToYoungRootsTask(old_gen, old_top, i, stripe_total)); } q->enqueue(new SerialOldToYoungRootsTask(perm_gen, perm_top)); @@ -403,10 +413,10 @@ q->enqueue(new ScavengeRootsTask(ScavengeRootsTask::code_cache)); ParallelTaskTerminator terminator( - gc_task_manager()->workers(), + active_workers, (TaskQueueSetSuper*) promotion_manager->stack_array_depth()); - if (ParallelGCThreads>1) { - for (uint j=0; j 1) { + for (uint j = 0; j < active_workers; j++) { q->enqueue(new StealTask(&terminator)); } } @@ -419,6 +429,7 @@ // Process reference objects discovered during scavenge { reference_processor()->setup_policy(false); // not always_clear + reference_processor()->set_active_mt_degree(active_workers); PSKeepAliveClosure keep_alive(promotion_manager); PSEvacuateFollowersClosure evac_followers(promotion_manager); if (reference_processor()->processing_is_mt()) { @@ -622,6 +633,8 @@ // Track memory usage and detect low memory MemoryService::track_memory_usage(); heap->update_counters(); + + gc_task_manager()->release_idle_workers(); } if (VerifyAfterGC && heap->total_collections() >= VerifyGCStartAt) { @@ -804,6 +817,7 @@ // Initialize ref handling object for scavenging. MemRegion mr = young_gen->reserved(); + _ref_processor = new ReferenceProcessor(mr, // span ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -202,7 +202,8 @@ _gen->object_space(), _gen_top, pm, - _stripe_number); + _stripe_number, + _stripe_total); // Do the real work pm->drain_stacks(false); diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/parallelScavenge/psTasks.hpp --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psTasks.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psTasks.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -135,16 +135,63 @@ // OldToYoungRootsTask // // This task is used to scan old to young roots in parallel +// +// A GC thread executing this tasks divides the generation (old gen) +// into slices and takes a stripe in the slice as its part of the +// work. +// +// +===============+ slice 0 +// | stripe 0 | +// +---------------+ +// | stripe 1 | +// +---------------+ +// | stripe 2 | +// +---------------+ +// | stripe 3 | +// +===============+ slice 1 +// | stripe 0 | +// +---------------+ +// | stripe 1 | +// +---------------+ +// | stripe 2 | +// +---------------+ +// | stripe 3 | +// +===============+ slice 2 +// ... +// +// A task is created for each stripe. In this case there are 4 tasks +// created. A GC thread first works on its stripe within slice 0 +// and then moves to its stripe in the next slice until all stripes +// exceed the top of the generation. Note that having fewer GC threads +// than stripes works because all the tasks are executed so all stripes +// will be covered. In this example if 4 tasks have been created to cover +// all the stripes and there are only 3 threads, one of the threads will +// get the tasks with the 4th stripe. However, there is a dependence in +// CardTableExtension::scavenge_contents_parallel() on the number +// of tasks created. In scavenge_contents_parallel the distance +// to the next stripe is calculated based on the number of tasks. +// If the stripe width is ssize, a task's next stripe is at +// ssize * number_of_tasks (= slice_stride). In this case after +// finishing stripe 0 in slice 0, the thread finds the stripe 0 in slice1 +// by adding slice_stride to the start of stripe 0 in slice 0 to get +// to the start of stride 0 in slice 1. class OldToYoungRootsTask : public GCTask { private: PSOldGen* _gen; HeapWord* _gen_top; uint _stripe_number; + uint _stripe_total; public: - OldToYoungRootsTask(PSOldGen *gen, HeapWord* gen_top, uint stripe_number) : - _gen(gen), _gen_top(gen_top), _stripe_number(stripe_number) { } + OldToYoungRootsTask(PSOldGen *gen, + HeapWord* gen_top, + uint stripe_number, + uint stripe_total) : + _gen(gen), + _gen_top(gen_top), + _stripe_number(stripe_number), + _stripe_total(stripe_total) { } char* name() { return (char *)"old-to-young-roots-task"; } diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.cpp --- a/hotspot/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -28,8 +28,10 @@ #include "memory/collectorPolicy.hpp" #include "runtime/timer.hpp" #include "utilities/ostream.hpp" +#include "utilities/workgroup.hpp" elapsedTimer AdaptiveSizePolicy::_minor_timer; elapsedTimer AdaptiveSizePolicy::_major_timer; +bool AdaptiveSizePolicy::_debug_perturbation = false; // The throughput goal is implemented as // _throughput_goal = 1 - ( 1 / (1 + gc_cost_ratio)) @@ -88,6 +90,134 @@ _young_gen_policy_is_ready = false; } +// If the number of GC threads was set on the command line, +// use it. +// Else +// Calculate the number of GC threads based on the number of Java threads. +// Calculate the number of GC threads based on the size of the heap. +// Use the larger. + +int AdaptiveSizePolicy::calc_default_active_workers(uintx total_workers, + const uintx min_workers, + uintx active_workers, + uintx application_workers) { + // If the user has specifically set the number of + // GC threads, use them. + + // If the user has turned off using a dynamic number of GC threads + // or the users has requested a specific number, set the active + // number of workers to all the workers. + + uintx new_active_workers = total_workers; + uintx prev_active_workers = active_workers; + uintx active_workers_by_JT = 0; + uintx active_workers_by_heap_size = 0; + + // Always use at least min_workers but use up to + // GCThreadsPerJavaThreads * application threads. + active_workers_by_JT = + MAX2((uintx) GCWorkersPerJavaThread * application_workers, + min_workers); + + // Choose a number of GC threads based on the current size + // of the heap. This may be complicated because the size of + // the heap depends on factors such as the thoughput goal. + // Still a large heap should be collected by more GC threads. + active_workers_by_heap_size = + MAX2((size_t) 2U, Universe::heap()->capacity() / HeapSizePerGCThread); + + uintx max_active_workers = + MAX2(active_workers_by_JT, active_workers_by_heap_size); + + // Limit the number of workers to the the number created, + // (workers()). + new_active_workers = MIN2(max_active_workers, + (uintx) total_workers); + + // Increase GC workers instantly but decrease them more + // slowly. + if (new_active_workers < prev_active_workers) { + new_active_workers = + MAX2(min_workers, (prev_active_workers + new_active_workers) / 2); + } + + // Check once more that the number of workers is within the limits. + assert(min_workers <= total_workers, "Minimum workers not consistent with total workers"); + assert(new_active_workers >= min_workers, "Minimum workers not observed"); + assert(new_active_workers <= total_workers, "Total workers not observed"); + + if (ForceDynamicNumberOfGCThreads) { + // Assume this is debugging and jiggle the number of GC threads. + if (new_active_workers == prev_active_workers) { + if (new_active_workers < total_workers) { + new_active_workers++; + } else if (new_active_workers > min_workers) { + new_active_workers--; + } + } + if (new_active_workers == total_workers) { + if (_debug_perturbation) { + new_active_workers = min_workers; + } + _debug_perturbation = !_debug_perturbation; + } + assert((new_active_workers <= (uintx) ParallelGCThreads) && + (new_active_workers >= min_workers), + "Jiggled active workers too much"); + } + + if (TraceDynamicGCThreads) { + gclog_or_tty->print_cr("GCTaskManager::calc_default_active_workers() : " + "active_workers(): %d new_acitve_workers: %d " + "prev_active_workers: %d\n" + " active_workers_by_JT: %d active_workers_by_heap_size: %d", + active_workers, new_active_workers, prev_active_workers, + active_workers_by_JT, active_workers_by_heap_size); + } + assert(new_active_workers > 0, "Always need at least 1"); + return new_active_workers; +} + +int AdaptiveSizePolicy::calc_active_workers(uintx total_workers, + uintx active_workers, + uintx application_workers) { + // If the user has specifically set the number of + // GC threads, use them. + + // If the user has turned off using a dynamic number of GC threads + // or the users has requested a specific number, set the active + // number of workers to all the workers. + + int new_active_workers; + if (!UseDynamicNumberOfGCThreads || + (!FLAG_IS_DEFAULT(ParallelGCThreads) && !ForceDynamicNumberOfGCThreads)) { + new_active_workers = total_workers; + } else { + new_active_workers = calc_default_active_workers(total_workers, + 2, /* Minimum number of workers */ + active_workers, + application_workers); + } + assert(new_active_workers > 0, "Always need at least 1"); + return new_active_workers; +} + +int AdaptiveSizePolicy::calc_active_conc_workers(uintx total_workers, + uintx active_workers, + uintx application_workers) { + if (!UseDynamicNumberOfGCThreads || + (!FLAG_IS_DEFAULT(ConcGCThreads) && !ForceDynamicNumberOfGCThreads)) { + return ConcGCThreads; + } else { + int no_of_gc_threads = calc_default_active_workers( + total_workers, + 1, /* Minimum number of workers */ + active_workers, + application_workers); + return no_of_gc_threads; + } +} + bool AdaptiveSizePolicy::tenuring_threshold_change() const { return decrement_tenuring_threshold_for_gc_cost() || increment_tenuring_threshold_for_gc_cost() || diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.hpp --- a/hotspot/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -187,6 +187,8 @@ julong _young_gen_change_for_minor_throughput; julong _old_gen_change_for_major_throughput; + static const uint GCWorkersPerJavaThread = 2; + // Accessors double gc_pause_goal_sec() const { return _gc_pause_goal_sec; } @@ -331,6 +333,8 @@ // Return true if the policy suggested a change. bool tenuring_threshold_change() const; + static bool _debug_perturbation; + public: AdaptiveSizePolicy(size_t init_eden_size, size_t init_promo_size, @@ -338,6 +342,31 @@ double gc_pause_goal_sec, uint gc_cost_ratio); + // Return number default GC threads to use in the next GC. + static int calc_default_active_workers(uintx total_workers, + const uintx min_workers, + uintx active_workers, + uintx application_workers); + + // Return number of GC threads to use in the next GC. + // This is called sparingly so as not to change the + // number of GC workers gratuitously. + // For ParNew collections + // For PS scavenge and ParOld collections + // For G1 evacuation pauses (subject to update) + // Other collection phases inherit the number of + // GC workers from the calls above. For example, + // a CMS parallel remark uses the same number of GC + // workers as the most recent ParNew collection. + static int calc_active_workers(uintx total_workers, + uintx active_workers, + uintx application_workers); + + // Return number of GC threads to use in the next concurrent GC phase. + static int calc_active_conc_workers(uintx total_workers, + uintx active_workers, + uintx application_workers); + bool is_gc_cms_adaptive_size_policy() { return kind() == _gc_cms_adaptive_size_policy; } diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/shared/markSweep.hpp --- a/hotspot/src/share/vm/gc_implementation/shared/markSweep.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/shared/markSweep.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -196,8 +196,6 @@ static void mark_object(oop obj); // Mark pointer and follow contents. Empty marking stack afterwards. template static inline void follow_root(T* p); - // Mark pointer and follow contents. - template static inline void mark_and_follow(T* p); // Check mark and maybe push on marking stack template static inline void mark_and_push(T* p); static inline void push_objarray(oop obj, size_t index); diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/gc_implementation/shared/markSweep.inline.hpp --- a/hotspot/src/share/vm/gc_implementation/shared/markSweep.inline.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/gc_implementation/shared/markSweep.inline.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -63,18 +63,6 @@ follow_stack(); } -template inline void MarkSweep::mark_and_follow(T* p) { -// assert(Universe::heap()->is_in_reserved(p), "should be in object space"); - T heap_oop = oopDesc::load_heap_oop(p); - if (!oopDesc::is_null(heap_oop)) { - oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); - if (!obj->mark()->is_marked()) { - mark_object(obj); - obj->follow_contents(); - } - } -} - template inline void MarkSweep::mark_and_push(T* p) { // assert(Universe::heap()->is_in_reserved(p), "should be in object space"); T heap_oop = oopDesc::load_heap_oop(p); diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/memory/cardTableModRefBS.cpp --- a/hotspot/src/share/vm/memory/cardTableModRefBS.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/memory/cardTableModRefBS.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -460,9 +460,43 @@ OopsInGenClosure* cl, CardTableRS* ct) { if (!mr.is_empty()) { - int n_threads = SharedHeap::heap()->n_par_threads(); - if (n_threads > 0) { + // Caller (process_strong_roots()) claims that all GC threads + // execute this call. With UseDynamicNumberOfGCThreads now all + // active GC threads execute this call. The number of active GC + // threads needs to be passed to par_non_clean_card_iterate_work() + // to get proper partitioning and termination. + // + // This is an example of where n_par_threads() is used instead + // of workers()->active_workers(). n_par_threads can be set to 0 to + // turn off parallelism. For example when this code is called as + // part of verification and SharedHeap::process_strong_roots() is being + // used, then n_par_threads() may have been set to 0. active_workers + // is not overloaded with the meaning that it is a switch to disable + // parallelism and so keeps the meaning of the number of + // active gc workers. If parallelism has not been shut off by + // setting n_par_threads to 0, then n_par_threads should be + // equal to active_workers. When a different mechanism for shutting + // off parallelism is used, then active_workers can be used in + // place of n_par_threads. + // This is an example of a path where n_par_threads is + // set to 0 to turn off parallism. + // [7] CardTableModRefBS::non_clean_card_iterate() + // [8] CardTableRS::younger_refs_in_space_iterate() + // [9] Generation::younger_refs_in_space_iterate() + // [10] OneContigSpaceCardGeneration::younger_refs_iterate() + // [11] CompactingPermGenGen::younger_refs_iterate() + // [12] CardTableRS::younger_refs_iterate() + // [13] SharedHeap::process_strong_roots() + // [14] G1CollectedHeap::verify() + // [15] Universe::verify() + // [16] G1CollectedHeap::do_collection_pause_at_safepoint() + // + int n_threads = SharedHeap::heap()->n_par_threads(); + bool is_par = n_threads > 0; + if (is_par) { #ifndef SERIALGC + assert(SharedHeap::heap()->n_par_threads() == + SharedHeap::heap()->workers()->active_workers(), "Mismatch"); non_clean_card_iterate_parallel_work(sp, mr, cl, ct, n_threads); #else // SERIALGC fatal("Parallel gc not supported here."); @@ -489,6 +523,10 @@ // change their values in any manner. void CardTableModRefBS::non_clean_card_iterate_serial(MemRegion mr, MemRegionClosure* cl) { + bool is_par = (SharedHeap::heap()->n_par_threads() > 0); + assert(!is_par || + (SharedHeap::heap()->n_par_threads() == + SharedHeap::heap()->workers()->active_workers()), "Mismatch"); for (int i = 0; i < _cur_covered_regions; i++) { MemRegion mri = mr.intersection(_covered[i]); if (mri.word_size() > 0) { @@ -624,23 +662,6 @@ return MemRegion(mr.end(), mr.end()); } -// Set all the dirty cards in the given region to "precleaned" state. -void CardTableModRefBS::preclean_dirty_cards(MemRegion mr) { - for (int i = 0; i < _cur_covered_regions; i++) { - MemRegion mri = mr.intersection(_covered[i]); - if (!mri.is_empty()) { - jbyte *cur_entry, *limit; - for (cur_entry = byte_for(mri.start()), limit = byte_for(mri.last()); - cur_entry <= limit; - cur_entry++) { - if (*cur_entry == dirty_card) { - *cur_entry = precleaned_card; - } - } - } - } -} - uintx CardTableModRefBS::ct_max_alignment_constraint() { return card_size * os::vm_page_size(); } diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/memory/cardTableModRefBS.hpp --- a/hotspot/src/share/vm/memory/cardTableModRefBS.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/memory/cardTableModRefBS.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -435,9 +435,6 @@ MemRegion dirty_card_range_after_reset(MemRegion mr, bool reset, int reset_val); - // Set all the dirty cards in the given region to precleaned state. - void preclean_dirty_cards(MemRegion mr); - // Provide read-only access to the card table array. const jbyte* byte_for_const(const void* p) const { return byte_for(p); diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/memory/cardTableRS.cpp --- a/hotspot/src/share/vm/memory/cardTableRS.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/memory/cardTableRS.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -164,7 +164,13 @@ ClearNoncleanCardWrapper::ClearNoncleanCardWrapper( DirtyCardToOopClosure* dirty_card_closure, CardTableRS* ct) : _dirty_card_closure(dirty_card_closure), _ct(ct) { + // Cannot yet substitute active_workers for n_par_threads + // in the case where parallelism is being turned off by + // setting n_par_threads to 0. _is_par = (SharedHeap::heap()->n_par_threads() > 0); + assert(!_is_par || + (SharedHeap::heap()->n_par_threads() == + SharedHeap::heap()->workers()->active_workers()), "Mismatch"); } void ClearNoncleanCardWrapper::do_MemRegion(MemRegion mr) { diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/memory/sharedHeap.cpp --- a/hotspot/src/share/vm/memory/sharedHeap.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/memory/sharedHeap.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -58,7 +58,6 @@ _perm_gen(NULL), _rem_set(NULL), _strong_roots_parity(0), _process_strong_tasks(new SubTasksDone(SH_PS_NumElements)), - _n_par_threads(0), _workers(NULL) { if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) { @@ -80,6 +79,14 @@ } } +int SharedHeap::n_termination() { + return _process_strong_tasks->n_threads(); +} + +void SharedHeap::set_n_termination(int t) { + _process_strong_tasks->set_n_threads(t); +} + bool SharedHeap::heap_lock_held_for_gc() { Thread* t = Thread::current(); return Heap_lock->owned_by_self() @@ -144,6 +151,10 @@ StrongRootsScope srs(this, activate_scope); // General strong roots. assert(_strong_roots_parity != 0, "must have called prologue code"); + // _n_termination for _process_strong_tasks should be set up stream + // in a method not running in a GC worker. Otherwise the GC worker + // could be trying to change the termination condition while the task + // is executing in another GC worker. if (!_process_strong_tasks->is_task_claimed(SH_PS_Universe_oops_do)) { Universe::oops_do(roots); // Consider perm-gen discovered lists to be strong. diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/memory/sharedHeap.hpp --- a/hotspot/src/share/vm/memory/sharedHeap.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/memory/sharedHeap.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -49,6 +49,62 @@ class CollectorPolicy; class KlassHandle; +// Note on use of FlexibleWorkGang's for GC. +// There are three places where task completion is determined. +// In +// 1) ParallelTaskTerminator::offer_termination() where _n_threads +// must be set to the correct value so that count of workers that +// have offered termination will exactly match the number +// working on the task. Tasks such as those derived from GCTask +// use ParallelTaskTerminator's. Tasks that want load balancing +// by work stealing use this method to gauge completion. +// 2) SubTasksDone has a variable _n_threads that is used in +// all_tasks_completed() to determine completion. all_tasks_complete() +// counts the number of tasks that have been done and then reset +// the SubTasksDone so that it can be used again. When the number of +// tasks is set to the number of GC workers, then _n_threads must +// be set to the number of active GC workers. G1CollectedHeap, +// HRInto_G1RemSet, GenCollectedHeap and SharedHeap have SubTasksDone. +// This seems too many. +// 3) SequentialSubTasksDone has an _n_threads that is used in +// a way similar to SubTasksDone and has the same dependency on the +// number of active GC workers. CompactibleFreeListSpace and Space +// have SequentialSubTasksDone's. +// Example of using SubTasksDone and SequentialSubTasksDone +// G1CollectedHeap::g1_process_strong_roots() calls +// process_strong_roots(false, // no scoping; this is parallel code +// collecting_perm_gen, so, +// &buf_scan_non_heap_roots, +// &eager_scan_code_roots, +// &buf_scan_perm); +// which delegates to SharedHeap::process_strong_roots() and uses +// SubTasksDone* _process_strong_tasks to claim tasks. +// process_strong_roots() calls +// rem_set()->younger_refs_iterate(perm_gen(), perm_blk); +// to scan the card table and which eventually calls down into +// CardTableModRefBS::par_non_clean_card_iterate_work(). This method +// uses SequentialSubTasksDone* _pst to claim tasks. +// Both SubTasksDone and SequentialSubTasksDone call their method +// all_tasks_completed() to count the number of GC workers that have +// finished their work. That logic is "when all the workers are +// finished the tasks are finished". +// +// The pattern that appears in the code is to set _n_threads +// to a value > 1 before a task that you would like executed in parallel +// and then to set it to 0 after that task has completed. A value of +// 0 is a "special" value in set_n_threads() which translates to +// setting _n_threads to 1. +// +// Some code uses _n_terminiation to decide if work should be done in +// parallel. The notorious possibly_parallel_oops_do() in threads.cpp +// is an example of such code. Look for variable "is_par" for other +// examples. +// +// The active_workers is not reset to 0 after a parallel phase. It's +// value may be used in later phases and in one instance at least +// (the parallel remark) it has to be used (the parallel remark depends +// on the partitioning done in the previous parallel scavenge). + class SharedHeap : public CollectedHeap { friend class VMStructs; @@ -84,11 +140,6 @@ // If we're doing parallel GC, use this gang of threads. FlexibleWorkGang* _workers; - // Number of parallel threads currently working on GC tasks. - // O indicates use sequential code; 1 means use parallel code even with - // only one thread, for performance testing purposes. - int _n_par_threads; - // Full initialization is done in a concrete subtype's "initialize" // function. SharedHeap(CollectorPolicy* policy_); @@ -107,6 +158,7 @@ CollectorPolicy *collector_policy() const { return _collector_policy; } void set_barrier_set(BarrierSet* bs); + SubTasksDone* process_strong_tasks() { return _process_strong_tasks; } // Does operations required after initialization has been done. virtual void post_initialize(); @@ -198,13 +250,6 @@ FlexibleWorkGang* workers() const { return _workers; } - // Sets the number of parallel threads that will be doing tasks - // (such as process strong roots) subsequently. - virtual void set_par_threads(int t); - - // Number of threads currently working on GC tasks. - int n_par_threads() { return _n_par_threads; } - // Invoke the "do_oop" method the closure "roots" on all root locations. // If "collecting_perm_gen" is false, then roots that may only contain // references to permGen objects are not scanned; instead, in that case, @@ -240,6 +285,13 @@ virtual void gc_prologue(bool full) = 0; virtual void gc_epilogue(bool full) = 0; + // Sets the number of parallel threads that will be doing tasks + // (such as process strong roots) subsequently. + virtual void set_par_threads(int t); + + int n_termination(); + void set_n_termination(int t); + // // New methods from CollectedHeap // diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/memory/space.hpp --- a/hotspot/src/share/vm/memory/space.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/memory/space.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -533,7 +533,8 @@ * by the MarkSweepAlwaysCompactCount parameter. \ */ \ int invocations = SharedHeap::heap()->perm_gen()->stat_record()->invocations;\ - bool skip_dead = ((invocations % MarkSweepAlwaysCompactCount) != 0); \ + bool skip_dead = (MarkSweepAlwaysCompactCount < 1) \ + ||((invocations % MarkSweepAlwaysCompactCount) != 0); \ \ size_t allowed_deadspace = 0; \ if (skip_dead) { \ diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/oops/objArrayOop.hpp --- a/hotspot/src/share/vm/oops/objArrayOop.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/oops/objArrayOop.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -34,7 +34,7 @@ friend class objArrayKlass; friend class Runtime1; friend class psPromotionManager; - friend class CSMarkOopClosure; + friend class CSetMarkOopClosure; friend class G1ParScanPartialArrayClosure; template T* obj_at_addr(int index) const { diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/runtime/arguments.cpp --- a/hotspot/src/share/vm/runtime/arguments.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/runtime/arguments.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -1394,8 +1394,8 @@ // If no heap maximum was requested explicitly, use some reasonable fraction // of the physical memory, up to a maximum of 1GB. if (UseParallelGC) { - FLAG_SET_ERGO(uintx, ParallelGCThreads, - Abstract_VM_Version::parallel_worker_threads()); + FLAG_SET_DEFAULT(ParallelGCThreads, + Abstract_VM_Version::parallel_worker_threads()); // If InitialSurvivorRatio or MinSurvivorRatio were not specified, but the // SurvivorRatio has been set, reset their default values to SurvivorRatio + diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/runtime/globals.cpp diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/runtime/globals.hpp --- a/hotspot/src/share/vm/runtime/globals.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/runtime/globals.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -1416,6 +1416,21 @@ product(uintx, ParallelGCThreads, 0, \ "Number of parallel threads parallel gc will use") \ \ + product(bool, UseDynamicNumberOfGCThreads, false, \ + "Dynamically choose the number of parallel threads " \ + "parallel gc will use") \ + \ + diagnostic(bool, ForceDynamicNumberOfGCThreads, false, \ + "Force dynamic selection of the number of" \ + "parallel threads parallel gc will use to aid debugging") \ + \ + product(uintx, HeapSizePerGCThread, ScaleForWordSize(64*M), \ + "Size of heap (bytes) per GC thread used in calculating the " \ + "number of GC threads") \ + \ + product(bool, TraceDynamicGCThreads, false, \ + "Trace the dynamic GC thread usage") \ + \ develop(bool, ParallelOldGCSplitALot, false, \ "Provoke splitting (copying data from a young gen space to" \ "multiple destination spaces)") \ @@ -2357,7 +2372,7 @@ develop(bool, TraceGCTaskQueue, false, \ "Trace actions of the GC task queues") \ \ - develop(bool, TraceGCTaskThread, false, \ + diagnostic(bool, TraceGCTaskThread, false, \ "Trace actions of the GC task threads") \ \ product(bool, PrintParallelOldGCPhaseTimes, false, \ diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/runtime/globals_extension.hpp diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/runtime/thread.cpp --- a/hotspot/src/share/vm/runtime/thread.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/runtime/thread.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -778,12 +778,12 @@ return true; } else { guarantee(res == strong_roots_parity, "Or else what?"); - assert(SharedHeap::heap()->n_par_threads() > 0, - "Should only fail when parallel."); + assert(SharedHeap::heap()->workers()->active_workers() > 0, + "Should only fail when parallel."); return false; } } - assert(SharedHeap::heap()->n_par_threads() > 0, + assert(SharedHeap::heap()->workers()->active_workers() > 0, "Should only fail when parallel."); return false; } @@ -3939,7 +3939,15 @@ // root groups. Overhead should be small enough to use all the time, // even in sequential code. SharedHeap* sh = SharedHeap::heap(); - bool is_par = (sh->n_par_threads() > 0); + // Cannot yet substitute active_workers for n_par_threads + // because of G1CollectedHeap::verify() use of + // SharedHeap::process_strong_roots(). n_par_threads == 0 will + // turn off parallelism in process_strong_roots while active_workers + // is being used for parallelism elsewhere. + bool is_par = sh->n_par_threads() > 0; + assert(!is_par || + (SharedHeap::heap()->n_par_threads() == + SharedHeap::heap()->workers()->active_workers()), "Mismatch"); int cp = SharedHeap::heap()->strong_roots_parity(); ALL_JAVA_THREADS(p) { if (p->claim_oops_do(is_par, cp)) { diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/services/memoryManager.cpp --- a/hotspot/src/share/vm/services/memoryManager.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/services/memoryManager.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -168,10 +168,8 @@ // initialize the arrays for memory usage _before_gc_usage_array = (MemoryUsage*) NEW_C_HEAP_ARRAY(MemoryUsage, num_pools); _after_gc_usage_array = (MemoryUsage*) NEW_C_HEAP_ARRAY(MemoryUsage, num_pools); - size_t len = num_pools * sizeof(MemoryUsage); - memset(_before_gc_usage_array, 0, len); - memset(_after_gc_usage_array, 0, len); _usage_array_size = num_pools; + clear(); } GCStatInfo::~GCStatInfo() { @@ -304,12 +302,8 @@ pool->set_last_collection_usage(usage); LowMemoryDetector::detect_after_gc_memory(pool); } - if(is_notification_enabled()) { - bool isMajorGC = this == MemoryService::get_major_gc_manager(); - GCNotifier::pushNotification(this, isMajorGC ? "end of major GC" : "end of minor GC", - GCCause::to_string(cause)); - } } + if (countCollection) { _num_collections++; // alternately update two objects making one public when complete @@ -321,6 +315,12 @@ // reset the current stat for diagnosability purposes _current_gc_stat->clear(); } + + if (is_notification_enabled()) { + bool isMajorGC = this == MemoryService::get_major_gc_manager(); + GCNotifier::pushNotification(this, isMajorGC ? "end of major GC" : "end of minor GC", + GCCause::to_string(cause)); + } } } diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/utilities/workgroup.cpp --- a/hotspot/src/share/vm/utilities/workgroup.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/utilities/workgroup.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -57,7 +57,6 @@ bool are_GC_task_threads, bool are_ConcurrentGC_threads) : AbstractWorkGang(name, are_GC_task_threads, are_ConcurrentGC_threads) { - // Save arguments. _total_workers = workers; } @@ -127,6 +126,12 @@ } void WorkGang::run_task(AbstractGangTask* task) { + run_task(task, total_workers()); +} + +void WorkGang::run_task(AbstractGangTask* task, uint no_of_parallel_workers) { + task->set_for_termination(no_of_parallel_workers); + // This thread is executed by the VM thread which does not block // on ordinary MutexLocker's. MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag); @@ -143,22 +148,32 @@ // Tell the workers to get to work. monitor()->notify_all(); // Wait for them to be finished - while (finished_workers() < total_workers()) { + while (finished_workers() < (int) no_of_parallel_workers) { if (TraceWorkGang) { tty->print_cr("Waiting in work gang %s: %d/%d finished sequence %d", - name(), finished_workers(), total_workers(), + name(), finished_workers(), no_of_parallel_workers, _sequence_number); } monitor()->wait(/* no_safepoint_check */ true); } _task = NULL; if (TraceWorkGang) { - tty->print_cr("/nFinished work gang %s: %d/%d sequence %d", - name(), finished_workers(), total_workers(), + tty->print_cr("\nFinished work gang %s: %d/%d sequence %d", + name(), finished_workers(), no_of_parallel_workers, _sequence_number); + Thread* me = Thread::current(); + tty->print_cr(" T: 0x%x VM_thread: %d", me, me->is_VM_thread()); } } +void FlexibleWorkGang::run_task(AbstractGangTask* task) { + // If active_workers() is passed, _finished_workers + // must only be incremented for workers that find non_null + // work (as opposed to all those that just check that the + // task is not null). + WorkGang::run_task(task, (uint) active_workers()); +} + void AbstractWorkGang::stop() { // Tell all workers to terminate, then wait for them to become inactive. MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag); @@ -168,10 +183,10 @@ _task = NULL; _terminate = true; monitor()->notify_all(); - while (finished_workers() < total_workers()) { + while (finished_workers() < active_workers()) { if (TraceWorkGang) { tty->print_cr("Waiting in work gang %s: %d/%d finished", - name(), finished_workers(), total_workers()); + name(), finished_workers(), active_workers()); } monitor()->wait(/* no_safepoint_check */ true); } @@ -275,10 +290,12 @@ // Check for new work. if ((data.task() != NULL) && (data.sequence_number() != previous_sequence_number)) { - gang()->internal_note_start(); - gang_monitor->notify_all(); - part = gang()->started_workers() - 1; - break; + if (gang()->needs_more_workers()) { + gang()->internal_note_start(); + gang_monitor->notify_all(); + part = gang()->started_workers() - 1; + break; + } } // Nothing to do. gang_monitor->wait(/* no_safepoint_check */ true); @@ -350,6 +367,9 @@ #endif /* PRODUCT */ +// FlexibleWorkGang + + // *** WorkGangBarrierSync WorkGangBarrierSync::WorkGangBarrierSync() @@ -411,10 +431,8 @@ } void SubTasksDone::set_n_threads(int t) { -#ifdef ASSERT assert(_claimed == 0 || _threads_completed == _n_threads, "should not be called while tasks are being processed!"); -#endif _n_threads = (t == 0 ? 1 : t); } diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/utilities/workgroup.hpp --- a/hotspot/src/share/vm/utilities/workgroup.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/utilities/workgroup.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -96,11 +96,14 @@ protected: // Constructor and desctructor: only construct subclasses. - AbstractGangTask(const char* name) { + AbstractGangTask(const char* name) + { NOT_PRODUCT(_name = name); _counter = 0; } virtual ~AbstractGangTask() { } + +public: }; class AbstractGangTaskWOopQueues : public AbstractGangTask { @@ -116,6 +119,7 @@ OopTaskQueueSet* queues() { return _queues; } }; + // Class AbstractWorkGang: // An abstract class representing a gang of workers. // You subclass this to supply an implementation of run_task(). @@ -130,6 +134,8 @@ virtual void run_task(AbstractGangTask* task) = 0; // Stop and terminate all workers. virtual void stop(); + // Return true if more workers should be applied to the task. + virtual bool needs_more_workers() const { return true; } public: // Debugging. const char* name() const; @@ -287,20 +293,62 @@ AbstractWorkGang* gang() const { return _gang; } }; +// Dynamic number of worker threads +// +// This type of work gang is used to run different numbers of +// worker threads at different times. The +// number of workers run for a task is "_active_workers" +// instead of "_total_workers" in a WorkGang. The method +// "needs_more_workers()" returns true until "_active_workers" +// have been started and returns false afterwards. The +// implementation of "needs_more_workers()" in WorkGang always +// returns true so that all workers are started. The method +// "loop()" in GangWorker was modified to ask "needs_more_workers()" +// in its loop to decide if it should start working on a task. +// A worker in "loop()" waits for notification on the WorkGang +// monitor and execution of each worker as it checks for work +// is serialized via the same monitor. The "needs_more_workers()" +// call is serialized and additionally the calculation for the +// "part" (effectively the worker id for executing the task) is +// serialized to give each worker a unique "part". Workers that +// are not needed for this tasks (i.e., "_active_workers" have +// been started before it, continue to wait for work. + class FlexibleWorkGang: public WorkGang { + // The currently active workers in this gang. + // This is a number that is dynamically adjusted + // and checked in the run_task() method at each invocation. + // As described above _active_workers determines the number + // of threads started on a task. It must also be used to + // determine completion. + protected: int _active_workers; public: // Constructor and destructor. + // Initialize active_workers to a minimum value. Setting it to + // the parameter "workers" will initialize it to a maximum + // value which is not desirable. FlexibleWorkGang(const char* name, int workers, bool are_GC_task_threads, bool are_ConcurrentGC_threads) : - WorkGang(name, workers, are_GC_task_threads, are_ConcurrentGC_threads) { - _active_workers = ParallelGCThreads; - }; + WorkGang(name, workers, are_GC_task_threads, are_ConcurrentGC_threads), + _active_workers(UseDynamicNumberOfGCThreads ? 1 : ParallelGCThreads) {}; // Accessors for fields virtual int active_workers() const { return _active_workers; } - void set_active_workers(int v) { _active_workers = v; } + void set_active_workers(int v) { + assert(v <= _total_workers, + "Trying to set more workers active than there are"); + _active_workers = MIN2(v, _total_workers); + assert(v != 0, "Trying to set active workers to 0"); + _active_workers = MAX2(1, _active_workers); + assert(UseDynamicNumberOfGCThreads || _active_workers == _total_workers, + "Unless dynamic should use total workers"); + } + virtual void run_task(AbstractGangTask* task); + virtual bool needs_more_workers() const { + return _started_workers < _active_workers; + } }; // Work gangs in garbage collectors: 2009-06-10 @@ -357,6 +405,11 @@ class SubTasksDone: public CHeapObj { jint* _tasks; int _n_tasks; + // _n_threads is used to determine when a sub task is done. + // It does not control how many threads will execute the subtask + // but must be initialized to the number that do execute the task + // in order to correctly decide when the subtask is done (all the + // threads working on the task have finished). int _n_threads; jint _threads_completed; #ifdef ASSERT diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/utilities/yieldingWorkgroup.cpp --- a/hotspot/src/share/vm/utilities/yieldingWorkgroup.cpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/utilities/yieldingWorkgroup.cpp Fri Dec 02 15:11:18 2011 -0800 @@ -125,7 +125,7 @@ if (requested_size != 0) { _active_workers = MIN2(requested_size, total_workers()); } else { - _active_workers = total_workers(); + _active_workers = active_workers(); } new_task->set_actual_size(_active_workers); new_task->set_for_termination(_active_workers); @@ -148,22 +148,22 @@ for (Status status = yielding_task()->status(); status != COMPLETED && status != YIELDED && status != ABORTED; status = yielding_task()->status()) { - assert(started_workers() <= total_workers(), "invariant"); - assert(finished_workers() <= total_workers(), "invariant"); - assert(yielded_workers() <= total_workers(), "invariant"); + assert(started_workers() <= active_workers(), "invariant"); + assert(finished_workers() <= active_workers(), "invariant"); + assert(yielded_workers() <= active_workers(), "invariant"); monitor()->wait(Mutex::_no_safepoint_check_flag); } switch (yielding_task()->status()) { case COMPLETED: case ABORTED: { - assert(finished_workers() == total_workers(), "Inconsistent status"); + assert(finished_workers() == active_workers(), "Inconsistent status"); assert(yielded_workers() == 0, "Invariant"); reset(); // for next task; gang<->task binding released break; } case YIELDED: { assert(yielded_workers() > 0, "Invariant"); - assert(yielded_workers() + finished_workers() == total_workers(), + assert(yielded_workers() + finished_workers() == active_workers(), "Inconsistent counts"); break; } @@ -182,7 +182,6 @@ MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag); assert(task() != NULL && task() == gang_task, "Incorrect usage"); - // assert(_active_workers == total_workers(), "For now"); assert(_started_workers == _active_workers, "Precondition"); assert(_yielded_workers > 0 && yielding_task()->status() == YIELDED, "Else why are we calling continue_task()"); @@ -202,7 +201,7 @@ void YieldingFlexibleWorkGang::yield() { assert(task() != NULL, "Inconsistency; should have task binding"); MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag); - assert(yielded_workers() < total_workers(), "Consistency check"); + assert(yielded_workers() < active_workers(), "Consistency check"); if (yielding_task()->status() == ABORTING) { // Do not yield; we need to abort as soon as possible // XXX NOTE: This can cause a performance pathology in the @@ -213,7 +212,7 @@ // us to return at each potential yield point. return; } - if (++_yielded_workers + finished_workers() == total_workers()) { + if (++_yielded_workers + finished_workers() == active_workers()) { yielding_task()->set_status(YIELDED); monitor()->notify_all(); } else { diff -r f0eccb294698 -r 3ba6557b91f7 hotspot/src/share/vm/utilities/yieldingWorkgroup.hpp --- a/hotspot/src/share/vm/utilities/yieldingWorkgroup.hpp Wed Jul 05 17:57:09 2017 +0200 +++ b/hotspot/src/share/vm/utilities/yieldingWorkgroup.hpp Fri Dec 02 15:11:18 2011 -0800 @@ -199,17 +199,11 @@ void abort(); private: - int _active_workers; int _yielded_workers; void wait_for_gang(); public: // Accessors for fields - int active_workers() const { - return _active_workers; - } - - // Accessors for fields int yielded_workers() const { return _yielded_workers; }