# HG changeset patch # User tschatzl # Date 1524823606 -7200 # Node ID 358be4680d12fe3404cffec05677f8b942890881 # Parent c822dd1a3b66238be059caa56954f29d8df3901f 6672778: G1 should trim task queues more aggressively during evacuation pauses Summary: Regularly try to drain task queues. This helps memory usage and performance during garbage collection. Reviewed-by: sjohanss, sangheki diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/gc/g1/bufferingOopClosure.hpp --- a/src/hotspot/share/gc/g1/bufferingOopClosure.hpp Fri Apr 27 10:59:46 2018 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,144 +0,0 @@ -/* - * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#ifndef SHARE_VM_GC_G1_BUFFERINGOOPCLOSURE_HPP -#define SHARE_VM_GC_G1_BUFFERINGOOPCLOSURE_HPP - -#include "memory/iterator.hpp" -#include "oops/oopsHierarchy.hpp" -#include "runtime/os.hpp" -#include "utilities/debug.hpp" - -// A BufferingOops closure tries to separate out the cost of finding roots -// from the cost of applying closures to them. It maintains an array of -// ref-containing locations. Until the array is full, applying the closure -// to an oop* merely records that location in the array. Since this -// closure app cost is small, an elapsed timer can approximately attribute -// all of this cost to the cost of finding the roots. When the array fills -// up, the wrapped closure is applied to all elements, keeping track of -// this elapsed time of this process, and leaving the array empty. -// The caller must be sure to call "done" to process any unprocessed -// buffered entries. - -class BufferingOopClosure: public OopClosure { - friend class BufferingOopClosureTest; -protected: - static const size_t BufferLength = 1024; - - // We need to know if the buffered addresses contain oops or narrowOops. - // We can't tag the addresses the way StarTask does, because we need to - // be able to handle unaligned addresses coming from oops embedded in code. - // - // The addresses for the full-sized oops are filled in from the bottom, - // while the addresses for the narrowOops are filled in from the top. - OopOrNarrowOopStar _buffer[BufferLength]; - OopOrNarrowOopStar* _oop_top; - OopOrNarrowOopStar* _narrowOop_bottom; - - OopClosure* _oc; - double _closure_app_seconds; - - - bool is_buffer_empty() { - return _oop_top == _buffer && _narrowOop_bottom == (_buffer + BufferLength - 1); - } - - bool is_buffer_full() { - return _narrowOop_bottom < _oop_top; - } - - // Process addresses containing full-sized oops. - void process_oops() { - for (OopOrNarrowOopStar* curr = _buffer; curr < _oop_top; ++curr) { - _oc->do_oop((oop*)(*curr)); - } - _oop_top = _buffer; - } - - // Process addresses containing narrow oops. - void process_narrowOops() { - for (OopOrNarrowOopStar* curr = _buffer + BufferLength - 1; curr > _narrowOop_bottom; --curr) { - _oc->do_oop((narrowOop*)(*curr)); - } - _narrowOop_bottom = _buffer + BufferLength - 1; - } - - // Apply the closure to all oops and clear the buffer. - // Accumulate the time it took. - void process_buffer() { - double start = os::elapsedTime(); - - process_oops(); - process_narrowOops(); - - _closure_app_seconds += (os::elapsedTime() - start); - } - - void process_buffer_if_full() { - if (is_buffer_full()) { - process_buffer(); - } - } - - void add_narrowOop(narrowOop* p) { - assert(!is_buffer_full(), "Buffer should not be full"); - *_narrowOop_bottom = (OopOrNarrowOopStar)p; - _narrowOop_bottom--; - } - - void add_oop(oop* p) { - assert(!is_buffer_full(), "Buffer should not be full"); - *_oop_top = (OopOrNarrowOopStar)p; - _oop_top++; - } - -public: - virtual void do_oop(narrowOop* p) { - process_buffer_if_full(); - add_narrowOop(p); - } - - virtual void do_oop(oop* p) { - process_buffer_if_full(); - add_oop(p); - } - - void done() { - if (!is_buffer_empty()) { - process_buffer(); - } - } - - double closure_app_seconds() { - return _closure_app_seconds; - } - - BufferingOopClosure(OopClosure *oc) : - _oc(oc), - _oop_top(_buffer), - _narrowOop_bottom(_buffer + BufferLength - 1), - _closure_app_seconds(0.0) { } -}; - -#endif // SHARE_VM_GC_G1_BUFFERINGOOPCLOSURE_HPP diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/gc/g1/g1Arguments.cpp --- a/src/hotspot/share/gc/g1/g1Arguments.cpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/gc/g1/g1Arguments.cpp Fri Apr 27 12:06:46 2018 +0200 @@ -126,6 +126,11 @@ log_trace(gc)("MarkStackSize: %uk MarkStackSizeMax: %uk", (unsigned int) (MarkStackSize / K), (uint) (MarkStackSizeMax / K)); + // By default do not let the target stack size to be more than 1/4 of the entries + if (FLAG_IS_DEFAULT(GCDrainStackTargetSize)) { + FLAG_SET_ERGO(uintx, GCDrainStackTargetSize, MIN2(GCDrainStackTargetSize, (uintx)TASKQUEUE_SIZE / 4)); + } + #ifdef COMPILER2 // Enable loop strip mining to offer better pause time guarantees if (FLAG_IS_DEFAULT(UseCountedLoopSafepoints)) { diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/gc/g1/g1CollectedHeap.cpp --- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp Fri Apr 27 12:06:46 2018 +0200 @@ -28,7 +28,6 @@ #include "classfile/symbolTable.hpp" #include "code/codeCache.hpp" #include "code/icBuffer.hpp" -#include "gc/g1/bufferingOopClosure.hpp" #include "gc/g1/g1Allocator.inline.hpp" #include "gc/g1/g1BarrierSet.hpp" #include "gc/g1/g1CollectedHeap.inline.hpp" @@ -1841,7 +1840,7 @@ while (dcqs.apply_closure_during_gc(cl, worker_i)) { n_completed_buffers++; } - g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, n_completed_buffers); + g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, n_completed_buffers, G1GCPhaseTimes::UpdateRSProcessedBuffers); dcqs.clear_n_completed_buffers(); assert(!dcqs.completed_buffers_exist_dirty(), "Completed buffers exist!"); } @@ -3130,15 +3129,13 @@ double start_strong_roots_sec = os::elapsedTime(); - _root_processor->evacuate_roots(pss->closures(), worker_id); + _root_processor->evacuate_roots(pss, worker_id); // We pass a weak code blobs closure to the remembered set scanning because we want to avoid // treating the nmethods visited to act as roots for concurrent marking. // We only want to make sure that the oops in the nmethods are adjusted with regard to the // objects copied by the current evacuation. - _g1h->g1_rem_set()->oops_into_collection_set_do(pss, - pss->closures()->weak_codeblobs(), - worker_id); + _g1h->g1_rem_set()->oops_into_collection_set_do(pss, worker_id); double strong_roots_sec = os::elapsedTime() - start_strong_roots_sec; @@ -3152,9 +3149,11 @@ evac_term_attempts = evac.term_attempts(); term_sec = evac.term_time(); double elapsed_sec = os::elapsedTime() - start; - _g1h->g1_policy()->phase_times()->add_time_secs(G1GCPhaseTimes::ObjCopy, worker_id, elapsed_sec - term_sec); - _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::Termination, worker_id, term_sec); - _g1h->g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::Termination, worker_id, evac_term_attempts); + + G1GCPhaseTimes* p = _g1h->g1_policy()->phase_times(); + p->add_time_secs(G1GCPhaseTimes::ObjCopy, worker_id, elapsed_sec - term_sec); + p->record_time_secs(G1GCPhaseTimes::Termination, worker_id, term_sec); + p->record_thread_work_item(G1GCPhaseTimes::Termination, worker_id, evac_term_attempts); } assert(pss->queue_is_empty(), "should be empty"); diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp --- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp Fri Apr 27 12:06:46 2018 +0200 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -26,6 +26,7 @@ #include "gc/g1/g1CollectedHeap.inline.hpp" #include "gc/g1/g1GCPhaseTimes.hpp" #include "gc/g1/g1HotCardCache.hpp" +#include "gc/g1/g1ParScanThreadState.inline.hpp" #include "gc/g1/g1StringDedup.hpp" #include "gc/shared/workerDataArray.inline.hpp" #include "memory/resourceArea.hpp" @@ -167,9 +168,12 @@ } #define ASSERT_PHASE_UNINITIALIZED(phase) \ - assert(_gc_par_phases[phase]->get(i) == uninitialized, "Phase " #phase " reported for thread that was not started"); + assert(_gc_par_phases[phase] == NULL || _gc_par_phases[phase]->get(i) == uninitialized, "Phase " #phase " reported for thread that was not started"); double G1GCPhaseTimes::worker_time(GCParPhases phase, uint worker) { + if (_gc_par_phases[phase] == NULL) { + return 0.0; + } double value = _gc_par_phases[phase]->get(worker); if (value != WorkerDataArray::uninitialized()) { return value; @@ -189,21 +193,20 @@ double total_worker_time = _gc_par_phases[GCWorkerEnd]->get(i) - _gc_par_phases[GCWorkerStart]->get(i); record_time_secs(GCWorkerTotal, i , total_worker_time); - double worker_known_time = - worker_time(ExtRootScan, i) - + worker_time(SATBFiltering, i) - + worker_time(UpdateRS, i) - + worker_time(ScanRS, i) - + worker_time(CodeRoots, i) - + worker_time(ObjCopy, i) - + worker_time(Termination, i); + double worker_known_time = worker_time(ExtRootScan, i) + + worker_time(ScanHCC, i) + + worker_time(UpdateRS, i) + + worker_time(ScanRS, i) + + worker_time(CodeRoots, i) + + worker_time(ObjCopy, i) + + worker_time(Termination, i); record_time_secs(Other, i, total_worker_time - worker_known_time); } else { // Make sure all slots are uninitialized since this thread did not seem to have been started ASSERT_PHASE_UNINITIALIZED(GCWorkerEnd); ASSERT_PHASE_UNINITIALIZED(ExtRootScan); - ASSERT_PHASE_UNINITIALIZED(SATBFiltering); + ASSERT_PHASE_UNINITIALIZED(ScanHCC); ASSERT_PHASE_UNINITIALIZED(UpdateRS); ASSERT_PHASE_UNINITIALIZED(ScanRS); ASSERT_PHASE_UNINITIALIZED(CodeRoots); @@ -225,6 +228,14 @@ _gc_par_phases[phase]->add(worker_i, secs); } +void G1GCPhaseTimes::record_or_add_objcopy_time_secs(uint worker_i, double secs) { + if (_gc_par_phases[ObjCopy]->get(worker_i) == _gc_par_phases[ObjCopy]->uninitialized()) { + record_time_secs(ObjCopy, worker_i, secs); + } else { + add_time_secs(ObjCopy, worker_i, secs); + } +} + void G1GCPhaseTimes::record_thread_work_item(GCParPhases phase, uint worker_i, size_t count, uint index) { _gc_par_phases[phase]->set_thread_work_item(worker_i, count, index); } @@ -463,16 +474,49 @@ } } +G1EvacPhaseWithTrimTimeTracker::G1EvacPhaseWithTrimTimeTracker(G1ParScanThreadState* pss, Tickspan& total_time, Tickspan& trim_time) : + _pss(pss), + _start(Ticks::now()), + _total_time(total_time), + _trim_time(trim_time) { + + assert(_pss->trim_ticks().value() == 0, "Possibly remaining trim ticks left over from previous use"); +} + +G1EvacPhaseWithTrimTimeTracker::~G1EvacPhaseWithTrimTimeTracker() { + _total_time += (Ticks::now() - _start) - _pss->trim_ticks(); + _trim_time += _pss->trim_ticks(); + _pss->reset_trim_ticks(); +} + G1GCParPhaseTimesTracker::G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id) : _phase_times(phase_times), _phase(phase), _worker_id(worker_id) { if (_phase_times != NULL) { - _start_time = os::elapsedTime(); + _start_time = Ticks::now(); } } G1GCParPhaseTimesTracker::~G1GCParPhaseTimesTracker() { if (_phase_times != NULL) { - _phase_times->record_time_secs(_phase, _worker_id, os::elapsedTime() - _start_time); + _phase_times->record_time_secs(_phase, _worker_id, TicksToTimeHelper::seconds(Ticks::now() - _start_time)); } } +G1EvacPhaseTimesTracker::G1EvacPhaseTimesTracker(G1GCPhaseTimes* phase_times, + G1ParScanThreadState* pss, + G1GCPhaseTimes::GCParPhases phase, + uint worker_id) : + G1GCParPhaseTimesTracker(phase_times, phase, worker_id), + _total_time(), + _trim_time(), + _trim_tracker(pss, _total_time, _trim_time) { +} + +G1EvacPhaseTimesTracker::~G1EvacPhaseTimesTracker() { + if (_phase_times != NULL) { + // Exclude trim time by increasing the start time. + _start_time += _trim_time; + _phase_times->record_or_add_objcopy_time_secs(_worker_id, TicksToTimeHelper::seconds(_trim_time)); + } +} + diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp --- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp Fri Apr 27 12:06:46 2018 +0200 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -31,6 +31,7 @@ #include "utilities/macros.hpp" class LineBuffer; +class G1ParScanThreadState; class STWGCTimer; template class WorkerDataArray; @@ -198,6 +199,8 @@ // add a number of seconds to a phase void add_time_secs(GCParPhases phase, uint worker_i, double secs); + void record_or_add_objcopy_time_secs(uint worker_i, double secs); + void record_thread_work_item(GCParPhases phase, uint worker_i, size_t count, uint index = 0); // return the average time for a phase in milliseconds @@ -369,14 +372,36 @@ ReferenceProcessorPhaseTimes* ref_phase_times() { return &_ref_phase_times; } }; -class G1GCParPhaseTimesTracker : public StackObj { - double _start_time; +class G1EvacPhaseWithTrimTimeTracker : public StackObj { + G1ParScanThreadState* _pss; + Ticks _start; + + Tickspan& _total_time; + Tickspan& _trim_time; +public: + G1EvacPhaseWithTrimTimeTracker(G1ParScanThreadState* pss, Tickspan& total_time, Tickspan& trim_time); + ~G1EvacPhaseWithTrimTimeTracker(); +}; + +class G1GCParPhaseTimesTracker : public CHeapObj { +protected: + Ticks _start_time; G1GCPhaseTimes::GCParPhases _phase; G1GCPhaseTimes* _phase_times; uint _worker_id; public: G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id); - ~G1GCParPhaseTimesTracker(); + virtual ~G1GCParPhaseTimesTracker(); +}; + +class G1EvacPhaseTimesTracker : public G1GCParPhaseTimesTracker { + Tickspan _total_time; + Tickspan _trim_time; + + G1EvacPhaseWithTrimTimeTracker _trim_tracker; +public: + G1EvacPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1ParScanThreadState* pss, G1GCPhaseTimes::GCParPhases phase, uint worker_id); + virtual ~G1EvacPhaseTimesTracker(); }; #endif // SHARE_VM_GC_G1_G1GCPHASETIMES_HPP diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/gc/g1/g1OopClosures.cpp --- a/src/hotspot/share/gc/g1/g1OopClosures.cpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/gc/g1/g1OopClosures.cpp Fri Apr 27 12:06:46 2018 +0200 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -56,6 +56,8 @@ cld->oops_do(_closure, _must_claim, /*clear_modified_oops*/true); _closure->set_scanned_cld(NULL); + + _closure->trim_queue_partially(); } _count++; } diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/gc/g1/g1OopClosures.hpp --- a/src/hotspot/share/gc/g1/g1OopClosures.hpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/gc/g1/g1OopClosures.hpp Fri Apr 27 12:06:46 2018 +0200 @@ -57,6 +57,8 @@ // This closure needs special handling for InstanceRefKlass. virtual ReferenceIterationMode reference_iteration_mode() { return DO_DISCOVERED_AND_DISCOVERY; } void set_region(HeapRegion* from) { _from = from; } + + inline void trim_queue_partially(); }; // Used during the Update RS phase to refine remaining cards in the DCQ during garbage collection. @@ -126,6 +128,8 @@ public: void set_scanned_cld(ClassLoaderData* cld) { _scanned_cld = cld; } inline void do_cld_barrier(oop new_obj); + + inline void trim_queue_partially(); }; enum G1Barrier { diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/gc/g1/g1OopClosures.inline.hpp --- a/src/hotspot/share/gc/g1/g1OopClosures.inline.hpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/gc/g1/g1OopClosures.inline.hpp Fri Apr 27 12:06:46 2018 +0200 @@ -67,6 +67,10 @@ } } +inline void G1ScanClosureBase::trim_queue_partially() { + _par_scan_state->trim_queue_partially(); +} + template inline void G1ScanEvacuatedObjClosure::do_oop_nv(T* p) { T heap_oop = RawAccess<>::oop_load(p); @@ -225,6 +229,10 @@ _cm->mark_in_next_bitmap(_worker_id, to_obj, from_obj->size()); } +void G1ParCopyHelper::trim_queue_partially() { + _par_scan_state->trim_queue_partially(); +} + template template void G1ParCopyClosure::do_oop_work(T* p) { @@ -269,6 +277,7 @@ mark_object(obj); } } + trim_queue_partially(); } template void G1RebuildRemSetClosure::do_oop_nv(T* p) { diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/gc/g1/g1ParScanThreadState.cpp --- a/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp Fri Apr 27 12:06:46 2018 +0200 @@ -43,11 +43,15 @@ _dcq(&g1h->dirty_card_queue_set()), _ct(g1h->card_table()), _closures(NULL), + _plab_allocator(NULL), + _age_table(false), + _tenuring_threshold(g1h->g1_policy()->tenuring_threshold()), + _scanner(g1h, this), _hash_seed(17), _worker_id(worker_id), - _tenuring_threshold(g1h->g1_policy()->tenuring_threshold()), - _age_table(false), - _scanner(g1h, this), + _stack_trim_upper_threshold(GCDrainStackTargetSize * 2 + 1), + _stack_trim_lower_threshold(GCDrainStackTargetSize), + _trim_ticks(), _old_gen_is_full(false) { // we allocate G1YoungSurvRateNumRegions plus one entries, since @@ -138,16 +142,8 @@ void G1ParScanThreadState::trim_queue() { StarTask ref; do { - // Drain the overflow stack first, so other threads can steal. - while (_refs->pop_overflow(ref)) { - if (!_refs->try_push_to_taskqueue(ref)) { - dispatch_reference(ref); - } - } - - while (_refs->pop_local(ref)) { - dispatch_reference(ref); - } + // Fully drain the queue. + trim_queue_to_threshold(0); } while (!_refs->is_empty()); } @@ -314,7 +310,7 @@ // length field of the from-space object. arrayOop(obj)->set_length(0); oop* old_p = set_partial_array_mask(old); - push_on_queue(old_p); + do_oop_partial_array(old_p); } else { HeapRegion* const to_region = _g1h->heap_region_containing(obj_ptr); _scanner.set_region(to_region); diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/gc/g1/g1ParScanThreadState.hpp --- a/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp Fri Apr 27 12:06:46 2018 +0200 @@ -35,6 +35,7 @@ #include "gc/shared/ageTable.hpp" #include "memory/allocation.hpp" #include "oops/oop.hpp" +#include "utilities/ticks.hpp" class G1PLABAllocator; class G1EvacuationRootClosures; @@ -42,7 +43,6 @@ class outputStream; class G1ParScanThreadState : public CHeapObj { - private: G1CollectedHeap* _g1h; RefToScanQueue* _refs; DirtyCardQueue _dcq; @@ -60,6 +60,11 @@ int _hash_seed; uint _worker_id; + // Upper and lower threshold to start and end work queue draining. + uint const _stack_trim_upper_threshold; + uint const _stack_trim_lower_threshold; + + Tickspan _trim_ticks; // Map from young-age-index (0 == not young, 1 is youngest) to // surviving words. base is what we get back from the malloc call size_t* _surviving_young_words_base; @@ -83,7 +88,7 @@ return _dest[original.value()]; } - public: +public: G1ParScanThreadState(G1CollectedHeap* g1h, uint worker_id, size_t young_cset_length); virtual ~G1ParScanThreadState(); @@ -129,7 +134,7 @@ void flush(size_t* surviving_young_words); - private: +private: #define G1_PARTIAL_ARRAY_MASK 0x2 inline bool has_partial_array_mask(oop* ref) const { @@ -185,11 +190,19 @@ void report_promotion_event(InCSetState const dest_state, oop const old, size_t word_sz, uint age, HeapWord * const obj_ptr) const; - public: + inline bool needs_partial_trimming() const; + inline bool is_partially_trimmed() const; + + inline void trim_queue_to_threshold(uint threshold); +public: oop copy_to_survivor_space(InCSetState const state, oop const obj, markOop const old_mark); void trim_queue(); + void trim_queue_partially(); + + Tickspan trim_ticks() const; + void reset_trim_ticks(); inline void steal_and_trim_queue(RefToScanQueueSet *task_queues); diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/gc/g1/g1ParScanThreadState.inline.hpp --- a/src/hotspot/share/gc/g1/g1ParScanThreadState.inline.hpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.inline.hpp Fri Apr 27 12:06:46 2018 +0200 @@ -29,6 +29,7 @@ #include "gc/g1/g1RemSet.hpp" #include "oops/access.inline.hpp" #include "oops/oop.inline.hpp" +#include "utilities/ticks.inline.hpp" template void G1ParScanThreadState::do_oop_evac(T* p) { // Reference should not be NULL here as such are never pushed to the task queue. @@ -151,4 +152,46 @@ } } +inline bool G1ParScanThreadState::needs_partial_trimming() const { + return !_refs->overflow_empty() || _refs->size() > _stack_trim_upper_threshold; +} + +inline bool G1ParScanThreadState::is_partially_trimmed() const { + return _refs->overflow_empty() && _refs->size() <= _stack_trim_lower_threshold; +} + +inline void G1ParScanThreadState::trim_queue_to_threshold(uint threshold) { + StarTask ref; + // Drain the overflow stack first, so other threads can potentially steal. + while (_refs->pop_overflow(ref)) { + if (!_refs->try_push_to_taskqueue(ref)) { + dispatch_reference(ref); + } + } + + while (_refs->pop_local(ref, threshold)) { + dispatch_reference(ref); + } +} + +inline void G1ParScanThreadState::trim_queue_partially() { + if (!needs_partial_trimming()) { + return; + } + + const Ticks start = Ticks::now(); + do { + trim_queue_to_threshold(_stack_trim_lower_threshold); + } while (!is_partially_trimmed()); + _trim_ticks += Ticks::now() - start; +} + +inline Tickspan G1ParScanThreadState::trim_ticks() const { + return _trim_ticks; +} + +inline void G1ParScanThreadState::reset_trim_ticks() { + _trim_ticks = Tickspan(); +} + #endif // SHARE_VM_GC_G1_G1PARSCANTHREADSTATE_INLINE_HPP diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/gc/g1/g1Policy.cpp --- a/src/hotspot/share/gc/g1/g1Policy.cpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/gc/g1/g1Policy.cpp Fri Apr 27 12:06:46 2018 +0200 @@ -628,7 +628,7 @@ if (update_stats) { double cost_per_card_ms = 0.0; if (_pending_cards > 0) { - cost_per_card_ms = (average_time_ms(G1GCPhaseTimes::UpdateRS) - scan_hcc_time_ms) / (double) _pending_cards; + cost_per_card_ms = (average_time_ms(G1GCPhaseTimes::UpdateRS)) / (double) _pending_cards; _analytics->report_cost_per_card_ms(cost_per_card_ms); } _analytics->report_cost_scan_hcc(scan_hcc_time_ms); @@ -730,9 +730,9 @@ } else { update_rs_time_goal_ms -= scan_hcc_time_ms; } - _g1h->concurrent_refine()->adjust(average_time_ms(G1GCPhaseTimes::UpdateRS) - scan_hcc_time_ms, - phase_times()->sum_thread_work_items(G1GCPhaseTimes::UpdateRS), - update_rs_time_goal_ms); + _g1h->concurrent_refine()->adjust(average_time_ms(G1GCPhaseTimes::UpdateRS), + phase_times()->sum_thread_work_items(G1GCPhaseTimes::UpdateRS), + update_rs_time_goal_ms); cset_chooser()->verify(); } diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/gc/g1/g1RemSet.cpp --- a/src/hotspot/share/gc/g1/g1RemSet.cpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/gc/g1/g1RemSet.cpp Fri Apr 27 12:06:46 2018 +0200 @@ -33,6 +33,7 @@ #include "gc/g1/g1GCPhaseTimes.hpp" #include "gc/g1/g1HotCardCache.hpp" #include "gc/g1/g1OopClosures.inline.hpp" +#include "gc/g1/g1RootClosures.hpp" #include "gc/g1/g1RemSet.hpp" #include "gc/g1/heapRegion.inline.hpp" #include "gc/g1/heapRegionManager.inline.hpp" @@ -306,32 +307,21 @@ G1ScanRSForRegionClosure::G1ScanRSForRegionClosure(G1RemSetScanState* scan_state, G1ScanObjsDuringScanRSClosure* scan_obj_on_card, - CodeBlobClosure* code_root_cl, + G1ParScanThreadState* pss, uint worker_i) : + _g1h(G1CollectedHeap::heap()), + _ct(_g1h->card_table()), + _pss(pss), + _scan_objs_on_card_cl(scan_obj_on_card), _scan_state(scan_state), - _scan_objs_on_card_cl(scan_obj_on_card), - _code_root_cl(code_root_cl), - _strong_code_root_scan_time_sec(0.0), + _worker_i(worker_i), _cards_claimed(0), _cards_scanned(0), _cards_skipped(0), - _worker_i(worker_i) { - _g1h = G1CollectedHeap::heap(); - _bot = _g1h->bot(); - _ct = _g1h->card_table(); -} - -void G1ScanRSForRegionClosure::scan_card(MemRegion mr, uint region_idx_for_card) { - HeapRegion* const card_region = _g1h->region_at(region_idx_for_card); - _scan_objs_on_card_cl->set_region(card_region); - card_region->oops_on_card_seq_iterate_careful(mr, _scan_objs_on_card_cl); - _cards_scanned++; -} - -void G1ScanRSForRegionClosure::scan_strong_code_roots(HeapRegion* r) { - double scan_start = os::elapsedTime(); - r->strong_code_roots_do(_code_root_cl); - _strong_code_root_scan_time_sec += (os::elapsedTime() - scan_start); + _rem_set_root_scan_time(), + _rem_set_trim_partially_time(), + _strong_code_root_scan_time(), + _strong_code_trim_partially_time() { } void G1ScanRSForRegionClosure::claim_card(size_t card_index, const uint region_idx_for_card){ @@ -339,13 +329,17 @@ _scan_state->add_dirty_region(region_idx_for_card); } -bool G1ScanRSForRegionClosure::do_heap_region(HeapRegion* r) { - assert(r->in_collection_set(), "should only be called on elements of CS."); - uint region_idx = r->hrm_index(); +void G1ScanRSForRegionClosure::scan_card(MemRegion mr, uint region_idx_for_card) { + HeapRegion* const card_region = _g1h->region_at(region_idx_for_card); + _scan_objs_on_card_cl->set_region(card_region); + card_region->oops_on_card_seq_iterate_careful(mr, _scan_objs_on_card_cl); + _scan_objs_on_card_cl->trim_queue_partially(); + _cards_scanned++; +} - if (_scan_state->iter_is_complete(region_idx)) { - return false; - } +void G1ScanRSForRegionClosure::scan_rem_set_roots(HeapRegion* r) { + uint const region_idx = r->hrm_index(); + if (_scan_state->claim_iter(region_idx)) { // If we ever free the collection set concurrently, we should also // clear the card table concurrently therefore we won't need to @@ -397,33 +391,52 @@ scan_card(mr, region_idx_for_card); } +} + +void G1ScanRSForRegionClosure::scan_strong_code_roots(HeapRegion* r) { + r->strong_code_roots_do(_pss->closures()->weak_codeblobs()); +} + +bool G1ScanRSForRegionClosure::do_heap_region(HeapRegion* r) { + assert(r->in_collection_set(), + "Should only be called on elements of the collection set but region %u is not.", + r->hrm_index()); + uint const region_idx = r->hrm_index(); + + // Do an early out if we know we are complete. + if (_scan_state->iter_is_complete(region_idx)) { + return false; + } + + { + G1EvacPhaseWithTrimTimeTracker timer(_pss, _rem_set_root_scan_time, _rem_set_trim_partially_time); + scan_rem_set_roots(r); + } + if (_scan_state->set_iter_complete(region_idx)) { + G1EvacPhaseWithTrimTimeTracker timer(_pss, _strong_code_root_scan_time, _strong_code_trim_partially_time); // Scan the strong code root list attached to the current region scan_strong_code_roots(r); } return false; } -void G1RemSet::scan_rem_set(G1ParScanThreadState* pss, - CodeBlobClosure* heap_region_codeblobs, - uint worker_i) { - double rs_time_start = os::elapsedTime(); - +void G1RemSet::scan_rem_set(G1ParScanThreadState* pss, uint worker_i) { G1ScanObjsDuringScanRSClosure scan_cl(_g1h, pss); - G1ScanRSForRegionClosure cl(_scan_state, &scan_cl, heap_region_codeblobs, worker_i); + G1ScanRSForRegionClosure cl(_scan_state, &scan_cl, pss, worker_i); _g1h->collection_set_iterate_from(&cl, worker_i); - double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) - - cl.strong_code_root_scan_time_sec(); - G1GCPhaseTimes* p = _g1p->phase_times(); - p->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, scan_rs_time_sec); + p->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, TicksToTimeHelper::seconds(cl.rem_set_root_scan_time())); + p->add_time_secs(G1GCPhaseTimes::ObjCopy, worker_i, TicksToTimeHelper::seconds(cl.rem_set_trim_partially_time())); + p->record_thread_work_item(G1GCPhaseTimes::ScanRS, worker_i, cl.cards_scanned(), G1GCPhaseTimes::ScanRSScannedCards); p->record_thread_work_item(G1GCPhaseTimes::ScanRS, worker_i, cl.cards_claimed(), G1GCPhaseTimes::ScanRSClaimedCards); p->record_thread_work_item(G1GCPhaseTimes::ScanRS, worker_i, cl.cards_skipped(), G1GCPhaseTimes::ScanRSSkippedCards); - p->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, cl.strong_code_root_scan_time_sec()); + p->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, TicksToTimeHelper::seconds(cl.strong_code_root_scan_time())); + p->add_time_secs(G1GCPhaseTimes::ObjCopy, worker_i, TicksToTimeHelper::seconds(cl.strong_code_root_trim_partially_time())); } // Closure used for updating rem sets. Only called during an evacuation pause. @@ -448,6 +461,7 @@ bool card_scanned = _g1rs->refine_card_during_gc(card_ptr, _update_rs_cl); if (card_scanned) { + _update_rs_cl->trim_queue_partially(); _cards_scanned++; } else { _cards_skipped++; @@ -460,32 +474,37 @@ }; void G1RemSet::update_rem_set(G1ParScanThreadState* pss, uint worker_i) { - G1ScanObjsDuringUpdateRSClosure update_rs_cl(_g1h, pss, worker_i); - G1RefineCardClosure refine_card_cl(_g1h, &update_rs_cl); + G1GCPhaseTimes* p = _g1p->phase_times(); - G1GCParPhaseTimesTracker x(_g1p->phase_times(), G1GCPhaseTimes::UpdateRS, worker_i); + // Apply closure to log entries in the HCC. if (G1HotCardCache::default_use_cache()) { - // Apply the closure to the entries of the hot card cache. - G1GCParPhaseTimesTracker y(_g1p->phase_times(), G1GCPhaseTimes::ScanHCC, worker_i); + G1EvacPhaseTimesTracker x(p, pss, G1GCPhaseTimes::ScanHCC, worker_i); + + G1ScanObjsDuringUpdateRSClosure scan_hcc_cl(_g1h, pss, worker_i); + G1RefineCardClosure refine_card_cl(_g1h, &scan_hcc_cl); _g1h->iterate_hcc_closure(&refine_card_cl, worker_i); } - // Apply the closure to all remaining log entries. - _g1h->iterate_dirty_card_closure(&refine_card_cl, worker_i); + + // Now apply the closure to all remaining log entries. + { + G1EvacPhaseTimesTracker x(p, pss, G1GCPhaseTimes::UpdateRS, worker_i); - G1GCPhaseTimes* p = _g1p->phase_times(); - p->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, refine_card_cl.cards_scanned(), G1GCPhaseTimes::UpdateRSScannedCards); - p->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, refine_card_cl.cards_skipped(), G1GCPhaseTimes::UpdateRSSkippedCards); + G1ScanObjsDuringUpdateRSClosure update_rs_cl(_g1h, pss, worker_i); + G1RefineCardClosure refine_card_cl(_g1h, &update_rs_cl); + _g1h->iterate_dirty_card_closure(&refine_card_cl, worker_i); + + p->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, refine_card_cl.cards_scanned(), G1GCPhaseTimes::UpdateRSScannedCards); + p->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, refine_card_cl.cards_skipped(), G1GCPhaseTimes::UpdateRSSkippedCards); + } } void G1RemSet::cleanupHRRS() { HeapRegionRemSet::cleanup(); } -void G1RemSet::oops_into_collection_set_do(G1ParScanThreadState* pss, - CodeBlobClosure* heap_region_codeblobs, - uint worker_i) { +void G1RemSet::oops_into_collection_set_do(G1ParScanThreadState* pss, uint worker_i) { update_rem_set(pss, worker_i); - scan_rem_set(pss, heap_region_codeblobs, worker_i);; + scan_rem_set(pss, worker_i);; } void G1RemSet::prepare_for_oops_into_collection_set_do() { diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/gc/g1/g1RemSet.hpp --- a/src/hotspot/share/gc/g1/g1RemSet.hpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/gc/g1/g1RemSet.hpp Fri Apr 27 12:06:46 2018 +0200 @@ -32,6 +32,7 @@ #include "gc/g1/heapRegion.hpp" #include "memory/allocation.hpp" #include "memory/iterator.hpp" +#include "utilities/ticks.hpp" // A G1RemSet provides ways of iterating over pointers into a selected // collection set. @@ -61,9 +62,7 @@ // Scan all remembered sets of the collection set for references into the collection // set. - void scan_rem_set(G1ParScanThreadState* pss, - CodeBlobClosure* heap_region_codeblobs, - uint worker_i); + void scan_rem_set(G1ParScanThreadState* pss, uint worker_i); // Flush remaining refinement buffers for cross-region references to either evacuate references // into the collection set or update the remembered set. @@ -102,9 +101,7 @@ // // Further applies heap_region_codeblobs on the oops of the unmarked nmethods on the strong code // roots list for each region in the collection set. - void oops_into_collection_set_do(G1ParScanThreadState* pss, - CodeBlobClosure* heap_region_codeblobs, - uint worker_i); + void oops_into_collection_set_do(G1ParScanThreadState* pss, uint worker_i); // Prepare for and cleanup after an oops_into_collection_set_do // call. Must call each of these once before and after (in sequential @@ -138,37 +135,44 @@ }; class G1ScanRSForRegionClosure : public HeapRegionClosure { + G1CollectedHeap* _g1h; + G1CardTable *_ct; + + G1ParScanThreadState* _pss; + G1ScanObjsDuringScanRSClosure* _scan_objs_on_card_cl; + G1RemSetScanState* _scan_state; + uint _worker_i; + size_t _cards_scanned; size_t _cards_claimed; size_t _cards_skipped; - G1CollectedHeap* _g1h; - - G1ScanObjsDuringScanRSClosure* _scan_objs_on_card_cl; - CodeBlobClosure* _code_root_cl; + Tickspan _rem_set_root_scan_time; + Tickspan _rem_set_trim_partially_time; - G1BlockOffsetTable* _bot; - G1CardTable *_ct; - - double _strong_code_root_scan_time_sec; - uint _worker_i; + Tickspan _strong_code_root_scan_time; + Tickspan _strong_code_trim_partially_time; void claim_card(size_t card_index, const uint region_idx_for_card); void scan_card(MemRegion mr, uint region_idx_for_card); + + void scan_rem_set_roots(HeapRegion* r); void scan_strong_code_roots(HeapRegion* r); public: G1ScanRSForRegionClosure(G1RemSetScanState* scan_state, G1ScanObjsDuringScanRSClosure* scan_obj_on_card, - CodeBlobClosure* code_root_cl, + G1ParScanThreadState* pss, uint worker_i); bool do_heap_region(HeapRegion* r); - double strong_code_root_scan_time_sec() { - return _strong_code_root_scan_time_sec; - } + Tickspan rem_set_root_scan_time() const { return _rem_set_root_scan_time; } + Tickspan rem_set_trim_partially_time() const { return _rem_set_trim_partially_time; } + + Tickspan strong_code_root_scan_time() const { return _strong_code_root_scan_time; } + Tickspan strong_code_root_trim_partially_time() const { return _strong_code_trim_partially_time; } size_t cards_scanned() const { return _cards_scanned; } size_t cards_claimed() const { return _cards_claimed; } diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/gc/g1/g1RootClosures.cpp --- a/src/hotspot/share/gc/g1/g1RootClosures.cpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/gc/g1/g1RootClosures.cpp Fri Apr 27 12:06:46 2018 +0200 @@ -37,8 +37,8 @@ bool in_young_gc) : _closures(g1h, pss, in_young_gc, /* must_claim_cld */ false) {} - OopClosure* weak_oops() { return &_closures._buffered_oops; } - OopClosure* strong_oops() { return &_closures._buffered_oops; } + OopClosure* weak_oops() { return &_closures._oops; } + OopClosure* strong_oops() { return &_closures._oops; } CLDClosure* weak_clds() { return &_closures._clds; } CLDClosure* strong_clds() { return &_closures._clds; } @@ -47,9 +47,6 @@ CodeBlobClosure* strong_codeblobs() { return &_closures._codeblobs; } CodeBlobClosure* weak_codeblobs() { return &_closures._codeblobs; } - void flush() { _closures._buffered_oops.done(); } - double closure_app_seconds() { return _closures._buffered_oops.closure_app_seconds(); } - OopClosure* raw_strong_oops() { return &_closures._oops; } bool trace_metadata() { return false; } @@ -79,8 +76,8 @@ _strong(g1h, pss, /* process_only_dirty_klasses */ false, /* must_claim_cld */ true), _weak(g1h, pss, /* process_only_dirty_klasses */ false, /* must_claim_cld */ true) {} - OopClosure* weak_oops() { return &_weak._buffered_oops; } - OopClosure* strong_oops() { return &_strong._buffered_oops; } + OopClosure* weak_oops() { return &_weak._oops; } + OopClosure* strong_oops() { return &_strong._oops; } // If MarkWeak is G1MarkPromotedFromRoot then the weak CLDs must be processed in a second pass. CLDClosure* weak_clds() { return null_if(&_weak._clds); } @@ -93,16 +90,6 @@ CodeBlobClosure* strong_codeblobs() { return &_strong._codeblobs; } CodeBlobClosure* weak_codeblobs() { return &_weak._codeblobs; } - void flush() { - _strong._buffered_oops.done(); - _weak._buffered_oops.done(); - } - - double closure_app_seconds() { - return _strong._buffered_oops.closure_app_seconds() + - _weak._buffered_oops.closure_app_seconds(); - } - OopClosure* raw_strong_oops() { return &_strong._oops; } // If we are not marking all weak roots then we are tracing diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/gc/g1/g1RootClosures.hpp --- a/src/hotspot/share/gc/g1/g1RootClosures.hpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/gc/g1/g1RootClosures.hpp Fri Apr 27 12:06:46 2018 +0200 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -49,10 +49,6 @@ class G1EvacuationRootClosures : public G1RootClosures { public: - // Flush any buffered state and deferred processing - virtual void flush() = 0; - virtual double closure_app_seconds() = 0; - // Applied to the weakly reachable CLDs when all strongly reachable // CLDs are guaranteed to have been processed. virtual CLDClosure* second_pass_weak_clds() = 0; diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/gc/g1/g1RootProcessor.cpp --- a/src/hotspot/share/gc/g1/g1RootProcessor.cpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/gc/g1/g1RootProcessor.cpp Fri Apr 27 12:06:46 2018 +0200 @@ -28,12 +28,12 @@ #include "classfile/stringTable.hpp" #include "classfile/systemDictionary.hpp" #include "code/codeCache.hpp" -#include "gc/g1/bufferingOopClosure.hpp" #include "gc/g1/g1BarrierSet.hpp" #include "gc/g1/g1CodeBlobClosure.hpp" #include "gc/g1/g1CollectedHeap.inline.hpp" #include "gc/g1/g1CollectorState.hpp" #include "gc/g1/g1GCPhaseTimes.hpp" +#include "gc/g1/g1ParScanThreadState.inline.hpp" #include "gc/g1/g1Policy.hpp" #include "gc/g1/g1RootClosures.hpp" #include "gc/g1/g1RootProcessor.hpp" @@ -73,10 +73,12 @@ _lock(Mutex::leaf, "G1 Root Scanning barrier lock", false, Monitor::_safepoint_check_never), _n_workers_discovered_strong_classes(0) {} -void G1RootProcessor::evacuate_roots(G1EvacuationRootClosures* closures, uint worker_i) { - double ext_roots_start = os::elapsedTime(); +void G1RootProcessor::evacuate_roots(G1ParScanThreadState* pss, uint worker_i) { G1GCPhaseTimes* phase_times = _g1h->g1_policy()->phase_times(); + G1EvacPhaseTimesTracker timer(phase_times, pss, G1GCPhaseTimes::ExtRootScan, worker_i); + + G1EvacuationRootClosures* closures = pss->closures(); process_java_roots(closures, phase_times, worker_i); // This is the point where this worker thread will not find more strong CLDs/nmethods. @@ -118,17 +120,6 @@ assert(closures->second_pass_weak_clds() == NULL, "Should be null if not tracing metadata."); } - // Finish up any enqueued closure apps (attributed as object copy time). - closures->flush(); - - double obj_copy_time_sec = closures->closure_app_seconds(); - - phase_times->record_time_secs(G1GCPhaseTimes::ObjCopy, worker_i, obj_copy_time_sec); - - double ext_root_time_sec = os::elapsedTime() - ext_roots_start - obj_copy_time_sec; - - phase_times->record_time_secs(G1GCPhaseTimes::ExtRootScan, worker_i, ext_root_time_sec); - // During conc marking we have to filter the per-thread SATB buffers // to make sure we remove any oops into the CSet (which will show up // as implicitly live). diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/gc/g1/g1RootProcessor.hpp --- a/src/hotspot/share/gc/g1/g1RootProcessor.hpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/gc/g1/g1RootProcessor.hpp Fri Apr 27 12:06:46 2018 +0200 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,6 +34,7 @@ class G1CollectedHeap; class G1EvacuationRootClosures; class G1GCPhaseTimes; +class G1ParScanThreadState; class G1RootClosures; class Monitor; class OopClosure; @@ -97,10 +98,10 @@ public: G1RootProcessor(G1CollectedHeap* g1h, uint n_workers); - // Apply closures to the strongly and weakly reachable roots in the system + // Apply correct closures from pss to the strongly and weakly reachable roots in the system // in a single pass. // Record and report timing measurements for sub phases using the worker_i - void evacuate_roots(G1EvacuationRootClosures* closures, uint worker_i); + void evacuate_roots(G1ParScanThreadState* pss, uint worker_id); // Apply oops, clds and blobs to all strongly reachable roots in the system void process_strong_roots(OopClosure* oops, diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/gc/g1/g1SharedClosures.hpp --- a/src/hotspot/share/gc/g1/g1SharedClosures.hpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/gc/g1/g1SharedClosures.hpp Fri Apr 27 12:06:46 2018 +0200 @@ -22,7 +22,6 @@ * */ -#include "gc/g1/bufferingOopClosure.hpp" #include "gc/g1/g1CodeBlobClosure.hpp" #include "gc/g1/g1OopClosures.hpp" #include "memory/iterator.hpp" @@ -39,12 +38,10 @@ G1CLDScanClosure _clds; G1CodeBlobClosure _codeblobs; - BufferingOopClosure _buffered_oops; G1SharedClosures(G1CollectedHeap* g1h, G1ParScanThreadState* pss, bool process_only_dirty, bool must_claim_cld) : _oops(g1h, pss), _oops_in_cld(g1h, pss), _clds(&_oops_in_cld, process_only_dirty, must_claim_cld), - _codeblobs(&_oops), - _buffered_oops(&_oops) {} + _codeblobs(&_oops) {} }; diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/gc/shared/taskqueue.hpp --- a/src/hotspot/share/gc/shared/taskqueue.hpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/gc/shared/taskqueue.hpp Fri Apr 27 12:06:46 2018 +0200 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -285,9 +285,10 @@ inline bool push(E t); // Attempts to claim a task from the "local" end of the queue (the most - // recently pushed). If successful, returns true and sets t to the task; - // otherwise, returns false (the queue is empty). - inline bool pop_local(volatile E& t); + // recently pushed) as long as the number of entries exceeds the threshold. + // If successful, returns true and sets t to the task; otherwise, returns false + // (the queue is empty or the number of elements below the threshold). + inline bool pop_local(volatile E& t, uint threshold = 0); // Like pop_local(), but uses the "global" end of the queue (the least // recently pushed). diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/gc/shared/taskqueue.inline.hpp --- a/src/hotspot/share/gc/shared/taskqueue.inline.hpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/gc/shared/taskqueue.inline.hpp Fri Apr 27 12:06:46 2018 +0200 @@ -152,7 +152,7 @@ } template inline bool -GenericTaskQueue::pop_local(volatile E& t) { +GenericTaskQueue::pop_local(volatile E& t, uint threshold) { uint localBot = _bottom; // This value cannot be N-1. That can only occur as a result of // the assignment to bottom in this method. If it does, this method @@ -160,7 +160,7 @@ // since this is pop_local.) uint dirty_n_elems = dirty_size(localBot, _age.top()); assert(dirty_n_elems != N - 1, "Shouldn't be possible..."); - if (dirty_n_elems == 0) return false; + if (dirty_n_elems <= threshold) return false; localBot = decrement_index(localBot); _bottom = localBot; // This is necessary to prevent any read below from being reordered diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/utilities/ticks.hpp --- a/src/hotspot/share/utilities/ticks.hpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/utilities/ticks.hpp Fri Apr 27 12:06:46 2018 +0200 @@ -47,6 +47,11 @@ return *this; } + Tickspan& operator-=(const Tickspan& rhs) { + _span_ticks -= rhs._span_ticks; + return *this; + } + jlong value() const { return _span_ticks; } diff -r c822dd1a3b66 -r 358be4680d12 src/hotspot/share/utilities/ticks.inline.hpp --- a/src/hotspot/share/utilities/ticks.inline.hpp Fri Apr 27 10:59:46 2018 +0200 +++ b/src/hotspot/share/utilities/ticks.inline.hpp Fri Apr 27 12:06:46 2018 +0200 @@ -32,6 +32,11 @@ return lhs; } +inline Tickspan operator-(Tickspan lhs, const Tickspan& rhs) { + lhs -= rhs; + return lhs; +} + inline bool operator==(const Tickspan& lhs, const Tickspan& rhs) { return lhs.value() == rhs.value(); } diff -r c822dd1a3b66 -r 358be4680d12 test/hotspot/gtest/gc/g1/test_bufferingOopClosure.cpp --- a/test/hotspot/gtest/gc/g1/test_bufferingOopClosure.cpp Fri Apr 27 10:59:46 2018 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,257 +0,0 @@ -/* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -#include "precompiled.hpp" -#include "gc/g1/bufferingOopClosure.hpp" -#include "memory/iterator.hpp" -#include "unittest.hpp" - -class BufferingOopClosureTest : public ::testing::Test { - public: - // Helper class to fake a set of oop*s and narrowOop*s. - class FakeRoots { - public: - // Used for sanity checking of the values passed to the do_oops functions in the test. - static const uintptr_t NarrowOopMarker = uintptr_t(1) << (BitsPerWord -1); - - int _num_narrow; - int _num_full; - void** _narrow; - void** _full; - - FakeRoots(int num_narrow, int num_full) : - _num_narrow(num_narrow), - _num_full(num_full), - _narrow((void**)::malloc(sizeof(void*) * num_narrow)), - _full((void**)::malloc(sizeof(void*) * num_full)) { - - for (int i = 0; i < num_narrow; i++) { - _narrow[i] = (void*)(NarrowOopMarker + (uintptr_t)i); - } - for (int i = 0; i < num_full; i++) { - _full[i] = (void*)(uintptr_t)i; - } - } - - ~FakeRoots() { - ::free(_narrow); - ::free(_full); - } - - void oops_do_narrow_then_full(OopClosure* cl) { - for (int i = 0; i < _num_narrow; i++) { - cl->do_oop((narrowOop*)_narrow[i]); - } - for (int i = 0; i < _num_full; i++) { - cl->do_oop((oop*)_full[i]); - } - } - - void oops_do_full_then_narrow(OopClosure* cl) { - for (int i = 0; i < _num_full; i++) { - cl->do_oop((oop*)_full[i]); - } - for (int i = 0; i < _num_narrow; i++) { - cl->do_oop((narrowOop*)_narrow[i]); - } - } - - void oops_do_mixed(OopClosure* cl) { - int i; - for (i = 0; i < _num_full && i < _num_narrow; i++) { - cl->do_oop((oop*)_full[i]); - cl->do_oop((narrowOop*)_narrow[i]); - } - for (int j = i; j < _num_full; j++) { - cl->do_oop((oop*)_full[i]); - } - for (int j = i; j < _num_narrow; j++) { - cl->do_oop((narrowOop*)_narrow[i]); - } - } - - static const int MaxOrder = 2; - - void oops_do(OopClosure* cl, int do_oop_order) { - switch(do_oop_order) { - case 0: - oops_do_narrow_then_full(cl); - break; - case 1: - oops_do_full_then_narrow(cl); - break; - case 2: - oops_do_mixed(cl); - break; - default: - oops_do_narrow_then_full(cl); - break; - } - } - }; - - class CountOopClosure : public OopClosure { - int _narrow_oop_count; - int _full_oop_count; - public: - CountOopClosure() : _narrow_oop_count(0), _full_oop_count(0) {} - void do_oop(narrowOop* p) { - EXPECT_NE(uintptr_t(0), (uintptr_t(p) & FakeRoots::NarrowOopMarker)) - << "The narrowOop was unexpectedly not marked with the NarrowOopMarker"; - _narrow_oop_count++; - } - - void do_oop(oop* p){ - EXPECT_EQ(uintptr_t(0), (uintptr_t(p) & FakeRoots::NarrowOopMarker)) - << "The oop was unexpectedly marked with the NarrowOopMarker"; - _full_oop_count++; - } - - int narrow_oop_count() { return _narrow_oop_count; } - int full_oop_count() { return _full_oop_count; } - int all_oop_count() { return _narrow_oop_count + _full_oop_count; } - }; - - class DoNothingOopClosure : public OopClosure { - public: - void do_oop(narrowOop* p) {} - void do_oop(oop* p) {} - }; - - static void testCount(int num_narrow, int num_full, int do_oop_order) { - FakeRoots fr(num_narrow, num_full); - - CountOopClosure coc; - BufferingOopClosure boc(&coc); - - fr.oops_do(&boc, do_oop_order); - - boc.done(); - - EXPECT_EQ(num_narrow, coc.narrow_oop_count()) << "when running testCount(" - << num_narrow << ", " << num_full << ", " << do_oop_order << ")"; - - EXPECT_EQ(num_full, coc.full_oop_count()) << "when running testCount(" - << num_narrow << ", " << num_full << ", " << do_oop_order << ")"; - - EXPECT_EQ(num_narrow + num_full, coc.all_oop_count()) << "when running testCount(" - << num_narrow << ", " << num_full << ", " << do_oop_order << ")"; - } - - static void testIsBufferEmptyOrFull(int num_narrow, int num_full, bool expect_empty, bool expect_full) { - FakeRoots fr(num_narrow, num_full); - - DoNothingOopClosure cl; - BufferingOopClosure boc(&cl); - - fr.oops_do(&boc, 0); - - EXPECT_EQ(expect_empty, boc.is_buffer_empty()) - << "when running testIsBufferEmptyOrFull(" - << num_narrow << ", " << num_full << ", " - << expect_empty << ", " << expect_full << ")"; - - EXPECT_EQ(expect_full, boc.is_buffer_full()) - << "when running testIsBufferEmptyOrFull(" - << num_narrow << ", " << num_full << ", " - << expect_empty << ", " << expect_full << ")"; - } - - static void testEmptyAfterDone(int num_narrow, int num_full) { - FakeRoots fr(num_narrow, num_full); - - DoNothingOopClosure cl; - BufferingOopClosure boc(&cl); - - fr.oops_do(&boc, 0); - - // Make sure all get processed. - boc.done(); - - EXPECT_TRUE(boc.is_buffer_empty()) << "Should be empty after call to done()." - << " testEmptyAfterDone(" << num_narrow << ", " << num_full << ")"; - } - - static int get_buffer_length() { - return BufferingOopClosure::BufferLength; - } -}; - -TEST_VM_F(BufferingOopClosureTest, count_test) { - int bl = BufferingOopClosureTest::get_buffer_length(); - - for (int order = 0; order < FakeRoots::MaxOrder; order++) { - testCount(0, 0, order); - testCount(10, 0, order); - testCount(0, 10, order); - testCount(10, 10, order); - testCount(bl, 10, order); - testCount(10, bl, order); - testCount(bl, bl, order); - testCount(bl + 1, 10, order); - testCount(10, bl + 1, order); - testCount(bl + 1, bl, order); - testCount(bl, bl + 1, order); - testCount(bl + 1, bl + 1, order); - } -} - -TEST_VM_F(BufferingOopClosureTest, buffer_empty_or_full) { - int bl = BufferingOopClosureTest::get_buffer_length(); - - testIsBufferEmptyOrFull(0, 0, true, false); - testIsBufferEmptyOrFull(1, 0, false, false); - testIsBufferEmptyOrFull(0, 1, false, false); - testIsBufferEmptyOrFull(1, 1, false, false); - testIsBufferEmptyOrFull(10, 0, false, false); - testIsBufferEmptyOrFull(0, 10, false, false); - testIsBufferEmptyOrFull(10, 10, false, false); - testIsBufferEmptyOrFull(0, bl, false, true); - testIsBufferEmptyOrFull(bl, 0, false, true); - testIsBufferEmptyOrFull(bl / 2, bl / 2, false, true); - testIsBufferEmptyOrFull(bl - 1, 1, false, true); - testIsBufferEmptyOrFull(1, bl - 1, false, true); - // Processed - testIsBufferEmptyOrFull(bl + 1, 0, false, false); - testIsBufferEmptyOrFull(bl * 2, 0, false, true); -} - -TEST_VM_F(BufferingOopClosureTest, empty_after_done) { - int bl = BufferingOopClosureTest::get_buffer_length(); - - testEmptyAfterDone(0, 0); - testEmptyAfterDone(1, 0); - testEmptyAfterDone(0, 1); - testEmptyAfterDone(1, 1); - testEmptyAfterDone(10, 0); - testEmptyAfterDone(0, 10); - testEmptyAfterDone(10, 10); - testEmptyAfterDone(0, bl); - testEmptyAfterDone(bl, 0); - testEmptyAfterDone(bl / 2, bl / 2); - testEmptyAfterDone(bl - 1, 1); - testEmptyAfterDone(1, bl - 1); - // Processed - testEmptyAfterDone(bl + 1, 0); - testEmptyAfterDone(bl * 2, 0); -}