# HG changeset patch
# User zgu
# Date 1544208906 18000
# Node ID bec57b4a6d69651d68087efc2b9b66c6f7827c98
# Parent  d2f118d3f8e7ca6cfb92808fa7fabe67b3e4b03f
8204947: Port ShenandoahTaskTerminator to mainline and make it default
Reviewed-by: tschatzl, rkennke

diff -r d2f118d3f8e7 -r bec57b4a6d69 src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp
--- a/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp  Fri Dec 07 12:46:31 2018 +0800
+++ b/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp  Fri Dec 07 13:55:06 2018 -0500
@@ -55,6 +55,7 @@
 #include "gc/shared/genOopClosures.inline.hpp"
 #include "gc/shared/isGCActiveMark.hpp"
 #include "gc/shared/oopStorageParState.hpp"
+#include "gc/shared/owstTaskTerminator.hpp"
 #include "gc/shared/referencePolicy.hpp"
 #include "gc/shared/referenceProcessorPhaseTimes.hpp"
 #include "gc/shared/space.inline.hpp"
@@ -2982,7 +2983,7 @@
 // Forward decl
 class CMSConcMarkingTask;
 
-class CMSConcMarkingTerminator: public ParallelTaskTerminator {
+class CMSConcMarkingParallelTerminator: public ParallelTaskTerminator {
   CMSCollector* _collector;
   CMSConcMarkingTask* _task;
  public:
@@ -2992,7 +2993,7 @@
   // "queue_set" is a set of work queues of other threads.
   // "collector" is the CMS collector associated with this task terminator.
   // "yield" indicates whether we need the gang as a whole to yield.
-  CMSConcMarkingTerminator(int n_threads, TaskQueueSetSuper* queue_set, CMSCollector* collector) :
+  CMSConcMarkingParallelTerminator(int n_threads, TaskQueueSetSuper* queue_set, CMSCollector* collector) :
     ParallelTaskTerminator(n_threads, queue_set),
     _collector(collector) { }
 
@@ -3001,6 +3002,45 @@
   }
 };
 
+class CMSConcMarkingOWSTTerminator: public OWSTTaskTerminator {
+  CMSCollector* _collector;
+  CMSConcMarkingTask* _task;
+ public:
+  virtual void yield();
+
+  // "n_threads" is the number of threads to be terminated.
+  // "queue_set" is a set of work queues of other threads.
+  // "collector" is the CMS collector associated with this task terminator.
+  // "yield" indicates whether we need the gang as a whole to yield.
+  CMSConcMarkingOWSTTerminator(int n_threads, TaskQueueSetSuper* queue_set, CMSCollector* collector) :
+    OWSTTaskTerminator(n_threads, queue_set),
+    _collector(collector) { }
+
+  void set_task(CMSConcMarkingTask* task) {
+    _task = task;
+  }
+};
+
+class CMSConcMarkingTaskTerminator {
+ private:
+  ParallelTaskTerminator* _term;
+ public:
+  CMSConcMarkingTaskTerminator(int n_threads, TaskQueueSetSuper* queue_set, CMSCollector* collector) {
+    if (UseOWSTTaskTerminator) {
+      _term = new CMSConcMarkingOWSTTerminator(n_threads, queue_set, collector);
+    } else {
+      _term = new CMSConcMarkingParallelTerminator(n_threads, queue_set, collector);
+    }
+  }
+  ~CMSConcMarkingTaskTerminator() {
+    assert(_term != NULL, "Must not be NULL");
+    delete _term;
+  }
+
+  void set_task(CMSConcMarkingTask* task);
+  ParallelTaskTerminator* terminator() const { return _term; }
+};
+
 class CMSConcMarkingTerminatorTerminator: public TerminatorTerminator {
   CMSConcMarkingTask* _task;
  public:
@@ -3028,7 +3068,7 @@
   OopTaskQueueSet* _task_queues;
 
   // Termination (and yielding) support
-  CMSConcMarkingTerminator _term;
+  CMSConcMarkingTaskTerminator _term;
   CMSConcMarkingTerminatorTerminator _term_term;
 
  public:
@@ -3058,7 +3098,7 @@
 
   HeapWord* volatile* global_finger_addr() { return &_global_finger; }
 
-  CMSConcMarkingTerminator* terminator() { return &_term; }
+  ParallelTaskTerminator* terminator() { return _term.terminator(); }
 
   virtual void set_for_termination(uint active_workers) {
     terminator()->reset_for_reuse(active_workers);
@@ -3076,7 +3116,7 @@
   void reset(HeapWord* ra) {
     assert(_global_finger >= _cms_space->end(), "Postcondition of ::work(i)");
     _restart_addr = _global_finger = ra;
-    _term.reset_for_reuse();
+    _term.terminator()->reset_for_reuse();
   }
 
   static bool get_work_from_overflow_stack(CMSMarkStack* ovflw_stk,
@@ -3097,7 +3137,7 @@
   // thread has yielded.
 }
 
-void CMSConcMarkingTerminator::yield() {
+void CMSConcMarkingParallelTerminator::yield() {
   if (_task->should_yield()) {
     _task->yield();
   } else {
@@ -3105,6 +3145,22 @@
   }
 }
 
+void CMSConcMarkingOWSTTerminator::yield() {
+  if (_task->should_yield()) {
+    _task->yield();
+  } else {
+    OWSTTaskTerminator::yield();
+  }
+}
+
+void CMSConcMarkingTaskTerminator::set_task(CMSConcMarkingTask* task) {
+  if (UseOWSTTaskTerminator) {
+    ((CMSConcMarkingOWSTTerminator*)_term)->set_task(task);
+  } else {
+    ((CMSConcMarkingParallelTerminator*)_term)->set_task(task);
+  }
+}
+
 ////////////////////////////////////////////////////////////////
 // Concurrent Marking Algorithm Sketch
 ////////////////////////////////////////////////////////////////
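The CMS marking code cannot use the shared TaskTerminator wrapper introduced elsewhere in this patch, because both of its terminator variants must override yield() to cooperate with CMS's concurrent-yield protocol; hence the hand-rolled dispatch wrapper above. A hypothetical call-site sketch, not part of the patch (names follow the classes above; term_term stands in for the task's CMSConcMarkingTerminatorTerminator):

    // Build the wrapper: its constructor picks the OWST or the classic
    // protocol based on UseOWSTTaskTerminator, as shown above.
    CMSConcMarkingTaskTerminator term(n_workers, task_queues, collector);
    term.set_task(&marking_task);  // forwarded to whichever subclass was built

    // Workers drive termination through the ParallelTaskTerminator* view;
    // offer_termination() virtual-dispatches to the protocol chosen above.
    while (!term.terminator()->offer_termination(&term_term)) {
      // ... steal and process more marking work ...
    }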
@@ -4293,7 +4349,7 @@
 
   // The per-thread work queues, available here for stealing.
   OopTaskQueueSet* _task_queues;
-  ParallelTaskTerminator _term;
+  TaskTerminator _term;
   StrongRootsScope* _strong_roots_scope;
 
  public:
@@ -4315,7 +4371,7 @@
 
   OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }
 
-  ParallelTaskTerminator* terminator() { return &_term; }
+  ParallelTaskTerminator* terminator() { return _term.terminator(); }
 
   uint n_workers() { return _n_workers; }
   void work(uint worker_id);
@@ -5003,11 +5059,11 @@
 ////////////////////////////////////////////////////////
 class AbstractGangTaskWOopQueues : public AbstractGangTask {
   OopTaskQueueSet* _queues;
-  ParallelTaskTerminator _terminator;
+  TaskTerminator _terminator;
  public:
   AbstractGangTaskWOopQueues(const char* name, OopTaskQueueSet* queues, uint n_threads) :
     AbstractGangTask(name), _queues(queues), _terminator(n_threads, _queues) {}
-  ParallelTaskTerminator* terminator() { return &_terminator; }
+  ParallelTaskTerminator* terminator() { return _terminator.terminator(); }
   OopTaskQueueSet* queues() { return _queues; }
 };
 
diff -r d2f118d3f8e7 -r bec57b4a6d69 src/hotspot/share/gc/cms/parNewGeneration.cpp
--- a/src/hotspot/share/gc/cms/parNewGeneration.cpp  Fri Dec 07 12:46:31 2018 +0800
+++ b/src/hotspot/share/gc/cms/parNewGeneration.cpp  Fri Dec 07 13:55:06 2018 -0500
@@ -75,7 +75,7 @@
                                        Stack<oop, mtGC>* overflow_stacks_,
                                        PreservedMarks* preserved_marks_,
                                        size_t desired_plab_sz_,
-                                       ParallelTaskTerminator& term_) :
+                                       TaskTerminator& term_) :
   _work_queue(work_queue_set_->queue(thread_num_)),
   _overflow_stack(overflow_stacks_ ? overflow_stacks_ + thread_num_ : NULL),
   _preserved_marks(preserved_marks_),
@@ -87,7 +87,7 @@
   _old_gen_root_closure(young_gen_, this),
   _evacuate_followers(this, &_to_space_closure, &_old_gen_closure,
                       &_to_space_root_closure, young_gen_, &_old_gen_root_closure,
-                      work_queue_set_, &term_),
+                      work_queue_set_, term_.terminator()),
   _is_alive_closure(young_gen_),
   _scan_weak_ref_closure(young_gen_, this),
   _keep_alive_closure(&_scan_weak_ref_closure),
@@ -306,7 +306,7 @@
                         Stack<oop, mtGC>* overflow_stacks_,
                         PreservedMarksSet& preserved_marks_set,
                         size_t desired_plab_sz,
-                        ParallelTaskTerminator& term);
+                        TaskTerminator& term);
 
   ~ParScanThreadStateSet() { TASKQUEUE_STATS_ONLY(reset_stats()); }
 
@@ -327,14 +327,14 @@
 #endif // TASKQUEUE_STATS
 
  private:
-  ParallelTaskTerminator& _term;
+  TaskTerminator&         _term;
   ParNewGeneration&       _young_gen;
   Generation&             _old_gen;
   ParScanThreadState*     _per_thread_states;
   const int               _num_threads;
  public:
   bool is_valid(int id) const { return id < _num_threads; }
-  ParallelTaskTerminator* terminator() { return &_term; }
+  ParallelTaskTerminator* terminator() { return _term.terminator(); }
 };
 
 ParScanThreadStateSet::ParScanThreadStateSet(int num_threads,
@@ -345,7 +345,7 @@
                                              Stack<oop, mtGC>* overflow_stacks,
                                              PreservedMarksSet& preserved_marks_set,
                                              size_t desired_plab_sz,
-                                             ParallelTaskTerminator& term)
+                                             TaskTerminator& term)
   : _term(term),
     _young_gen(young_gen),
     _old_gen(old_gen),
@@ -379,7 +379,7 @@
 }
 
 void ParScanThreadStateSet::reset(uint active_threads, bool promotion_failed) {
-  _term.reset_for_reuse(active_threads);
+  _term.terminator()->reset_for_reuse(active_threads);
   if (promotion_failed) {
     for (int i = 0; i < _num_threads; ++i) {
       thread_state(i).print_promotion_failure_size();
@@ -904,7 +904,7 @@
 
   // Always set the terminator for the active number of workers
   // because only those workers go through the termination protocol.
-  ParallelTaskTerminator _term(active_workers, task_queues());
+  TaskTerminator _term(active_workers, task_queues());
   ParScanThreadStateSet thread_state_set(active_workers,
                                          *to(), *this, *_old_gen, *task_queues(),
                                          _overflow_stacks, _preserved_marks_set,
diff -r d2f118d3f8e7 -r bec57b4a6d69 src/hotspot/share/gc/cms/parNewGeneration.hpp
--- a/src/hotspot/share/gc/cms/parNewGeneration.hpp  Fri Dec 07 12:46:31 2018 +0800
+++ b/src/hotspot/share/gc/cms/parNewGeneration.hpp  Fri Dec 07 13:55:06 2018 -0500
@@ -133,7 +133,7 @@
                      Stack<oop, mtGC>* overflow_stacks_,
                      PreservedMarks* preserved_marks_,
                      size_t desired_plab_sz_,
-                     ParallelTaskTerminator& term_);
+                     TaskTerminator& term_);
 
  public:
   AgeTable* age_table() {return &_ageTable;}
diff -r d2f118d3f8e7 -r bec57b4a6d69 src/hotspot/share/gc/g1/g1CollectedHeap.cpp
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp  Fri Dec 07 12:46:31 2018 +0800
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp  Fri Dec 07 13:55:06 2018 -0500
@@ -3215,7 +3215,7 @@
   G1ParScanThreadStateSet* _pss;
   RefToScanQueueSet* _queues;
   G1RootProcessor* _root_processor;
-  ParallelTaskTerminator _terminator;
+  TaskTerminator _terminator;
   uint _n_workers;
 
 public:
@@ -3260,7 +3260,7 @@
       size_t evac_term_attempts = 0;
       {
         double start = os::elapsedTime();
-        G1ParEvacuateFollowersClosure evac(_g1h, pss, _queues, &_terminator, G1GCPhaseTimes::ObjCopy);
+        G1ParEvacuateFollowersClosure evac(_g1h, pss, _queues, _terminator.terminator(), G1GCPhaseTimes::ObjCopy);
         evac.do_void();
 
         evac_term_attempts = evac.term_attempts();
@@ -3572,8 +3572,8 @@
   assert(_workers->active_workers() >= ergo_workers,
          "Ergonomically chosen workers (%u) should be less than or equal to active workers (%u)",
          ergo_workers, _workers->active_workers());
-  ParallelTaskTerminator terminator(ergo_workers, _queues);
-  G1STWRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _pss, _queues, &terminator);
+  TaskTerminator terminator(ergo_workers, _queues);
+  G1STWRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _pss, _queues, terminator.terminator());
 
   _workers->run_task(&proc_task_proxy, ergo_workers);
 }
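The last g1CollectedHeap.cpp hunk above shows the pattern repeated at most call sites in this patch: the old stack-allocated ParallelTaskTerminator becomes a stack-scoped TaskTerminator, and tasks receive the raw ParallelTaskTerminator* via terminator(). A minimal sketch of the lifetime involved (the task name is hypothetical):

    {
      TaskTerminator terminator(ergo_workers, _queues);    // heap-allocates the
                                                           // OWST or classic impl
      SomeRefProcTask task(..., terminator.terminator());  // tasks keep seeing a
                                                           // ParallelTaskTerminator*
      _workers->run_task(&task, ergo_workers);
    }  // ~TaskTerminator deletes whichever implementation was chosen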
diff -r d2f118d3f8e7 -r bec57b4a6d69 src/hotspot/share/gc/g1/g1ConcurrentMark.cpp
--- a/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp  Fri Dec 07 12:46:31 2018 +0800
+++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp  Fri Dec 07 13:55:06 2018 -0500
@@ -378,7 +378,7 @@
 
   // _tasks set inside the constructor
   _task_queues(new G1CMTaskQueueSet((int) _max_num_tasks)),
-  _terminator(ParallelTaskTerminator((int) _max_num_tasks, _task_queues)),
+  _terminator((int) _max_num_tasks, _task_queues),
 
   _first_overflow_barrier_sync(),
   _second_overflow_barrier_sync(),
@@ -588,7 +588,7 @@
   _num_active_tasks = active_tasks;
   // Need to update the three data structures below according to the
   // number of active threads for this phase.
-  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
+  _terminator = TaskTerminator((int) active_tasks, _task_queues);
   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 }
diff -r d2f118d3f8e7 -r bec57b4a6d69 src/hotspot/share/gc/g1/g1ConcurrentMark.hpp
--- a/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp  Fri Dec 07 12:46:31 2018 +0800
+++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp  Fri Dec 07 13:55:06 2018 -0500
@@ -325,8 +325,8 @@
   uint                    _num_active_tasks;    // Number of tasks currently active
   G1CMTask**              _tasks;               // Task queue array (max_worker_id length)
 
-  G1CMTaskQueueSet*       _task_queues;         // Task queue set
-  ParallelTaskTerminator  _terminator;          // For termination
+  G1CMTaskQueueSet*       _task_queues;         // Task queue set
+  TaskTerminator          _terminator;          // For termination
 
   // Two sync barriers that are used to synchronize tasks when an
   // overflow occurs. The algorithm is the following. All tasks enter
@@ -412,10 +412,10 @@
   // Prints all gathered CM-related statistics
   void print_stats();
 
-  HeapWord*               finger()          { return _finger;   }
-  bool                    concurrent()      { return _concurrent; }
-  uint                    active_tasks()    { return _num_active_tasks; }
-  ParallelTaskTerminator* terminator()      { return &_terminator; }
+  HeapWord*               finger()           { return _finger;   }
+  bool                    concurrent()       { return _concurrent; }
+  uint                    active_tasks()     { return _num_active_tasks; }
+  ParallelTaskTerminator* terminator() const { return _terminator.terminator(); }
 
   // Claims the next available region to be scanned by a marking
   // task/thread. It might return NULL if the next region is empty or
diff -r d2f118d3f8e7 -r bec57b4a6d69 src/hotspot/share/gc/g1/g1FullGCMarkTask.cpp
--- a/src/hotspot/share/gc/g1/g1FullGCMarkTask.cpp  Fri Dec 07 12:46:31 2018 +0800
+++ b/src/hotspot/share/gc/g1/g1FullGCMarkTask.cpp  Fri Dec 07 13:55:06 2018 -0500
@@ -61,7 +61,7 @@
   }
 
   // Mark stack is populated, now process and drain it.
-  marker->complete_marking(collector()->oop_queue_set(), collector()->array_queue_set(), &_terminator);
+  marker->complete_marking(collector()->oop_queue_set(), collector()->array_queue_set(), _terminator.terminator());
 
   // This is the point where the entire marking should have completed.
   assert(marker->oop_stack()->is_empty(), "Marking should have completed");
diff -r d2f118d3f8e7 -r bec57b4a6d69 src/hotspot/share/gc/g1/g1FullGCMarkTask.hpp
--- a/src/hotspot/share/gc/g1/g1FullGCMarkTask.hpp  Fri Dec 07 12:46:31 2018 +0800
+++ b/src/hotspot/share/gc/g1/g1FullGCMarkTask.hpp  Fri Dec 07 13:55:06 2018 -0500
@@ -36,7 +36,7 @@
 
 class G1FullGCMarkTask : public G1FullGCTask {
   G1RootProcessor          _root_processor;
-  ParallelTaskTerminator   _terminator;
+  TaskTerminator           _terminator;
 
 public:
   G1FullGCMarkTask(G1FullCollector* collector);
diff -r d2f118d3f8e7 -r bec57b4a6d69 src/hotspot/share/gc/g1/g1FullGCReferenceProcessorExecutor.hpp
--- a/src/hotspot/share/gc/g1/g1FullGCReferenceProcessorExecutor.hpp  Fri Dec 07 12:46:31 2018 +0800
+++ b/src/hotspot/share/gc/g1/g1FullGCReferenceProcessorExecutor.hpp  Fri Dec 07 13:55:06 2018 -0500
@@ -32,6 +32,7 @@
 #include "gc/g1/g1StringDedup.hpp"
 #include "gc/g1/heapRegionManager.hpp"
 #include "gc/shared/referenceProcessor.hpp"
+#include "gc/shared/taskqueue.hpp"
 #include "utilities/ticks.hpp"
 
 class G1FullGCTracer;
@@ -58,9 +59,9 @@
   class G1RefProcTaskProxy : public AbstractGangTask {
     typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
-    ProcessTask&           _proc_task;
-    G1FullCollector*       _collector;
-    ParallelTaskTerminator _terminator;
+    ProcessTask&     _proc_task;
+    G1FullCollector* _collector;
+    TaskTerminator   _terminator;
 
   public:
     G1RefProcTaskProxy(ProcessTask& proc_task,
diff -r d2f118d3f8e7 -r bec57b4a6d69 src/hotspot/share/gc/parallel/pcTasks.cpp
--- a/src/hotspot/share/gc/parallel/pcTasks.cpp  Fri Dec 07 12:46:31 2018 +0800
+++ b/src/hotspot/share/gc/parallel/pcTasks.cpp  Fri Dec 07 13:55:06 2018 -0500
@@ -158,14 +158,15 @@
          "Ergonomically chosen workers (%u) must be equal to active workers (%u)",
          ergo_workers, active_gc_threads);
   OopTaskQueueSet* qset = ParCompactionManager::stack_array();
-  ParallelTaskTerminator terminator(active_gc_threads, qset);
+  TaskTerminator terminator(active_gc_threads, qset);
+
   GCTaskQueue* q = GCTaskQueue::create();
   for(uint i=0; i<active_gc_threads; i++) {
     q->enqueue(new RefProcTaskProxy(task, i));
   }
   if (task.marks_oops_alive() && (active_gc_threads>1)) {
     for (uint j=0; j<active_gc_threads; j++) {
-      q->enqueue(new StealMarkingTask(&terminator));
+      q->enqueue(new StealMarkingTask(terminator.terminator()));
     }
   }
   PSParallelCompact::gc_task_manager()->execute_and_wait(q);
diff -r d2f118d3f8e7 -r bec57b4a6d69 src/hotspot/share/gc/parallel/psParallelCompact.cpp
--- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp  Fri Dec 07 12:46:31 2018 +0800
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp  Fri Dec 07 13:55:06 2018 -0500
@@ -2100,7 +2100,7 @@
   uint parallel_gc_threads = heap->gc_task_manager()->workers();
   uint active_gc_threads = heap->gc_task_manager()->active_workers();
   TaskQueueSetSuper* qset = ParCompactionManager::stack_array();
-  ParallelTaskTerminator terminator(active_gc_threads, qset);
+  TaskTerminator terminator(active_gc_threads, qset);
 
   PCMarkAndPushClosure mark_and_push_closure(cm);
   ParCompactionManager::FollowStackClosure follow_stack_closure(cm);
@@ -2129,7 +2129,7 @@
 
   if (active_gc_threads > 1) {
     for (uint j = 0; j < active_gc_threads; j++) {
-      q->enqueue(new StealMarkingTask(&terminator));
+      q->enqueue(new StealMarkingTask(terminator.terminator()));
     }
   }
@@ -2459,12 +2459,12 @@
   uint parallel_gc_threads = heap->gc_task_manager()->workers();
   uint active_gc_threads = heap->gc_task_manager()->active_workers();
   TaskQueueSetSuper* qset = ParCompactionManager::region_array();
-  ParallelTaskTerminator terminator(active_gc_threads, qset);
+  TaskTerminator terminator(active_gc_threads, qset);
 
   GCTaskQueue* q = GCTaskQueue::create();
   prepare_region_draining_tasks(q, active_gc_threads);
   enqueue_dense_prefix_tasks(q, active_gc_threads);
-  enqueue_region_stealing_tasks(q, &terminator, active_gc_threads);
+  enqueue_region_stealing_tasks(q, terminator.terminator(), active_gc_threads);
 
   {
     GCTraceTime(Trace, gc, phases) tm("Par Compact", &_gc_timer);
diff -r d2f118d3f8e7 -r bec57b4a6d69 src/hotspot/share/gc/parallel/psScavenge.cpp
--- a/src/hotspot/share/gc/parallel/psScavenge.cpp  Fri Dec 07 12:46:31 2018 +0800
+++ b/src/hotspot/share/gc/parallel/psScavenge.cpp  Fri Dec 07 13:55:06 2018 -0500
@@ -168,11 +168,11 @@
   for(uint i=0; i < active_workers; i++) {
     q->enqueue(new PSRefProcTaskProxy(task, i));
   }
-  ParallelTaskTerminator terminator(active_workers,
-                                    (TaskQueueSetSuper*) PSPromotionManager::stack_array_depth());
+  TaskTerminator terminator(active_workers,
+                            (TaskQueueSetSuper*) PSPromotionManager::stack_array_depth());
   if (task.marks_oops_alive() && active_workers > 1) {
     for (uint j = 0; j < active_workers; j++) {
-      q->enqueue(new StealTask(&terminator));
+      q->enqueue(new StealTask(terminator.terminator()));
     }
   }
   manager->execute_and_wait(q);
@@ -380,16 +380,15 @@
     q->enqueue(new ScavengeRootsTask(ScavengeRootsTask::jvmti));
     q->enqueue(new ScavengeRootsTask(ScavengeRootsTask::code_cache));
 
-    ParallelTaskTerminator terminator(
-      active_workers,
-      (TaskQueueSetSuper*) promotion_manager->stack_array_depth());
+    TaskTerminator terminator(active_workers,
+                              (TaskQueueSetSuper*) promotion_manager->stack_array_depth());
 
     // If active_workers can exceed 1, add a StealTask.
    // PSPromotionManager::drain_stacks_depth() does not fully drain its
    // stacks and expects a StealTask to complete the draining if
    // ParallelGCThreads is > 1.
    if (gc_task_manager()->workers() > 1) {
      for (uint j = 0; j < active_workers; j++) {
-        q->enqueue(new StealTask(&terminator));
+        q->enqueue(new StealTask(terminator.terminator()));
      }
    }
diff -r d2f118d3f8e7 -r bec57b4a6d69 src/hotspot/share/gc/shared/gc_globals.hpp
--- a/src/hotspot/share/gc/shared/gc_globals.hpp  Fri Dec 07 12:46:31 2018 +0800
+++ b/src/hotspot/share/gc/shared/gc_globals.hpp  Fri Dec 07 13:55:06 2018 -0500
@@ -348,6 +348,10 @@
   develop(uintx, PromotionFailureALotInterval, 5,                           \
           "Total collections between promotion failures a lot")            \
                                                                             \
+  diagnostic(bool, UseOWSTTaskTerminator, true,                             \
+          "Use Optimized Work Stealing Threads task termination "          \
+          "protocol")                                                      \
+                                                                            \
   experimental(uintx, WorkStealingSleepMillis, 1,                           \
           "Sleep time when sleep is used for yields")                      \
                                                                             \
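The new flag defaults to true, which is what makes the OWST protocol the default, per the synopsis. Since the flag is declared diagnostic, switching back to the classic protocol in a product build should additionally require unlocking diagnostic options (standard HotSpot flag handling, not something this patch changes):

    java -XX:+UnlockDiagnosticVMOptions -XX:-UseOWSTTaskTerminator ...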
diff -r d2f118d3f8e7 -r bec57b4a6d69 src/hotspot/share/gc/shared/owstTaskTerminator.cpp
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/gc/shared/owstTaskTerminator.cpp  Fri Dec 07 13:55:06 2018 -0500
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2018, Red Hat, Inc. All rights reserved.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+
+#include "gc/shared/owstTaskTerminator.hpp"
+#include "logging/log.hpp"
+
+bool OWSTTaskTerminator::exit_termination(size_t tasks, TerminatorTerminator* terminator) {
+  return tasks > 0 || (terminator != NULL && terminator->should_exit_termination());
+}
+
+bool OWSTTaskTerminator::offer_termination(TerminatorTerminator* terminator) {
+  assert(_n_threads > 0, "Initialization is incorrect");
+  assert(_offered_termination < _n_threads, "Invariant");
+  assert(_blocker != NULL, "Invariant");
+
+  // Single worker, done
+  if (_n_threads == 1) {
+    _offered_termination = 1;
+    return true;
+  }
+
+  _blocker->lock_without_safepoint_check();
+  // All arrived, done
+  _offered_termination++;
+  if (_offered_termination == _n_threads) {
+    _blocker->notify_all();
+    _blocker->unlock();
+    return true;
+  }
+
+  Thread* the_thread = Thread::current();
+  while (true) {
+    if (_spin_master == NULL) {
+      _spin_master = the_thread;
+
+      _blocker->unlock();
+
+      if (do_spin_master_work(terminator)) {
+        assert(_offered_termination == _n_threads, "termination condition");
+        return true;
+      } else {
+        _blocker->lock_without_safepoint_check();
+      }
+    } else {
+      _blocker->wait(true, WorkStealingSleepMillis);
+
+      if (_offered_termination == _n_threads) {
+        _blocker->unlock();
+        return true;
+      }
+    }
+
+    size_t tasks = tasks_in_queue_set();
+    if (exit_termination(tasks, terminator)) {
+      _offered_termination--;
+      _blocker->unlock();
+      return false;
+    }
+  }
+}
+
+bool OWSTTaskTerminator::do_spin_master_work(TerminatorTerminator* terminator) {
+  uint yield_count = 0;
+  // Number of hard spin loops done since last yield
+  uint hard_spin_count = 0;
+  // Number of iterations in the hard spin loop.
+  uint hard_spin_limit = WorkStealingHardSpins;
+
+  // If WorkStealingSpinToYieldRatio is 0, no hard spinning is done.
+  // If it is greater than 0, then start with a small number
+  // of spins and increase number with each turn at spinning until
+  // the count of hard spins exceeds WorkStealingSpinToYieldRatio.
+  // Then do a yield() call and start spinning afresh.
+  if (WorkStealingSpinToYieldRatio > 0) {
+    hard_spin_limit = WorkStealingHardSpins >> WorkStealingSpinToYieldRatio;
+    hard_spin_limit = MAX2(hard_spin_limit, 1U);
+  }
+  // Remember the initial spin limit.
+  uint hard_spin_start = hard_spin_limit;
+
+  // Loop waiting for all threads to offer termination or
+  // more work.
+  while (true) {
+    // Look for more work.
+    // Periodically sleep() instead of yield() to give threads
+    // waiting on the cores the chance to grab this code
+    if (yield_count <= WorkStealingYieldsBeforeSleep) {
+      // Do a yield or hardspin.  For purposes of deciding whether
+      // to sleep, count this as a yield.
+      yield_count++;
+
+      // Periodically call yield() instead of spinning.
+      // After WorkStealingSpinToYieldRatio spins, do a yield() call
+      // and reset the counts and starting limit.
+      if (hard_spin_count > WorkStealingSpinToYieldRatio) {
+        yield();
+        hard_spin_count = 0;
+        hard_spin_limit = hard_spin_start;
+#ifdef TRACESPINNING
+        _total_yields++;
+#endif
+      } else {
+        // Hard spin this time
+        // Increase the hard spinning period but only up to a limit.
+        hard_spin_limit = MIN2(2*hard_spin_limit,
+                               (uint) WorkStealingHardSpins);
+        for (uint j = 0; j < hard_spin_limit; j++) {
+          SpinPause();
+        }
+        hard_spin_count++;
+#ifdef TRACESPINNING
+        _total_spins++;
+#endif
+      }
+    } else {
+      log_develop_trace(gc, task)("OWSTTaskTerminator::do_spin_master_work() thread " PTR_FORMAT " sleeps after %u yields",
+                                  p2i(Thread::current()), yield_count);
+      yield_count = 0;
+
+      MonitorLockerEx locker(_blocker, Mutex::_no_safepoint_check_flag);
+      _spin_master = NULL;
+      locker.wait(Mutex::_no_safepoint_check_flag, WorkStealingSleepMillis);
+      if (_spin_master == NULL) {
+        _spin_master = Thread::current();
+      } else {
+        return false;
+      }
+    }
+
+#ifdef TRACESPINNING
+    _total_peeks++;
+#endif
+    size_t tasks = tasks_in_queue_set();
+    if (exit_termination(tasks, terminator)) {
+      MonitorLockerEx locker(_blocker, Mutex::_no_safepoint_check_flag);
+      if (tasks >= _offered_termination - 1) {
+        locker.notify_all();
+      } else {
+        for (; tasks > 1; tasks--) {
+          locker.notify();
+        }
+      }
+      _spin_master = NULL;
+      return false;
+    } else if (_offered_termination == _n_threads) {
+      return true;
+    }
+  }
+}
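One subtlety in the exit path of do_spin_master_work() above: when the spin-master detects pending tasks, it wakes only as many sleeping workers as there are tasks to take. The counted loop issues notify() while tasks > 1, i.e. tasks - 1 wakeups, and the spin-master then returns false to exit termination and claim a task itself, so roughly tasks workers end up competing for tasks tasks; only when the work would satisfy nearly every waiter (tasks >= _offered_termination - 1) does it fall back to notify_all(). For example, if ten workers are parked in offer_termination() and four tasks appear, the spin-master wakes three of them and goes after the fourth task itself rather than stampeding all ten.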
diff -r d2f118d3f8e7 -r bec57b4a6d69 src/hotspot/share/gc/shared/owstTaskTerminator.hpp
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/gc/shared/owstTaskTerminator.hpp  Fri Dec 07 13:55:06 2018 -0500
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2018, Red Hat, Inc. All rights reserved.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+#ifndef SHARE_VM_GC_SHARED_OWSTTASKTERMINATOR_HPP
+#define SHARE_VM_GC_SHARED_OWSTTASKTERMINATOR_HPP
+
+#include "gc/shared/taskqueue.hpp"
+#include "runtime/mutex.hpp"
+#include "runtime/thread.hpp"
+
+/*
+ * OWST stands for Optimized Work Stealing Threads
+ *
+ * This is an enhanced implementation of Google's work stealing
+ * protocol, which is described in the paper:
+ * "Wessam Hassanein. 2016. Understanding and improving JVM GC work
+ * stealing at the data center scale. In Proceedings of the 2016 ACM
+ * SIGPLAN International Symposium on Memory Management (ISMM 2016). ACM,
+ * New York, NY, USA, 46-54. DOI: https://doi.org/10.1145/2926697.2926706"
+ *
+ * Instead of a dedicated spin-master, our implementation lets the
+ * spin-master relinquish the role before it goes to sleep/wait, allowing
+ * newly arrived threads to compete for the role.
+ * The intention of this enhancement is to reduce the spin-master's latency
+ * in detecting new tasks for stealing and in detecting the termination
+ * condition.
+ */
+
+class OWSTTaskTerminator: public ParallelTaskTerminator {
+private:
+  Monitor*    _blocker;
+  Thread*     _spin_master;
+
+public:
+  OWSTTaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set) :
+    ParallelTaskTerminator(n_threads, queue_set), _spin_master(NULL) {
+    _blocker = new Monitor(Mutex::leaf, "OWSTTaskTerminator", false, Monitor::_safepoint_check_never);
+  }
+
+  virtual ~OWSTTaskTerminator() {
+    assert(_blocker != NULL, "Can not be NULL");
+    delete _blocker;
+  }
+
+  bool offer_termination(TerminatorTerminator* terminator);
+
+protected:
+  // Whether the thread should exit the current termination protocol
+  virtual bool exit_termination(size_t tasks, TerminatorTerminator* terminator);
+
+private:
+  size_t tasks_in_queue_set() { return _queue_set->tasks(); }
+
+  /*
+   * Perform the spin-master task.
+   * Return true if the termination condition is detected, otherwise return false.
+   */
+  bool do_spin_master_work(TerminatorTerminator* terminator);
+};
+
+#endif // SHARE_VM_GC_SHARED_OWSTTASKTERMINATOR_HPP
diff -r d2f118d3f8e7 -r bec57b4a6d69 src/hotspot/share/gc/shared/taskqueue.cpp
--- a/src/hotspot/share/gc/shared/taskqueue.cpp  Fri Dec 07 12:46:31 2018 +0800
+++ b/src/hotspot/share/gc/shared/taskqueue.cpp  Fri Dec 07 13:55:06 2018 -0500
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "gc/shared/taskqueue.hpp"
+#include "gc/shared/owstTaskTerminator.hpp"
 #include "oops/oop.inline.hpp"
 #include "logging/log.hpp"
 #include "runtime/atomic.hpp"
@@ -247,3 +248,25 @@
   reset_for_reuse();
   _n_threads = n_threads;
 }
+
+TaskTerminator::TaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set) :
+  _terminator(UseOWSTTaskTerminator ? new OWSTTaskTerminator(n_threads, queue_set)
+                                    : new ParallelTaskTerminator(n_threads, queue_set)) {
+}
+
+TaskTerminator::~TaskTerminator() {
+  if (_terminator != NULL) {
+    delete _terminator;
+  }
+}
+
+// Move assignment
+TaskTerminator& TaskTerminator::operator=(const TaskTerminator& o) {
+  if (_terminator != NULL) {
+    delete _terminator;
+  }
+  _terminator = o.terminator();
+  const_cast<TaskTerminator&>(o)._terminator = NULL;
+  return *this;
+}
+
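The assignment operator above has exactly one caller in this patch: G1ConcurrentMark::set_concurrency(), which re-sizes its terminator with "_terminator = TaskTerminator((int) active_tasks, _task_queues);". Though spelled as a copy-assignment (this code predates HotSpot's adoption of C++11 move semantics), it behaves like a move: delete the current implementation, steal the temporary's pointer, and NULL the temporary through the const_cast so that its destructor, which tolerates NULL, frees nothing. A sketch of the ownership transfer:

    TaskTerminator t(4, queue_set);    // t owns implementation A
    t = TaskTerminator(8, queue_set);  // temporary owns B; A is deleted,
                                       // t takes B, temporary is left NULL
    // temporary destroyed here: the NULL check in ~TaskTerminator
    // means nothing is freed twice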
diff -r d2f118d3f8e7 -r bec57b4a6d69 src/hotspot/share/gc/shared/taskqueue.hpp
--- a/src/hotspot/share/gc/shared/taskqueue.hpp  Fri Dec 07 12:46:31 2018 +0800
+++ b/src/hotspot/share/gc/shared/taskqueue.hpp  Fri Dec 07 13:55:06 2018 -0500
@@ -447,8 +447,8 @@
 
 #undef TRACESPINNING
 
-class ParallelTaskTerminator: public StackObj {
-private:
+class ParallelTaskTerminator: public CHeapObj<mtGC> {
+protected:
   uint _n_threads;
   TaskQueueSetSuper* _queue_set;
   volatile uint _offered_termination;
@@ -481,7 +481,7 @@
   // As above, but it also terminates if the should_exit_termination()
   // method of the terminator parameter returns true. If terminator is
   // NULL, then it is ignored.
-  bool offer_termination(TerminatorTerminator* terminator);
+  virtual bool offer_termination(TerminatorTerminator* terminator);
 
   // Reset the terminator, so that it may be reused again.
   // The caller is responsible for ensuring that this is done
@@ -500,6 +500,38 @@
 #endif
 };
 
+#ifdef _MSC_VER
+#pragma warning(push)
+// warning C4521: multiple copy constructors specified
+#pragma warning(disable:4521)
+// warning C4522: multiple assignment operators specified
+#pragma warning(disable:4522)
+#endif
+
+class TaskTerminator : public StackObj {
+private:
+  ParallelTaskTerminator*  _terminator;
+
+  // Disable the following copy constructors and assignment operator
+  TaskTerminator(TaskTerminator& o) { }
+  TaskTerminator(const TaskTerminator& o) { }
+  TaskTerminator& operator=(TaskTerminator& o) { return *this; }
+public:
+  TaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set);
+  ~TaskTerminator();
+
+  // Move assignment
+  TaskTerminator& operator=(const TaskTerminator& o);
+
+  ParallelTaskTerminator* terminator() const {
+    return _terminator;
+  }
+};
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+
 typedef GenericTaskQueue<oop, mtGC>             OopTaskQueue;
 typedef GenericTaskQueueSet<OopTaskQueue, mtGC> OopTaskQueueSet;
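A closing note on the declaration above: TaskTerminator declares two copy constructors (one taking TaskTerminator&, one const TaskTerminator&) and two assignment operators, which is precisely what trips MSVC warnings C4521 and C4522 and motivates the pragma guards. Declaring both overloads privately blocks copying from const and non-const sources alike, and that matters because a copied wrapper would double-delete the owned terminator:

    TaskTerminator a(4, queue_set);
    // TaskTerminator b(a);   // blocked: a and b would both delete the same
    // TaskTerminator c = a;  // ParallelTaskTerminator in their destructors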