# HG changeset patch # User lkorinth # Date 1565939906 -7200 # Node ID f7ca942a2714cced2cdd0597d3353c2bedfb5b8f # Parent fc82b6cb8b14f2622a9e162deb607611574d2921 8224661: Parallel GC: Use WorkGang (3: UpdateDensePrefixAndCompactionTask) Reviewed-by: stefank, kbarrett, tschatzl diff -r fc82b6cb8b14 -r f7ca942a2714 src/hotspot/share/gc/parallel/pcTasks.cpp --- a/src/hotspot/share/gc/parallel/pcTasks.cpp Fri Aug 16 09:18:23 2019 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- * - */ - -#include "precompiled.hpp" -#include "aot/aotLoader.hpp" -#include "classfile/classLoaderDataGraph.hpp" -#include "classfile/systemDictionary.hpp" -#include "code/codeCache.hpp" -#include "gc/parallel/parallelScavengeHeap.hpp" -#include "gc/parallel/pcTasks.hpp" -#include "gc/parallel/psCompactionManager.inline.hpp" -#include "gc/parallel/psParallelCompact.inline.hpp" -#include "gc/shared/collectedHeap.hpp" -#include "gc/shared/gcTimer.hpp" -#include "gc/shared/gcTraceTime.inline.hpp" -#include "logging/log.hpp" -#include "memory/iterator.inline.hpp" -#include "memory/resourceArea.hpp" -#include "memory/universe.hpp" -#include "oops/objArrayKlass.inline.hpp" -#include "oops/oop.inline.hpp" -#include "prims/jvmtiExport.hpp" -#include "runtime/jniHandles.hpp" -#include "runtime/thread.hpp" -#include "runtime/vmThread.hpp" -#include "services/management.hpp" -#include "utilities/stack.inline.hpp" - -// -// CompactionWithStealingTask -// - -CompactionWithStealingTask::CompactionWithStealingTask(ParallelTaskTerminator* t): - _terminator(t) {} - -void CompactionWithStealingTask::do_it(GCTaskManager* manager, uint which) { - assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc"); - - ParCompactionManager* cm = - ParCompactionManager::gc_thread_compaction_manager(which); - - // Drain the stacks that have been preloaded with regions - // that are ready to fill. - - cm->drain_region_stacks(); - - guarantee(cm->region_stack()->is_empty(), "Not empty"); - - size_t region_index = 0; - - while(true) { - if (ParCompactionManager::steal(which, region_index)) { - PSParallelCompact::fill_and_update_region(cm, region_index); - cm->drain_region_stacks(); - } else { - if (terminator()->offer_termination()) { - break; - } - // Go around again. 
- } - } - return; -} - -UpdateDensePrefixTask::UpdateDensePrefixTask( - PSParallelCompact::SpaceId space_id, - size_t region_index_start, - size_t region_index_end) : - _space_id(space_id), _region_index_start(region_index_start), - _region_index_end(region_index_end) {} - -void UpdateDensePrefixTask::do_it(GCTaskManager* manager, uint which) { - - ParCompactionManager* cm = - ParCompactionManager::gc_thread_compaction_manager(which); - - PSParallelCompact::update_and_deadwood_in_dense_prefix(cm, - _space_id, - _region_index_start, - _region_index_end); -} diff -r fc82b6cb8b14 -r f7ca942a2714 src/hotspot/share/gc/parallel/pcTasks.hpp --- a/src/hotspot/share/gc/parallel/pcTasks.hpp Fri Aug 16 09:18:23 2019 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- * - */ - -#ifndef SHARE_GC_PARALLEL_PCTASKS_HPP -#define SHARE_GC_PARALLEL_PCTASKS_HPP - -#include "gc/parallel/gcTaskManager.hpp" -#include "gc/parallel/psParallelCompact.hpp" -#include "gc/parallel/psTasks.hpp" -#include "gc/shared/referenceProcessor.hpp" - - -// Tasks for parallel compaction of the old generation -// -// Tasks are created and enqueued on a task queue. The -// tasks for parallel old collector for marking objects -// are MarkFromRootsTask and ThreadRootsMarkingTask. -// -// MarkFromRootsTask's are created -// with a root group (e.g., jni_handles) and when the do_it() -// method of a MarkFromRootsTask is executed, it starts marking -// form it's root group. -// -// ThreadRootsMarkingTask's are created for each Java thread. When -// the do_it() method of a ThreadRootsMarkingTask is executed, it -// starts marking from the thread's roots. -// -// The enqueueing of the MarkFromRootsTask and ThreadRootsMarkingTask -// do little more than create the task and put it on a queue. The -// queue is a GCTaskQueue and threads steal tasks from this GCTaskQueue. -// -// In addition to the MarkFromRootsTask and ThreadRootsMarkingTask -// tasks there are StealMarkingTask tasks. The StealMarkingTask's -// steal a reference from the marking stack of another -// thread and transitively marks the object of the reference -// and internal references. After successfully stealing a reference -// and marking it, the StealMarkingTask drains its marking stack -// stack before attempting another steal. -// -// ThreadRootsMarkingTask -// -// This task marks from the roots of a single thread. This task -// enables marking of thread roots in parallel. -// - -class ParallelTaskTerminator; - -// -// CompactionWithStealingTask -// -// This task is used to distribute work to idle threads. 
-// - -class CompactionWithStealingTask : public GCTask { - private: - ParallelTaskTerminator* const _terminator; - public: - CompactionWithStealingTask(ParallelTaskTerminator* t); - - char* name() { return (char *)"steal-region-task"; } - ParallelTaskTerminator* terminator() { return _terminator; } - - virtual void do_it(GCTaskManager* manager, uint which); -}; - -// -// UpdateDensePrefixTask -// -// This task is used to update the dense prefix -// of a space. -// - -class UpdateDensePrefixTask : public GCTask { - private: - PSParallelCompact::SpaceId _space_id; - size_t _region_index_start; - size_t _region_index_end; - - public: - char* name() { return (char *)"update-dense_prefix-task"; } - - UpdateDensePrefixTask(PSParallelCompact::SpaceId space_id, - size_t region_index_start, - size_t region_index_end); - - virtual void do_it(GCTaskManager* manager, uint which); -}; -#endif // SHARE_GC_PARALLEL_PCTASKS_HPP diff -r fc82b6cb8b14 -r f7ca942a2714 src/hotspot/share/gc/parallel/psCompactionManager.hpp --- a/src/hotspot/share/gc/parallel/psCompactionManager.hpp Fri Aug 16 09:18:23 2019 +0200 +++ b/src/hotspot/share/gc/parallel/psCompactionManager.hpp Fri Aug 16 09:18:26 2019 +0200 @@ -46,6 +46,7 @@ friend class IdleGCTask; friend class PCRefProcTask; friend class MarkFromRootsTask; + friend class UpdateDensePrefixAndCompactionTask; public: diff -r fc82b6cb8b14 -r f7ca942a2714 src/hotspot/share/gc/parallel/psParallelCompact.cpp --- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp Fri Aug 16 09:18:23 2019 +0200 +++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp Fri Aug 16 09:18:26 2019 +0200 @@ -34,7 +34,6 @@ #include "gc/parallel/parallelArguments.hpp" #include "gc/parallel/parallelScavengeHeap.inline.hpp" #include "gc/parallel/parMarkBitMap.inline.hpp" -#include "gc/parallel/pcTasks.hpp" #include "gc/parallel/psAdaptiveSizePolicy.hpp" #include "gc/parallel/psCompactionManager.inline.hpp" #include "gc/parallel/psOldGen.hpp" @@ -2409,13 +2408,12 @@ } }; 
-void PSParallelCompact::prepare_region_draining_tasks(GCTaskQueue* q, - uint parallel_gc_threads) +void PSParallelCompact::prepare_region_draining_tasks(uint parallel_gc_threads) { GCTraceTime(Trace, gc, phases) tm("Drain Task Setup", &_gc_timer); // Find the threads that are active - unsigned int which = 0; + uint worker_id = 0; // Find all regions that are available (can be filled immediately) and // distribute them to the thread stacks. The iteration is done in reverse @@ -2423,7 +2421,6 @@ const ParallelCompactData& sd = PSParallelCompact::summary_data(); - which = 0; // id + 1 is used to test termination so unsigned can // be used with an old_space_id == 0. FillableRegionLogger region_logger; @@ -2438,12 +2435,12 @@ for (size_t cur = end_region - 1; cur + 1 > beg_region; --cur) { if (sd.region(cur)->claim_unsafe()) { - ParCompactionManager* cm = ParCompactionManager::manager_array(which); + ParCompactionManager* cm = ParCompactionManager::manager_array(worker_id); cm->region_stack()->push(cur); region_logger.handle(cur); // Assign regions to tasks in round-robin fashion. 
- if (++which == parallel_gc_threads) { - which = 0; + if (++worker_id == parallel_gc_threads) { + worker_id = 0; } } } @@ -2451,10 +2448,40 @@ } } +class TaskQueue : StackObj { + volatile uint _counter; + uint _size; + uint _insert_index; + PSParallelCompact::UpdateDensePrefixTask* _backing_array; +public: + explicit TaskQueue(uint size) : _counter(0), _size(size), _insert_index(0), _backing_array(NULL) { + _backing_array = NEW_C_HEAP_ARRAY(PSParallelCompact::UpdateDensePrefixTask, _size, mtGC); + } + ~TaskQueue() { + assert(_counter >= _insert_index, "not all queue elements were claimed"); + FREE_C_HEAP_ARRAY(T, _backing_array); + } + + void push(const PSParallelCompact::UpdateDensePrefixTask& value) { + assert(_insert_index < _size, "too small backing array"); + _backing_array[_insert_index++] = value; + } + + bool try_claim(PSParallelCompact::UpdateDensePrefixTask& reference) { + uint claimed = Atomic::add(1u, &_counter) - 1; // -1 is so that we start with zero + if (claimed < _insert_index) { + reference = _backing_array[claimed]; + return true; + } else { + return false; + } + } +}; + #define PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING 4 -void PSParallelCompact::enqueue_dense_prefix_tasks(GCTaskQueue* q, - uint parallel_gc_threads) { +void PSParallelCompact::enqueue_dense_prefix_tasks(TaskQueue& task_queue, + uint parallel_gc_threads) { GCTraceTime(Trace, gc, phases) tm("Dense Prefix Task Setup", &_gc_timer); ParallelCompactData& sd = PSParallelCompact::summary_data(); @@ -2517,35 +2544,22 @@ // region_index_end is not processed size_t region_index_end = MIN2(region_index_start + regions_per_thread, region_index_end_dense_prefix); - q->enqueue(new UpdateDensePrefixTask(SpaceId(space_id), - region_index_start, - region_index_end)); + task_queue.push(UpdateDensePrefixTask(SpaceId(space_id), + region_index_start, + region_index_end)); region_index_start = region_index_end; } } // This gets any part of the dense prefix that did not // fit evenly. 
if (region_index_start < region_index_end_dense_prefix) { - q->enqueue(new UpdateDensePrefixTask(SpaceId(space_id), - region_index_start, - region_index_end_dense_prefix)); + task_queue.push(UpdateDensePrefixTask(SpaceId(space_id), + region_index_start, + region_index_end_dense_prefix)); } } } -void PSParallelCompact::enqueue_region_stealing_tasks( - GCTaskQueue* q, - ParallelTaskTerminator* terminator_ptr, - uint parallel_gc_threads) { - GCTraceTime(Trace, gc, phases) tm("Steal Task Setup", &_gc_timer); - - // Once a thread has drained it's stack, it should try to steal regions from - // other threads. - for (uint j = 0; j < parallel_gc_threads; j++) { - q->enqueue(new CompactionWithStealingTask(terminator_ptr)); - } -} - #ifdef ASSERT // Write a histogram of the number of times the block table was filled for a // region. @@ -2588,26 +2602,87 @@ } #endif // #ifdef ASSERT +static void compaction_with_stealing_work(ParallelTaskTerminator* terminator, uint worker_id) { + assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc"); + + ParCompactionManager* cm = + ParCompactionManager::gc_thread_compaction_manager(worker_id); + + // Drain the stacks that have been preloaded with regions + // that are ready to fill. + + cm->drain_region_stacks(); + + guarantee(cm->region_stack()->is_empty(), "Not empty"); + + size_t region_index = 0; + + while (true) { + if (ParCompactionManager::steal(worker_id, region_index)) { + PSParallelCompact::fill_and_update_region(cm, region_index); + cm->drain_region_stacks(); + } else { + if (terminator->offer_termination()) { + break; + } + // Go around again. 
+ } + } + return; +} + +class UpdateDensePrefixAndCompactionTask: public AbstractGangTask { + typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; + TaskQueue& _tq; + TaskTerminator _terminator; + uint _active_workers; + +public: + UpdateDensePrefixAndCompactionTask(TaskQueue& tq, uint active_workers) : + AbstractGangTask("UpdateDensePrefixAndCompactionTask"), + _tq(tq), + _terminator(active_workers, ParCompactionManager::region_array()), + _active_workers(active_workers) { + } + virtual void work(uint worker_id) { + ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(worker_id); + + for (PSParallelCompact::UpdateDensePrefixTask task; _tq.try_claim(task); /* empty */) { + PSParallelCompact::update_and_deadwood_in_dense_prefix(cm, + task._space_id, + task._region_index_start, + task._region_index_end); + } + + // Once a thread has drained its stack, it should try to steal regions from + // other threads. + compaction_with_stealing_work(_terminator.terminator(), worker_id); + } +}; + void PSParallelCompact::compact() { GCTraceTime(Info, gc, phases) tm("Compaction Phase", &_gc_timer); ParallelScavengeHeap* heap = ParallelScavengeHeap::heap(); PSOldGen* old_gen = heap->old_gen(); old_gen->start_array()->reset(); - uint parallel_gc_threads = heap->gc_task_manager()->workers(); - uint active_gc_threads = heap->gc_task_manager()->active_workers(); - TaskQueueSetSuper* qset = ParCompactionManager::region_array(); - TaskTerminator terminator(active_gc_threads, qset); - - GCTaskQueue* q = GCTaskQueue::create(); - prepare_region_draining_tasks(q, active_gc_threads); - enqueue_dense_prefix_tasks(q, active_gc_threads); - enqueue_region_stealing_tasks(q, terminator.terminator(), active_gc_threads); + uint active_gc_threads = ParallelScavengeHeap::heap()->workers().active_workers(); + + // for [0..last_space_id) + // for [0..active_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING) + // push + // push + // + // max push count is thus: 
last_space_id * (active_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING + 1) + TaskQueue task_queue(last_space_id * (active_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING + 1)); + prepare_region_draining_tasks(active_gc_threads); + enqueue_dense_prefix_tasks(task_queue, active_gc_threads); { GCTraceTime(Trace, gc, phases) tm("Par Compact", &_gc_timer); - gc_task_manager()->execute_and_wait(q); + UpdateDensePrefixAndCompactionTask task(task_queue, active_gc_threads); + ParallelScavengeHeap::heap()->workers().run_task(&task); #ifdef ASSERT // Verify that all regions have been processed before the deferred updates. diff -r fc82b6cb8b14 -r f7ca942a2714 src/hotspot/share/gc/parallel/psParallelCompact.hpp --- a/src/hotspot/share/gc/parallel/psParallelCompact.hpp Fri Aug 16 09:18:23 2019 +0200 +++ b/src/hotspot/share/gc/parallel/psParallelCompact.hpp Fri Aug 16 09:18:26 2019 +0200 @@ -913,6 +913,8 @@ // region that can be put on the ready list. The regions are atomically added // and removed from the ready list. +class TaskQueue; + class PSParallelCompact : AllStatic { public: // Convenient access to type names. @@ -925,6 +927,24 @@ from_space_id, to_space_id, last_space_id } SpaceId; + struct UpdateDensePrefixTask : public CHeapObj<mtGC> { + SpaceId _space_id; + size_t _region_index_start; + size_t _region_index_end; + + UpdateDensePrefixTask() : + _space_id(SpaceId(0)), + _region_index_start(0), + _region_index_end(0) {} + + UpdateDensePrefixTask(SpaceId space_id, + size_t region_index_start, + size_t region_index_end) : + _space_id(space_id), + _region_index_start(region_index_start), + _region_index_end(region_index_end) {} + }; + public: // Inline closure decls // @@ -1050,19 +1070,12 @@ static void compact(); // Add available regions to the stack and draining tasks to the task queue. 
- static void prepare_region_draining_tasks(GCTaskQueue* q, - uint parallel_gc_threads); + static void prepare_region_draining_tasks(uint parallel_gc_threads); // Add dense prefix update tasks to the task queue. - static void enqueue_dense_prefix_tasks(GCTaskQueue* q, + static void enqueue_dense_prefix_tasks(TaskQueue& task_queue, uint parallel_gc_threads); - // Add region stealing tasks to the task queue. - static void enqueue_region_stealing_tasks( - GCTaskQueue* q, - ParallelTaskTerminator* terminator_ptr, - uint parallel_gc_threads); - // If objects are left in eden after a collection, try to move the boundary // and absorb them into the old gen. Returns true if eden was emptied. static bool absorb_live_data_from_eden(PSAdaptiveSizePolicy* size_policy,