8224661: Parallel GC: Use WorkGang (3: UpdateDensePrefixAndCompactionTask)
author lkorinth
Fri, 16 Aug 2019 09:18:26 +0200
changeset 57769 f7ca942a2714
parent 57768 fc82b6cb8b14
child 57770 b5ca334ed54c
8224661: Parallel GC: Use WorkGang (3: UpdateDensePrefixAndCompactionTask)
Reviewed-by: stefank, kbarrett, tschatzl
src/hotspot/share/gc/parallel/pcTasks.cpp
src/hotspot/share/gc/parallel/pcTasks.hpp
src/hotspot/share/gc/parallel/psCompactionManager.hpp
src/hotspot/share/gc/parallel/psParallelCompact.cpp
src/hotspot/share/gc/parallel/psParallelCompact.hpp
--- a/src/hotspot/share/gc/parallel/pcTasks.cpp	Fri Aug 16 09:18:23 2019 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "aot/aotLoader.hpp"
-#include "classfile/classLoaderDataGraph.hpp"
-#include "classfile/systemDictionary.hpp"
-#include "code/codeCache.hpp"
-#include "gc/parallel/parallelScavengeHeap.hpp"
-#include "gc/parallel/pcTasks.hpp"
-#include "gc/parallel/psCompactionManager.inline.hpp"
-#include "gc/parallel/psParallelCompact.inline.hpp"
-#include "gc/shared/collectedHeap.hpp"
-#include "gc/shared/gcTimer.hpp"
-#include "gc/shared/gcTraceTime.inline.hpp"
-#include "logging/log.hpp"
-#include "memory/iterator.inline.hpp"
-#include "memory/resourceArea.hpp"
-#include "memory/universe.hpp"
-#include "oops/objArrayKlass.inline.hpp"
-#include "oops/oop.inline.hpp"
-#include "prims/jvmtiExport.hpp"
-#include "runtime/jniHandles.hpp"
-#include "runtime/thread.hpp"
-#include "runtime/vmThread.hpp"
-#include "services/management.hpp"
-#include "utilities/stack.inline.hpp"
-
-//
-// CompactionWithStealingTask
-//
-
-CompactionWithStealingTask::CompactionWithStealingTask(ParallelTaskTerminator* t):
-  _terminator(t) {}
-
-void CompactionWithStealingTask::do_it(GCTaskManager* manager, uint which) {
-  assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc");
-
-  ParCompactionManager* cm =
-    ParCompactionManager::gc_thread_compaction_manager(which);
-
-  // Drain the stacks that have been preloaded with regions
-  // that are ready to fill.
-
-  cm->drain_region_stacks();
-
-  guarantee(cm->region_stack()->is_empty(), "Not empty");
-
-  size_t region_index = 0;
-
-  while(true) {
-    if (ParCompactionManager::steal(which, region_index)) {
-      PSParallelCompact::fill_and_update_region(cm, region_index);
-      cm->drain_region_stacks();
-    } else {
-      if (terminator()->offer_termination()) {
-        break;
-      }
-      // Go around again.
-    }
-  }
-  return;
-}
-
-UpdateDensePrefixTask::UpdateDensePrefixTask(
-                                   PSParallelCompact::SpaceId space_id,
-                                   size_t region_index_start,
-                                   size_t region_index_end) :
-  _space_id(space_id), _region_index_start(region_index_start),
-  _region_index_end(region_index_end) {}
-
-void UpdateDensePrefixTask::do_it(GCTaskManager* manager, uint which) {
-
-  ParCompactionManager* cm =
-    ParCompactionManager::gc_thread_compaction_manager(which);
-
-  PSParallelCompact::update_and_deadwood_in_dense_prefix(cm,
-                                                         _space_id,
-                                                         _region_index_start,
-                                                         _region_index_end);
-}
--- a/src/hotspot/share/gc/parallel/pcTasks.hpp	Fri Aug 16 09:18:23 2019 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_GC_PARALLEL_PCTASKS_HPP
-#define SHARE_GC_PARALLEL_PCTASKS_HPP
-
-#include "gc/parallel/gcTaskManager.hpp"
-#include "gc/parallel/psParallelCompact.hpp"
-#include "gc/parallel/psTasks.hpp"
-#include "gc/shared/referenceProcessor.hpp"
-
-
-// Tasks for parallel compaction of the old generation
-//
-// Tasks are created and enqueued on a task queue. The
-// tasks for parallel old collector for marking objects
-// are MarkFromRootsTask and ThreadRootsMarkingTask.
-//
-// MarkFromRootsTask's are created
-// with a root group (e.g., jni_handles) and when the do_it()
-// method of a MarkFromRootsTask is executed, it starts marking
-// form it's root group.
-//
-// ThreadRootsMarkingTask's are created for each Java thread.  When
-// the do_it() method of a ThreadRootsMarkingTask is executed, it
-// starts marking from the thread's roots.
-//
-// The enqueueing of the MarkFromRootsTask and ThreadRootsMarkingTask
-// do little more than create the task and put it on a queue.  The
-// queue is a GCTaskQueue and threads steal tasks from this GCTaskQueue.
-//
-// In addition to the MarkFromRootsTask and ThreadRootsMarkingTask
-// tasks there are StealMarkingTask tasks.  The StealMarkingTask's
-// steal a reference from the marking stack of another
-// thread and transitively marks the object of the reference
-// and internal references.  After successfully stealing a reference
-// and marking it, the StealMarkingTask drains its marking stack
-// stack before attempting another steal.
-//
-// ThreadRootsMarkingTask
-//
-// This task marks from the roots of a single thread. This task
-// enables marking of thread roots in parallel.
-//
-
-class ParallelTaskTerminator;
-
-//
-// CompactionWithStealingTask
-//
-// This task is used to distribute work to idle threads.
-//
-
-class CompactionWithStealingTask : public GCTask {
- private:
-   ParallelTaskTerminator* const _terminator;
- public:
-  CompactionWithStealingTask(ParallelTaskTerminator* t);
-
-  char* name() { return (char *)"steal-region-task"; }
-  ParallelTaskTerminator* terminator() { return _terminator; }
-
-  virtual void do_it(GCTaskManager* manager, uint which);
-};
-
-//
-// UpdateDensePrefixTask
-//
-// This task is used to update the dense prefix
-// of a space.
-//
-
-class UpdateDensePrefixTask : public GCTask {
- private:
-  PSParallelCompact::SpaceId _space_id;
-  size_t _region_index_start;
-  size_t _region_index_end;
-
- public:
-  char* name() { return (char *)"update-dense_prefix-task"; }
-
-  UpdateDensePrefixTask(PSParallelCompact::SpaceId space_id,
-                        size_t region_index_start,
-                        size_t region_index_end);
-
-  virtual void do_it(GCTaskManager* manager, uint which);
-};
-#endif // SHARE_GC_PARALLEL_PCTASKS_HPP
--- a/src/hotspot/share/gc/parallel/psCompactionManager.hpp	Fri Aug 16 09:18:23 2019 +0200
+++ b/src/hotspot/share/gc/parallel/psCompactionManager.hpp	Fri Aug 16 09:18:26 2019 +0200
@@ -46,6 +46,7 @@
   friend class IdleGCTask;
   friend class PCRefProcTask;
   friend class MarkFromRootsTask;
+  friend class UpdateDensePrefixAndCompactionTask;
 
  public:
 
--- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp	Fri Aug 16 09:18:23 2019 +0200
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp	Fri Aug 16 09:18:26 2019 +0200
@@ -34,7 +34,6 @@
 #include "gc/parallel/parallelArguments.hpp"
 #include "gc/parallel/parallelScavengeHeap.inline.hpp"
 #include "gc/parallel/parMarkBitMap.inline.hpp"
-#include "gc/parallel/pcTasks.hpp"
 #include "gc/parallel/psAdaptiveSizePolicy.hpp"
 #include "gc/parallel/psCompactionManager.inline.hpp"
 #include "gc/parallel/psOldGen.hpp"
@@ -2409,13 +2408,12 @@
   }
 };
 
-void PSParallelCompact::prepare_region_draining_tasks(GCTaskQueue* q,
-                                                      uint parallel_gc_threads)
+void PSParallelCompact::prepare_region_draining_tasks(uint parallel_gc_threads)
 {
   GCTraceTime(Trace, gc, phases) tm("Drain Task Setup", &_gc_timer);
 
   // Find the threads that are active
-  unsigned int which = 0;
+  uint worker_id = 0;
 
   // Find all regions that are available (can be filled immediately) and
   // distribute them to the thread stacks.  The iteration is done in reverse
@@ -2423,7 +2421,6 @@
 
   const ParallelCompactData& sd = PSParallelCompact::summary_data();
 
-  which = 0;
   // id + 1 is used to test termination so unsigned  can
   // be used with an old_space_id == 0.
   FillableRegionLogger region_logger;
@@ -2438,12 +2435,12 @@
 
     for (size_t cur = end_region - 1; cur + 1 > beg_region; --cur) {
       if (sd.region(cur)->claim_unsafe()) {
-        ParCompactionManager* cm = ParCompactionManager::manager_array(which);
+        ParCompactionManager* cm = ParCompactionManager::manager_array(worker_id);
         cm->region_stack()->push(cur);
         region_logger.handle(cur);
         // Assign regions to tasks in round-robin fashion.
-        if (++which == parallel_gc_threads) {
-          which = 0;
+        if (++worker_id == parallel_gc_threads) {
+          worker_id = 0;
         }
       }
     }
@@ -2451,10 +2448,40 @@
   }
 }
 
+// Fixed-capacity array of dense prefix tasks: push() is unsynchronized, try_claim() uses an atomic counter.
+class TaskQueue : StackObj {
+  volatile uint _counter;   // number of try_claim() calls so far
+  uint _size;               // capacity of _backing_array
+  uint _insert_index;       // number of tasks pushed
+  PSParallelCompact::UpdateDensePrefixTask* _backing_array;
+public:
+  explicit TaskQueue(uint size) : _counter(0), _size(size), _insert_index(0), _backing_array(NULL) {
+    _backing_array = NEW_C_HEAP_ARRAY(PSParallelCompact::UpdateDensePrefixTask, _size, mtGC);
+  }
+  ~TaskQueue() {
+    assert(_counter >= _insert_index, "not all queue elements were claimed");
+    FREE_C_HEAP_ARRAY(PSParallelCompact::UpdateDensePrefixTask, _backing_array);
+  }
+  // Appends a copy of value; the capacity given to the constructor must not be exceeded.
+  void push(const PSParallelCompact::UpdateDensePrefixTask& value) {
+    assert(_insert_index < _size, "too small backing array");
+    _backing_array[_insert_index++] = value;
+  }
+  // Copies the next unclaimed task into reference and returns true, or returns false when none remain.
+  bool try_claim(PSParallelCompact::UpdateDensePrefixTask& reference) {
+    uint claimed = Atomic::add(1u, &_counter) - 1; // -1 is so that we start with zero
+    if (claimed >= _insert_index) {
+      return false;
+    }
+    reference = _backing_array[claimed];
+    return true;
+  }
+};
+
 #define PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING 4
 
-void PSParallelCompact::enqueue_dense_prefix_tasks(GCTaskQueue* q,
-                                                    uint parallel_gc_threads) {
+void PSParallelCompact::enqueue_dense_prefix_tasks(TaskQueue& task_queue,
+                                                   uint parallel_gc_threads) {
   GCTraceTime(Trace, gc, phases) tm("Dense Prefix Task Setup", &_gc_timer);
 
   ParallelCompactData& sd = PSParallelCompact::summary_data();
@@ -2517,35 +2544,22 @@
         // region_index_end is not processed
         size_t region_index_end = MIN2(region_index_start + regions_per_thread,
                                        region_index_end_dense_prefix);
-        q->enqueue(new UpdateDensePrefixTask(SpaceId(space_id),
-                                             region_index_start,
-                                             region_index_end));
+        task_queue.push(UpdateDensePrefixTask(SpaceId(space_id),
+                                              region_index_start,
+                                              region_index_end));
         region_index_start = region_index_end;
       }
     }
     // This gets any part of the dense prefix that did not
     // fit evenly.
     if (region_index_start < region_index_end_dense_prefix) {
-      q->enqueue(new UpdateDensePrefixTask(SpaceId(space_id),
-                                           region_index_start,
-                                           region_index_end_dense_prefix));
+      task_queue.push(UpdateDensePrefixTask(SpaceId(space_id),
+                                            region_index_start,
+                                            region_index_end_dense_prefix));
     }
   }
 }
 
-void PSParallelCompact::enqueue_region_stealing_tasks(
-                                     GCTaskQueue* q,
-                                     ParallelTaskTerminator* terminator_ptr,
-                                     uint parallel_gc_threads) {
-  GCTraceTime(Trace, gc, phases) tm("Steal Task Setup", &_gc_timer);
-
-  // Once a thread has drained it's stack, it should try to steal regions from
-  // other threads.
-  for (uint j = 0; j < parallel_gc_threads; j++) {
-    q->enqueue(new CompactionWithStealingTask(terminator_ptr));
-  }
-}
-
 #ifdef ASSERT
 // Write a histogram of the number of times the block table was filled for a
 // region.
@@ -2588,26 +2602,87 @@
 }
 #endif // #ifdef ASSERT
 
+// Per-worker compaction: empty this worker's own region stack, then keep
+// stealing regions from other workers until the terminator agrees to stop.
+static void compaction_with_stealing_work(ParallelTaskTerminator* terminator, uint worker_id) {
+  assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc");
+
+  ParCompactionManager* const cm = ParCompactionManager::gc_thread_compaction_manager(worker_id);
+
+  // The region stacks were preloaded with regions that are ready to fill;
+  // work through those before looking elsewhere.
+  cm->drain_region_stacks();
+  guarantee(cm->region_stack()->is_empty(), "Not empty");
+
+  size_t stolen_region = 0;
+  for (;;) {
+    if (!ParCompactionManager::steal(worker_id, stolen_region)) {
+      // Nothing to steal right now: finish if termination is agreed,
+      // otherwise go around again.
+      if (terminator->offer_termination()) {
+        return;
+      }
+      continue;
+    }
+
+    // Fill the stolen region, then drain whatever is on this worker's stack.
+    PSParallelCompact::fill_and_update_region(cm, stolen_region);
+    cm->drain_region_stacks();
+  }
+}
+
+// Gang task that claims dense prefix update tasks and then compacts regions with stealing.
+class UpdateDensePrefixAndCompactionTask: public AbstractGangTask {
+  TaskQueue& _tq;              // queue of dense prefix tasks, held by reference
+  TaskTerminator _terminator;  // supplies the terminator for the stealing loop
+
+public:
+  // active_workers is the thread count handed to the terminator.
+  UpdateDensePrefixAndCompactionTask(TaskQueue& tq, uint active_workers) :
+      AbstractGangTask("UpdateDensePrefixAndCompactionTask"),
+      _tq(tq),
+      _terminator(active_workers, ParCompactionManager::region_array()) {
+  }
+  // Claims dense prefix tasks until try_claim() fails, then runs the stealing compaction loop.
+  virtual void work(uint worker_id) {
+    ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(worker_id);
+
+    for (PSParallelCompact::UpdateDensePrefixTask task; _tq.try_claim(task); /* empty */) {
+      PSParallelCompact::update_and_deadwood_in_dense_prefix(cm,
+                                                             task._space_id,
+                                                             task._region_index_start,
+                                                             task._region_index_end);
+    }
+
+    // Once a thread has drained its stack, it should try to steal regions from
+    // other threads.
+    compaction_with_stealing_work(_terminator.terminator(), worker_id);
+  }
+};
+
 void PSParallelCompact::compact() {
   GCTraceTime(Info, gc, phases) tm("Compaction Phase", &_gc_timer);
 
   ParallelScavengeHeap* heap = ParallelScavengeHeap::heap();
   PSOldGen* old_gen = heap->old_gen();
   old_gen->start_array()->reset();
-  uint parallel_gc_threads = heap->gc_task_manager()->workers();
-  uint active_gc_threads = heap->gc_task_manager()->active_workers();
-  TaskQueueSetSuper* qset = ParCompactionManager::region_array();
-  TaskTerminator terminator(active_gc_threads, qset);
-
-  GCTaskQueue* q = GCTaskQueue::create();
-  prepare_region_draining_tasks(q, active_gc_threads);
-  enqueue_dense_prefix_tasks(q, active_gc_threads);
-  enqueue_region_stealing_tasks(q, terminator.terminator(), active_gc_threads);
+  uint active_gc_threads = ParallelScavengeHeap::heap()->workers().active_workers();
+
+  // for [0..last_space_id)
+  //     for [0..active_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING)
+  //         push
+  //     push
+  //
+  // max push count is thus: last_space_id * (active_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING + 1)
+  TaskQueue task_queue(last_space_id * (active_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING + 1));
+  prepare_region_draining_tasks(active_gc_threads);
+  enqueue_dense_prefix_tasks(task_queue, active_gc_threads);
 
   {
     GCTraceTime(Trace, gc, phases) tm("Par Compact", &_gc_timer);
 
-    gc_task_manager()->execute_and_wait(q);
+    UpdateDensePrefixAndCompactionTask task(task_queue, active_gc_threads);
+    ParallelScavengeHeap::heap()->workers().run_task(&task);
 
 #ifdef  ASSERT
     // Verify that all regions have been processed before the deferred updates.
--- a/src/hotspot/share/gc/parallel/psParallelCompact.hpp	Fri Aug 16 09:18:23 2019 +0200
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.hpp	Fri Aug 16 09:18:26 2019 +0200
@@ -913,6 +913,8 @@
 // region that can be put on the ready list.  The regions are atomically added
 // and removed from the ready list.
 
+class TaskQueue;
+
 class PSParallelCompact : AllStatic {
  public:
   // Convenient access to type names.
@@ -925,6 +927,24 @@
     from_space_id, to_space_id, last_space_id
   } SpaceId;
 
+  // Work item naming a range of dense prefix regions within one space.
+  struct UpdateDensePrefixTask : public CHeapObj<mtGC> {
+    SpaceId _space_id;           // space the regions belong to
+    size_t _region_index_start;  // start of the region index range
+    size_t _region_index_end;    // end of the region index range
+
+    // Placeholder task with every field zero.
+    UpdateDensePrefixTask()
+      : _space_id(SpaceId(0)), _region_index_start(0), _region_index_end(0) {}
+
+    UpdateDensePrefixTask(SpaceId space_id,
+                          size_t region_index_start,
+                          size_t region_index_end)
+      : _space_id(space_id),
+        _region_index_start(region_index_start),
+        _region_index_end(region_index_end) {}
+  };
+
  public:
   // Inline closure decls
   //
@@ -1050,19 +1070,12 @@
   static void compact();
 
   // Add available regions to the stack and draining tasks to the task queue.
-  static void prepare_region_draining_tasks(GCTaskQueue* q,
-                                            uint parallel_gc_threads);
+  static void prepare_region_draining_tasks(uint parallel_gc_threads);
 
   // Add dense prefix update tasks to the task queue.
-  static void enqueue_dense_prefix_tasks(GCTaskQueue* q,
+  static void enqueue_dense_prefix_tasks(TaskQueue& task_queue,
                                          uint parallel_gc_threads);
 
-  // Add region stealing tasks to the task queue.
-  static void enqueue_region_stealing_tasks(
-                                       GCTaskQueue* q,
-                                       ParallelTaskTerminator* terminator_ptr,
-                                       uint parallel_gc_threads);
-
   // If objects are left in eden after a collection, try to move the boundary
   // and absorb them into the old gen.  Returns true if eden was emptied.
   static bool absorb_live_data_from_eden(PSAdaptiveSizePolicy* size_policy,