8224663: Parallel GC: Use WorkGang (5: ScavengeRootsTask)
author lkorinth
Fri, 16 Aug 2019 09:18:32 +0200
changeset 57771 50c959cc40e8
parent 57770 b5ca334ed54c
child 57772 2410b04f074f
8224663: Parallel GC: Use WorkGang (5: ScavengeRootsTask)
Reviewed-by: stefank, kbarrett, tschatzl
src/hotspot/share/gc/parallel/psCardTable.cpp
src/hotspot/share/gc/parallel/psPromotionManager.hpp
src/hotspot/share/gc/parallel/psScavenge.cpp
src/hotspot/share/gc/parallel/psTasks.cpp
src/hotspot/share/gc/parallel/psTasks.hpp
--- a/src/hotspot/share/gc/parallel/psCardTable.cpp	Fri Aug 16 09:18:29 2019 +0200
+++ b/src/hotspot/share/gc/parallel/psCardTable.cpp	Fri Aug 16 09:18:32 2019 +0200
@@ -128,6 +128,38 @@
 // when the space is empty, fix the calculation of
 // end_card to allow sp_top == sp->bottom().
 
+// The generation (old gen) is divided into slices, which are further
+// subdivided into stripes, with one stripe per GC thread. The size of
+// a stripe is a constant, ssize.
+//
+//      +===============+        slice 0
+//      |  stripe 0     |
+//      +---------------+
+//      |  stripe 1     |
+//      +---------------+
+//      |  stripe 2     |
+//      +---------------+
+//      |  stripe 3     |
+//      +===============+        slice 1
+//      |  stripe 0     |
+//      +---------------+
+//      |  stripe 1     |
+//      +---------------+
+//      |  stripe 2     |
+//      +---------------+
+//      |  stripe 3     |
+//      +===============+        slice 2
+//      ...
+//
+// In this case there are 4 threads, so 4 stripes.  A GC thread first works on
+// its stripe within slice 0 and then moves to its stripe in the next slice
+// until it has exceeded the top of the generation.  The distance to the
+// stripe in the next slice is calculated based on the number of stripes.  The
+// next stripe is at ssize * number_of_stripes (= slice_stride).  So after
+// finishing stripe 0 in slice 0, the thread finds stripe 0 in slice 1 by
+// adding slice_stride to the start of stripe 0 in slice 0 to get to the start
+// of stripe 0 in slice 1.
+
 void PSCardTable::scavenge_contents_parallel(ObjectStartArray* start_array,
                                              MutableSpace* sp,
                                              HeapWord* space_top,
--- a/src/hotspot/share/gc/parallel/psPromotionManager.hpp	Fri Aug 16 09:18:29 2019 +0200
+++ b/src/hotspot/share/gc/parallel/psPromotionManager.hpp	Fri Aug 16 09:18:32 2019 +0200
@@ -51,6 +51,7 @@
 
 class PSPromotionManager {
   friend class PSScavenge;
+  friend class ScavengeRootsTask;
   friend class PSRefProcTaskExecutor;
  private:
   static PaddedEnd<PSPromotionManager>* _manager_array;
--- a/src/hotspot/share/gc/parallel/psScavenge.cpp	Fri Aug 16 09:18:29 2019 +0200
+++ b/src/hotspot/share/gc/parallel/psScavenge.cpp	Fri Aug 16 09:18:32 2019 +0200
@@ -23,6 +23,8 @@
  */
 
 #include "precompiled.hpp"
+#include "aot/aotLoader.hpp"
+#include "classfile/classLoaderDataGraph.hpp"
 #include "classfile/stringTable.hpp"
 #include "code/codeCache.hpp"
 #include "gc/parallel/gcTaskManager.hpp"
@@ -32,6 +34,7 @@
 #include "gc/parallel/psMarkSweepProxy.hpp"
 #include "gc/parallel/psParallelCompact.inline.hpp"
 #include "gc/parallel/psPromotionManager.inline.hpp"
+#include "gc/parallel/psRootType.hpp"
 #include "gc/parallel/psScavenge.inline.hpp"
 #include "gc/parallel/psTasks.hpp"
 #include "gc/shared/gcCause.hpp"
@@ -45,9 +48,11 @@
 #include "gc/shared/referencePolicy.hpp"
 #include "gc/shared/referenceProcessor.hpp"
 #include "gc/shared/referenceProcessorPhaseTimes.hpp"
+#include "gc/shared/scavengableNMethods.hpp"
 #include "gc/shared/spaceDecorator.hpp"
 #include "gc/shared/weakProcessor.hpp"
 #include "gc/shared/workerPolicy.hpp"
+#include "gc/shared/workgroup.hpp"
 #include "memory/resourceArea.hpp"
 #include "memory/universe.hpp"
 #include "logging/log.hpp"
@@ -59,6 +64,7 @@
 #include "runtime/threadCritical.hpp"
 #include "runtime/vmThread.hpp"
 #include "runtime/vmOperations.hpp"
+#include "services/management.hpp"
 #include "services/memoryService.hpp"
 #include "utilities/stack.inline.hpp"
 
@@ -76,6 +82,87 @@
 ParallelScavengeTracer        PSScavenge::_gc_tracer;
 CollectorCounters*            PSScavenge::_counters = NULL;
 
+static void scavenge_roots_work(ParallelRootType::Value root_type, uint worker_id) {
+  assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc");
+
+  PSPromotionManager* pm = PSPromotionManager::gc_thread_promotion_manager(worker_id);
+  PSScavengeRootsClosure roots_closure(pm);
+  PSPromoteRootsClosure  roots_to_old_closure(pm);
+
+  switch (root_type) {
+    case ParallelRootType::universe:
+      Universe::oops_do(&roots_closure);
+      break;
+
+    case ParallelRootType::jni_handles:
+      JNIHandles::oops_do(&roots_closure);
+      break;
+
+    case ParallelRootType::object_synchronizer:
+      ObjectSynchronizer::oops_do(&roots_closure);
+      break;
+
+    case ParallelRootType::system_dictionary:
+      SystemDictionary::oops_do(&roots_closure);
+      break;
+
+    case ParallelRootType::class_loader_data:
+      {
+        PSScavengeCLDClosure cld_closure(pm);
+        ClassLoaderDataGraph::cld_do(&cld_closure);
+      }
+      break;
+
+    case ParallelRootType::management:
+      Management::oops_do(&roots_closure);
+      break;
+
+    case ParallelRootType::jvmti:
+      JvmtiExport::oops_do(&roots_closure);
+      break;
+
+    case ParallelRootType::code_cache:
+      {
+        MarkingCodeBlobClosure code_closure(&roots_to_old_closure, CodeBlobToOopClosure::FixRelocations);
+        ScavengableNMethods::nmethods_do(&code_closure);
+        AOTLoader::oops_do(&roots_closure);
+      }
+      break;
+
+    case ParallelRootType::sentinel:
+    DEBUG_ONLY(default:) // DEBUG_ONLY hack will create compile error on release builds (-Wswitch) and runtime check on debug builds
+      fatal("Bad enumeration value: %u", root_type);
+      break;
+  }
+
+  // Do the real work
+  pm->drain_stacks(false);
+}
+
+static void steal_work(ParallelTaskTerminator& terminator, uint worker_id) {
+  assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc");
+
+  PSPromotionManager* pm =
+    PSPromotionManager::gc_thread_promotion_manager(worker_id);
+  pm->drain_stacks(true);
+  guarantee(pm->stacks_empty(),
+            "stacks should be empty at this point");
+
+  while (true) {
+    StarTask p;
+    if (PSPromotionManager::steal_depth(worker_id, p)) {
+      TASKQUEUE_STATS_ONLY(pm->record_steal(p));
+      pm->process_popped_location_depth(p);
+      pm->drain_stacks_depth(true);
+    } else {
+      if (terminator.offer_termination()) {
+        break;
+      }
+    }
+  }
+  guarantee(pm->stacks_empty(), "stacks should be empty at this point");
+}
+
 // Define before use
 class PSIsAliveClosure: public BoolObjectClosure {
 public:
@@ -222,14 +309,96 @@
   return full_gc_done;
 }
 
-class PSAddThreadRootsTaskClosure : public ThreadClosure {
-private:
-  GCTaskQueue* _q;
+class PSThreadRootsTaskClosure : public ThreadClosure {
+  uint _worker_id;
+public:
+  PSThreadRootsTaskClosure(uint worker_id) : _worker_id(worker_id) { }
+  virtual void do_thread(Thread* thread) {
+    assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc");
+
+    PSPromotionManager* pm = PSPromotionManager::gc_thread_promotion_manager(_worker_id);
+    PSScavengeRootsClosure roots_closure(pm);
+    MarkingCodeBlobClosure roots_in_blobs(&roots_closure, CodeBlobToOopClosure::FixRelocations);
+
+    thread->oops_do(&roots_closure, &roots_in_blobs);
+
+    // Do the real work
+    pm->drain_stacks(false);
+  }
+};
+
+class ScavengeRootsTask : public AbstractGangTask {
+  StrongRootsScope _strong_roots_scope; // needed for Threads::possibly_parallel_threads_do
+  SequentialSubTasksDone _subtasks;
+  PSOldGen* _old_gen;
+  HeapWord* _gen_top;
+  uint _active_workers;
+  bool _is_empty;
+  TaskTerminator _terminator;
 
 public:
-  PSAddThreadRootsTaskClosure(GCTaskQueue* q) : _q(q) { }
-  void do_thread(Thread* t) {
-    _q->enqueue(new ThreadRootsTask(t));
+  ScavengeRootsTask(PSOldGen* old_gen,
+                    HeapWord* gen_top,
+                    uint active_workers,
+                    bool is_empty) :
+      AbstractGangTask("ScavengeRootsTask"),
+      _strong_roots_scope(active_workers),
+      _subtasks(),
+      _old_gen(old_gen),
+      _gen_top(gen_top),
+      _active_workers(active_workers),
+      _is_empty(is_empty),
+      _terminator(active_workers, PSPromotionManager::vm_thread_promotion_manager()->stack_array_depth()) {
+    _subtasks.set_n_threads(active_workers);
+    _subtasks.set_n_tasks(ParallelRootType::sentinel);
+  }
+
+  virtual void work(uint worker_id) {
+    ResourceMark rm;
+
+    if (!_is_empty) {
+      // There are only old-to-young pointers if there are objects
+      // in the old gen.
+
+      assert(_old_gen != NULL, "Sanity");
+      // There are no old-to-young pointers if the old gen is empty.
+      assert(!_old_gen->object_space()->is_empty(), "Should not be called is there is no work");
+      assert(_old_gen->object_space()->contains(_gen_top) || _gen_top == _old_gen->object_space()->top(), "Sanity");
+      assert(worker_id < ParallelGCThreads, "Sanity");
+
+      {
+        PSPromotionManager* pm = PSPromotionManager::gc_thread_promotion_manager(worker_id);
+        PSCardTable* card_table = ParallelScavengeHeap::heap()->card_table();
+
+        card_table->scavenge_contents_parallel(_old_gen->start_array(),
+                                               _old_gen->object_space(),
+                                               _gen_top,
+                                               pm,
+                                               worker_id,
+                                               _active_workers);
+
+        // Do the real work
+        pm->drain_stacks(false);
+      }
+    }
+
+    for (uint root_type = 0; _subtasks.try_claim_task(root_type); /* empty */ ) {
+      scavenge_roots_work(static_cast<ParallelRootType::Value>(root_type), worker_id);
+    }
+    _subtasks.all_tasks_completed();
+
+    PSThreadRootsTaskClosure closure(worker_id);
+    Threads::possibly_parallel_threads_do(true /*parallel */, &closure);
+
+
+    // If there is more than one active worker, finish with steal_work().
+    // PSPromotionManager::drain_stacks_depth() does not fully drain its
+    // stacks and expects a steal_work() to complete the draining if
+    // ParallelGCThreads is > 1.
+
+    if (_active_workers > 1) {
+      steal_work(*_terminator.terminator() , worker_id);
+    }
   }
 };
 
@@ -361,44 +530,9 @@
     PSPromotionManager* promotion_manager = PSPromotionManager::vm_thread_promotion_manager();
     {
       GCTraceTime(Debug, gc, phases) tm("Scavenge", &_gc_timer);
-      ParallelScavengeHeap::ParStrongRootsScope psrs;
 
-      GCTaskQueue* q = GCTaskQueue::create();
-
-      if (!old_gen->object_space()->is_empty()) {
-        // There are only old-to-young pointers if there are objects
-        // in the old gen.
-        uint stripe_total = active_workers;
-        for(uint i=0; i < stripe_total; i++) {
-          q->enqueue(new OldToYoungRootsTask(old_gen, old_top, i, stripe_total));
-        }
-      }
-
-      q->enqueue(new ScavengeRootsTask(ScavengeRootsTask::universe));
-      q->enqueue(new ScavengeRootsTask(ScavengeRootsTask::jni_handles));
-      // We scan the thread roots in parallel
-      PSAddThreadRootsTaskClosure cl(q);
-      Threads::java_threads_and_vm_thread_do(&cl);
-      q->enqueue(new ScavengeRootsTask(ScavengeRootsTask::object_synchronizer));
-      q->enqueue(new ScavengeRootsTask(ScavengeRootsTask::management));
-      q->enqueue(new ScavengeRootsTask(ScavengeRootsTask::system_dictionary));
-      q->enqueue(new ScavengeRootsTask(ScavengeRootsTask::class_loader_data));
-      q->enqueue(new ScavengeRootsTask(ScavengeRootsTask::jvmti));
-      q->enqueue(new ScavengeRootsTask(ScavengeRootsTask::code_cache));
-
-      TaskTerminator terminator(active_workers,
-                                (TaskQueueSetSuper*) promotion_manager->stack_array_depth());
-        // If active_workers can exceed 1, add a StrealTask.
-        // PSPromotionManager::drain_stacks_depth() does not fully drain its
-        // stacks and expects a StealTask to complete the draining if
-        // ParallelGCThreads is > 1.
-        if (gc_task_manager()->workers() > 1) {
-          for (uint j = 0; j < active_workers; j++) {
-            q->enqueue(new StealTask(terminator.terminator()));
-          }
-        }
-
-      gc_task_manager()->execute_and_wait(q);
+      ScavengeRootsTask task(old_gen, old_top, active_workers, old_gen->object_space()->is_empty());
+      ParallelScavengeHeap::heap()->workers().run_task(&task);
     }
 
     scavenge_midpoint.update();
--- a/src/hotspot/share/gc/parallel/psTasks.cpp	Fri Aug 16 09:18:29 2019 +0200
+++ b/src/hotspot/share/gc/parallel/psTasks.cpp	Fri Aug 16 09:18:32 2019 +0200
@@ -23,8 +23,6 @@
  */
 
 #include "precompiled.hpp"
-#include "aot/aotLoader.hpp"
-#include "classfile/classLoaderDataGraph.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "code/codeCache.hpp"
 #include "gc/parallel/gcTaskManager.hpp"
@@ -35,7 +33,6 @@
 #include "gc/parallel/psPromotionManager.inline.hpp"
 #include "gc/parallel/psScavenge.inline.hpp"
 #include "gc/parallel/psTasks.hpp"
-#include "gc/shared/scavengableNMethods.hpp"
 #include "gc/shared/taskqueue.inline.hpp"
 #include "memory/iterator.hpp"
 #include "memory/resourceArea.hpp"
@@ -45,88 +42,6 @@
 #include "runtime/vmThread.hpp"
 #include "services/management.hpp"
 
-//
-// ScavengeRootsTask
-//
-
-void ScavengeRootsTask::do_it(GCTaskManager* manager, uint which) {
-  assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc");
-
-  PSPromotionManager* pm = PSPromotionManager::gc_thread_promotion_manager(which);
-  PSScavengeRootsClosure roots_closure(pm);
-  PSPromoteRootsClosure  roots_to_old_closure(pm);
-
-  switch (_root_type) {
-    case universe:
-      Universe::oops_do(&roots_closure);
-      break;
-
-    case jni_handles:
-      JNIHandles::oops_do(&roots_closure);
-      break;
-
-    case threads:
-    {
-      ResourceMark rm;
-      Threads::oops_do(&roots_closure, NULL);
-    }
-    break;
-
-    case object_synchronizer:
-      ObjectSynchronizer::oops_do(&roots_closure);
-      break;
-
-    case system_dictionary:
-      SystemDictionary::oops_do(&roots_closure);
-      break;
-
-    case class_loader_data:
-    {
-      PSScavengeCLDClosure cld_closure(pm);
-      ClassLoaderDataGraph::cld_do(&cld_closure);
-    }
-    break;
-
-    case management:
-      Management::oops_do(&roots_closure);
-      break;
-
-    case jvmti:
-      JvmtiExport::oops_do(&roots_closure);
-      break;
-
-    case code_cache:
-      {
-        MarkingCodeBlobClosure code_closure(&roots_to_old_closure, CodeBlobToOopClosure::FixRelocations);
-        ScavengableNMethods::nmethods_do(&code_closure);
-        AOTLoader::oops_do(&roots_closure);
-      }
-      break;
-
-    default:
-      fatal("Unknown root type");
-  }
-
-  // Do the real work
-  pm->drain_stacks(false);
-}
-
-//
-// ThreadRootsTask
-//
-
-void ThreadRootsTask::do_it(GCTaskManager* manager, uint which) {
-  assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc");
-
-  PSPromotionManager* pm = PSPromotionManager::gc_thread_promotion_manager(which);
-  PSScavengeRootsClosure roots_closure(pm);
-  MarkingCodeBlobClosure roots_in_blobs(&roots_closure, CodeBlobToOopClosure::FixRelocations);
-
-  _thread->oops_do(&roots_closure, &roots_in_blobs);
-
-  // Do the real work
-  pm->drain_stacks(false);
-}
 
 //
 // StealTask
@@ -158,31 +73,3 @@
   }
   guarantee(pm->stacks_empty(), "stacks should be empty at this point");
 }
-
-//
-// OldToYoungRootsTask
-//
-
-void OldToYoungRootsTask::do_it(GCTaskManager* manager, uint which) {
-  // There are not old-to-young pointers if the old gen is empty.
-  assert(!_old_gen->object_space()->is_empty(),
-    "Should not be called is there is no work");
-  assert(_old_gen != NULL, "Sanity");
-  assert(_old_gen->object_space()->contains(_gen_top) || _gen_top == _old_gen->object_space()->top(), "Sanity");
-  assert(_stripe_number < ParallelGCThreads, "Sanity");
-
-  {
-    PSPromotionManager* pm = PSPromotionManager::gc_thread_promotion_manager(which);
-    PSCardTable* card_table = ParallelScavengeHeap::heap()->card_table();
-
-    card_table->scavenge_contents_parallel(_old_gen->start_array(),
-                                           _old_gen->object_space(),
-                                           _gen_top,
-                                           pm,
-                                           _stripe_number,
-                                           _stripe_total);
-
-    // Do the real work
-    pm->drain_stacks(false);
-  }
-}
--- a/src/hotspot/share/gc/parallel/psTasks.hpp	Fri Aug 16 09:18:29 2019 +0200
+++ b/src/hotspot/share/gc/parallel/psTasks.hpp	Fri Aug 16 09:18:32 2019 +0200
@@ -43,55 +43,6 @@
 class VMThread;
 
 //
-// ScavengeRootsTask
-//
-// This task scans all the roots of a given type.
-//
-//
-
-class ScavengeRootsTask : public GCTask {
- public:
-  enum RootType {
-    universe              = 1,
-    jni_handles           = 2,
-    threads               = 3,
-    object_synchronizer   = 4,
-    system_dictionary     = 5,
-    class_loader_data     = 6,
-    management            = 7,
-    jvmti                 = 8,
-    code_cache            = 9
-  };
- private:
-  RootType _root_type;
- public:
-  ScavengeRootsTask(RootType value) : _root_type(value) {}
-
-  char* name() { return (char *)"scavenge-roots-task"; }
-
-  virtual void do_it(GCTaskManager* manager, uint which);
-};
-
-//
-// ThreadRootsTask
-//
-// This task scans the roots of a single thread. This task
-// enables scanning of thread roots in parallel.
-//
-
-class ThreadRootsTask : public GCTask {
- private:
-  Thread* _thread;
-
- public:
-  ThreadRootsTask(Thread* root) : _thread(root) {}
-
-  char* name() { return (char *)"thread-roots-task"; }
-
-  virtual void do_it(GCTaskManager* manager, uint which);
-};
-
-//
 // StealTask
 //
 // This task is used to distribute work to idle threads.
@@ -110,71 +61,4 @@
   virtual void do_it(GCTaskManager* manager, uint which);
 };
 
-//
-// OldToYoungRootsTask
-//
-// This task is used to scan old to young roots in parallel
-//
-// A GC thread executing this tasks divides the generation (old gen)
-// into slices and takes a stripe in the slice as its part of the
-// work.
-//
-//      +===============+        slice 0
-//      |  stripe 0     |
-//      +---------------+
-//      |  stripe 1     |
-//      +---------------+
-//      |  stripe 2     |
-//      +---------------+
-//      |  stripe 3     |
-//      +===============+        slice 1
-//      |  stripe 0     |
-//      +---------------+
-//      |  stripe 1     |
-//      +---------------+
-//      |  stripe 2     |
-//      +---------------+
-//      |  stripe 3     |
-//      +===============+        slice 2
-//      ...
-//
-// A task is created for each stripe.  In this case there are 4 tasks
-// created.  A GC thread first works on its stripe within slice 0
-// and then moves to its stripe in the next slice until all stripes
-// exceed the top of the generation.  Note that having fewer GC threads
-// than stripes works because all the tasks are executed so all stripes
-// will be covered.  In this example if 4 tasks have been created to cover
-// all the stripes and there are only 3 threads, one of the threads will
-// get the tasks with the 4th stripe.  However, there is a dependence in
-// PSCardTable::scavenge_contents_parallel() on the number
-// of tasks created.  In scavenge_contents_parallel the distance
-// to the next stripe is calculated based on the number of tasks.
-// If the stripe width is ssize, a task's next stripe is at
-// ssize * number_of_tasks (= slice_stride).  In this case after
-// finishing stripe 0 in slice 0, the thread finds the stripe 0 in slice1
-// by adding slice_stride to the start of stripe 0 in slice 0 to get
-// to the start of stride 0 in slice 1.
-
-class OldToYoungRootsTask : public GCTask {
- private:
-  PSOldGen* _old_gen;
-  HeapWord* _gen_top;
-  uint _stripe_number;
-  uint _stripe_total;
-
- public:
-  OldToYoungRootsTask(PSOldGen *old_gen,
-                      HeapWord* gen_top,
-                      uint stripe_number,
-                      uint stripe_total) :
-    _old_gen(old_gen),
-    _gen_top(gen_top),
-    _stripe_number(stripe_number),
-    _stripe_total(stripe_total) { }
-
-  char* name() { return (char *)"old-to-young-roots-task"; }
-
-  virtual void do_it(GCTaskManager* manager, uint which);
-};
-
 #endif // SHARE_GC_PARALLEL_PSTASKS_HPP