8202845: Refactor reference processing for improved parallelism
author tschatzl
Tue, 29 May 2018 09:26:00 +0200
changeset 50605 7f63c74f0974
parent 50604 929621cf06b4
child 50606 8f1d5d706bdd
8202845: Refactor reference processing for improved parallelism
Summary: Fold reference processing's nine phases into four to decrease startup and termination time of this phase.
Reviewed-by: kbarrett, sjohanss
src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp
src/hotspot/share/gc/cms/parNewGeneration.cpp
src/hotspot/share/gc/g1/g1CollectedHeap.cpp
src/hotspot/share/gc/parallel/psMarkSweep.cpp
src/hotspot/share/gc/parallel/psParallelCompact.cpp
src/hotspot/share/gc/parallel/psScavenge.cpp
src/hotspot/share/gc/serial/defNewGeneration.cpp
src/hotspot/share/gc/serial/genMarkSweep.cpp
src/hotspot/share/gc/shared/referenceProcessor.cpp
src/hotspot/share/gc/shared/referenceProcessor.hpp
src/hotspot/share/gc/shared/referenceProcessor.inline.hpp
src/hotspot/share/gc/shared/referenceProcessorPhaseTimes.cpp
src/hotspot/share/gc/shared/referenceProcessorPhaseTimes.hpp
test/hotspot/jtreg/gc/logging/TestPrintReferences.java
--- a/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp	Mon Jun 18 08:07:15 2018 +0530
+++ b/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp	Tue May 29 09:26:00 2018 +0200
@@ -56,6 +56,7 @@
 #include "gc/shared/isGCActiveMark.hpp"
 #include "gc/shared/oopStorageParState.hpp"
 #include "gc/shared/referencePolicy.hpp"
+#include "gc/shared/referenceProcessorPhaseTimes.hpp"
 #include "gc/shared/space.inline.hpp"
 #include "gc/shared/strongRootsScope.hpp"
 #include "gc/shared/taskqueue.inline.hpp"
--- a/src/hotspot/share/gc/cms/parNewGeneration.cpp	Mon Jun 18 08:07:15 2018 +0530
+++ b/src/hotspot/share/gc/cms/parNewGeneration.cpp	Tue May 29 09:26:00 2018 +0200
@@ -42,6 +42,7 @@
 #include "gc/shared/plab.inline.hpp"
 #include "gc/shared/preservedMarks.inline.hpp"
 #include "gc/shared/referencePolicy.hpp"
+#include "gc/shared/referenceProcessorPhaseTimes.hpp"
 #include "gc/shared/space.hpp"
 #include "gc/shared/spaceDecorator.hpp"
 #include "gc/shared/strongRootsScope.hpp"
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp	Mon Jun 18 08:07:15 2018 +0530
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp	Tue May 29 09:26:00 2018 +0200
@@ -3843,7 +3843,7 @@
 
     G1STWIsAliveClosure is_alive(_g1h);
 
-    G1ParScanThreadState*          pss = _pss->state_for_worker(worker_id);
+    G1ParScanThreadState* pss = _pss->state_for_worker(worker_id);
     pss->set_ref_discoverer(NULL);
 
     // Keep alive closure.
--- a/src/hotspot/share/gc/parallel/psMarkSweep.cpp	Mon Jun 18 08:07:15 2018 +0530
+++ b/src/hotspot/share/gc/parallel/psMarkSweep.cpp	Tue May 29 09:26:00 2018 +0200
@@ -46,6 +46,7 @@
 #include "gc/shared/isGCActiveMark.hpp"
 #include "gc/shared/referencePolicy.hpp"
 #include "gc/shared/referenceProcessor.hpp"
+#include "gc/shared/referenceProcessorPhaseTimes.hpp"
 #include "gc/shared/spaceDecorator.hpp"
 #include "gc/shared/weakProcessor.hpp"
 #include "logging/log.hpp"
--- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp	Mon Jun 18 08:07:15 2018 +0530
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp	Tue May 29 09:26:00 2018 +0200
@@ -49,6 +49,7 @@
 #include "gc/shared/isGCActiveMark.hpp"
 #include "gc/shared/referencePolicy.hpp"
 #include "gc/shared/referenceProcessor.hpp"
+#include "gc/shared/referenceProcessorPhaseTimes.hpp"
 #include "gc/shared/spaceDecorator.hpp"
 #include "gc/shared/weakProcessor.hpp"
 #include "logging/log.hpp"
--- a/src/hotspot/share/gc/parallel/psScavenge.cpp	Mon Jun 18 08:07:15 2018 +0530
+++ b/src/hotspot/share/gc/parallel/psScavenge.cpp	Tue May 29 09:26:00 2018 +0200
@@ -43,6 +43,7 @@
 #include "gc/shared/isGCActiveMark.hpp"
 #include "gc/shared/referencePolicy.hpp"
 #include "gc/shared/referenceProcessor.hpp"
+#include "gc/shared/referenceProcessorPhaseTimes.hpp"
 #include "gc/shared/spaceDecorator.hpp"
 #include "gc/shared/weakProcessor.hpp"
 #include "memory/resourceArea.hpp"
@@ -160,7 +161,7 @@
     q->enqueue(new PSRefProcTaskProxy(task, i));
   }
   ParallelTaskTerminator terminator(manager->active_workers(),
-                 (TaskQueueSetSuper*) PSPromotionManager::stack_array_depth());
+                                    (TaskQueueSetSuper*) PSPromotionManager::stack_array_depth());
   if (task.marks_oops_alive() && manager->active_workers() > 1) {
     for (uint j = 0; j < manager->active_workers(); j++) {
       q->enqueue(new StealTask(&terminator));
--- a/src/hotspot/share/gc/serial/defNewGeneration.cpp	Mon Jun 18 08:07:15 2018 +0530
+++ b/src/hotspot/share/gc/serial/defNewGeneration.cpp	Tue May 29 09:26:00 2018 +0200
@@ -40,6 +40,7 @@
 #include "gc/shared/generationSpec.hpp"
 #include "gc/shared/preservedMarks.inline.hpp"
 #include "gc/shared/referencePolicy.hpp"
+#include "gc/shared/referenceProcessorPhaseTimes.hpp"
 #include "gc/shared/space.inline.hpp"
 #include "gc/shared/spaceDecorator.hpp"
 #include "gc/shared/strongRootsScope.hpp"
--- a/src/hotspot/share/gc/serial/genMarkSweep.cpp	Mon Jun 18 08:07:15 2018 +0530
+++ b/src/hotspot/share/gc/serial/genMarkSweep.cpp	Tue May 29 09:26:00 2018 +0200
@@ -41,6 +41,7 @@
 #include "gc/shared/genOopClosures.inline.hpp"
 #include "gc/shared/modRefBarrierSet.hpp"
 #include "gc/shared/referencePolicy.hpp"
+#include "gc/shared/referenceProcessorPhaseTimes.hpp"
 #include "gc/shared/space.hpp"
 #include "gc/shared/strongRootsScope.hpp"
 #include "gc/shared/weakProcessor.hpp"
--- a/src/hotspot/share/gc/shared/referenceProcessor.cpp	Mon Jun 18 08:07:15 2018 +0530
+++ b/src/hotspot/share/gc/shared/referenceProcessor.cpp	Tue May 29 09:26:00 2018 +0200
@@ -31,6 +31,7 @@
 #include "gc/shared/gcTraceTime.inline.hpp"
 #include "gc/shared/referencePolicy.hpp"
 #include "gc/shared/referenceProcessor.inline.hpp"
+#include "gc/shared/referenceProcessorPhaseTimes.hpp"
 #include "logging/log.hpp"
 #include "memory/allocation.inline.hpp"
 #include "memory/resourceArea.hpp"
@@ -125,8 +126,7 @@
 
   // Initialize all entries to NULL
   for (uint i = 0; i < _max_num_queues * number_of_subclasses_of_ref(); i++) {
-    _discovered_refs[i].set_head(NULL);
-    _discovered_refs[i].set_length(0);
+    _discovered_refs[i].clear();
   }
 
   setup_policy(false /* default soft ref policy */);
@@ -189,6 +189,13 @@
   return total;
 }
 
+#ifdef ASSERT
+void ReferenceProcessor::verify_total_count_zero(DiscoveredList lists[], const char* type) {
+  size_t count = total_count(lists);
+  assert(count == 0, "%ss must be empty but has " SIZE_FORMAT " elements", type, count);
+}
+#endif
+
 ReferenceProcessorStats ReferenceProcessor::process_discovered_references(
   BoolObjectClosure*            is_alive,
   OopClosure*                   keep_alive,
@@ -217,34 +224,27 @@
                                 total_count(_discoveredFinalRefs),
                                 total_count(_discoveredPhantomRefs));
 
-  // Soft references
   {
-    RefProcPhaseTimesTracker tt(REF_SOFT, phase_times, this);
-    process_discovered_reflist(_discoveredSoftRefs, _current_soft_ref_policy, true,
-                               is_alive, keep_alive, complete_gc, task_executor, phase_times);
+    RefProcTotalPhaseTimesTracker tt(RefPhase1, phase_times, this);
+    process_soft_ref_reconsider(is_alive, keep_alive, complete_gc,
+                                task_executor, phase_times);
   }
 
   update_soft_ref_master_clock();
 
-  // Weak references
   {
-    RefProcPhaseTimesTracker tt(REF_WEAK, phase_times, this);
-    process_discovered_reflist(_discoveredWeakRefs, NULL, true,
-                               is_alive, keep_alive, complete_gc, task_executor, phase_times);
+    RefProcTotalPhaseTimesTracker tt(RefPhase2, phase_times, this);
+    process_soft_weak_final_refs(is_alive, keep_alive, complete_gc, task_executor, phase_times);
   }
 
-  // Final references
   {
-    RefProcPhaseTimesTracker tt(REF_FINAL, phase_times, this);
-    process_discovered_reflist(_discoveredFinalRefs, NULL, false,
-                               is_alive, keep_alive, complete_gc, task_executor, phase_times);
+    RefProcTotalPhaseTimesTracker tt(RefPhase3, phase_times, this);
+    process_final_keep_alive(keep_alive, complete_gc, task_executor, phase_times);
   }
 
-  // Phantom references
   {
-    RefProcPhaseTimesTracker tt(REF_PHANTOM, phase_times, this);
-    process_discovered_reflist(_discoveredPhantomRefs, NULL, true,
-                               is_alive, keep_alive, complete_gc, task_executor, phase_times);
+    RefProcTotalPhaseTimesTracker tt(RefPhase4, phase_times, this);
+    process_phantom_refs(is_alive, keep_alive, complete_gc, task_executor, phase_times);
   }
 
   if (task_executor != NULL) {
@@ -294,7 +294,7 @@
   // pre-barrier here because we know the Reference has already been found/marked,
   // that's how it ended up in the discovered list in the first place.
   RawAccess<>::oop_store(_prev_discovered_addr, new_next);
-  NOT_PRODUCT(_removed++);
+  _removed++;
   _refs_list.dec_length(1);
 }
 
@@ -318,24 +318,29 @@
   }
 }
 
-// NOTE: process_phase*() are largely similar, and at a high level
-// merely iterate over the extant list applying a predicate to
-// each of its elements and possibly removing that element from the
-// list and applying some further closures to that element.
-// We should consider the possibility of replacing these
-// process_phase*() methods by abstracting them into
-// a single general iterator invocation that receives appropriate
-// closures that accomplish this work.
+inline void log_dropped_ref(const DiscoveredListIterator& iter, const char* reason) {
+  if (log_develop_is_enabled(Trace, gc, ref)) {
+    ResourceMark rm;
+    log_develop_trace(gc, ref)("Dropping %s reference " PTR_FORMAT ": %s",
+                               reason, p2i(iter.obj()),
+                               iter.obj()->klass()->internal_name());
+  }
+}
 
-// (SoftReferences only) Traverse the list and remove any SoftReferences whose
-// referents are not alive, but that should be kept alive for policy reasons.
-// Keep alive the transitive closure of all such referents.
-void
-ReferenceProcessor::process_phase1(DiscoveredList&    refs_list,
-                                   ReferencePolicy*   policy,
-                                   BoolObjectClosure* is_alive,
-                                   OopClosure*        keep_alive,
-                                   VoidClosure*       complete_gc) {
+inline void log_enqueued_ref(const DiscoveredListIterator& iter, const char* reason) {
+  if (log_develop_is_enabled(Trace, gc, ref)) {
+    ResourceMark rm;
+    log_develop_trace(gc, ref)("Enqueue %s reference (" INTPTR_FORMAT ": %s)",
+                               reason, p2i(iter.obj()), iter.obj()->klass()->internal_name());
+  }
+  assert(oopDesc::is_oop(iter.obj(), UseConcMarkSweepGC), "Adding a bad reference");
+}
+
+size_t ReferenceProcessor::process_soft_ref_reconsider_work(DiscoveredList&    refs_list,
+                                                            ReferencePolicy*   policy,
+                                                            BoolObjectClosure* is_alive,
+                                                            OopClosure*        keep_alive,
+                                                            VoidClosure*       complete_gc) {
   assert(policy != NULL, "Must have a non-NULL policy");
   DiscoveredListIterator iter(refs_list, keep_alive, is_alive);
   // Decide which softly reachable refs should be kept alive.
@@ -344,8 +349,7 @@
     bool referent_is_dead = (iter.referent() != NULL) && !iter.is_referent_alive();
     if (referent_is_dead &&
         !policy->should_clear_reference(iter.obj(), _soft_ref_timestamp_clock)) {
-      log_develop_trace(gc, ref)("Dropping reference (" INTPTR_FORMAT ": %s"  ") by policy",
-                                 p2i(iter.obj()), iter.obj()->klass()->internal_name());
+      log_dropped_ref(iter, "by policy");
       // Remove Reference object from list
       iter.remove();
       // keep the referent around
@@ -357,23 +361,16 @@
   }
   // Close the reachable set
   complete_gc->do_void();
+
   log_develop_trace(gc, ref)(" Dropped " SIZE_FORMAT " dead Refs out of " SIZE_FORMAT " discovered Refs by policy, from list " INTPTR_FORMAT,
                              iter.removed(), iter.processed(), p2i(&refs_list));
+  return iter.removed();
 }
 
-inline void log_dropped_ref(const DiscoveredListIterator& iter, const char* reason) {
-  log_develop_trace(gc, ref)("Dropping %s reference " PTR_FORMAT ": %s",
-                             reason, p2i(iter.obj()),
-                             iter.obj()->klass()->internal_name());
-}
-
-// Traverse the list and remove any Refs whose referents are alive,
-// or NULL if discovery is not atomic.
-void ReferenceProcessor::process_phase2(DiscoveredList&    refs_list,
-                                        BoolObjectClosure* is_alive,
-                                        OopClosure*        keep_alive,
-                                        VoidClosure*       complete_gc) {
-  // complete_gc is unused.
+size_t ReferenceProcessor::process_soft_weak_final_refs_work(DiscoveredList&    refs_list,
+                                                             BoolObjectClosure* is_alive,
+                                                             OopClosure*        keep_alive,
+                                                             bool               do_enqueue_and_clear) {
   DiscoveredListIterator iter(refs_list, keep_alive, is_alive);
   while (iter.has_next()) {
     iter.load_ptrs(DEBUG_ONLY(!discovery_is_atomic() /* allow_null_referent */));
@@ -395,50 +392,80 @@
       iter.make_referent_alive();
       iter.move_to_next();
     } else {
+      if (do_enqueue_and_clear) {
+        iter.clear_referent();
+        iter.enqueue();
+        log_enqueued_ref(iter, "cleared");
+      }
+      // Keep in discovered list
       iter.next();
     }
   }
-  NOT_PRODUCT(
-    if (iter.processed() > 0) {
-      log_develop_trace(gc, ref)(" Dropped " SIZE_FORMAT " active Refs out of " SIZE_FORMAT
-        " Refs in discovered list " INTPTR_FORMAT,
-        iter.removed(), iter.processed(), p2i(&refs_list));
-    }
-  )
+  if (do_enqueue_and_clear) {
+    iter.complete_enqueue();
+    refs_list.clear();
+  }
+
+  log_develop_trace(gc, ref)(" Dropped " SIZE_FORMAT " active Refs out of " SIZE_FORMAT
+                             " Refs in discovered list " INTPTR_FORMAT,
+                             iter.removed(), iter.processed(), p2i(&refs_list));
+  return iter.removed();
 }
 
-void ReferenceProcessor::process_phase3(DiscoveredList&    refs_list,
-                                        bool               clear_referent,
-                                        BoolObjectClosure* is_alive,
-                                        OopClosure*        keep_alive,
-                                        VoidClosure*       complete_gc) {
-  ResourceMark rm;
-  DiscoveredListIterator iter(refs_list, keep_alive, is_alive);
+size_t ReferenceProcessor::process_final_keep_alive_work(DiscoveredList& refs_list,
+                                                         OopClosure*     keep_alive,
+                                                         VoidClosure*    complete_gc) {
+  DiscoveredListIterator iter(refs_list, keep_alive, NULL);
   while (iter.has_next()) {
     iter.load_ptrs(DEBUG_ONLY(false /* allow_null_referent */));
-    if (clear_referent) {
-      // NULL out referent pointer
-      iter.clear_referent();
-    } else {
-      // Current reference is a FinalReference; that's the only kind we
-      // don't clear the referent, instead keeping it for calling finalize.
-      iter.make_referent_alive();
-      // Self-loop next, to mark it not active.
-      assert(java_lang_ref_Reference::next(iter.obj()) == NULL, "enqueued FinalReference");
-      java_lang_ref_Reference::set_next_raw(iter.obj(), iter.obj());
-    }
+    // keep the referent and followers around
+    iter.make_referent_alive();
+
+    // Self-loop next, to mark the FinalReference not active.
+    assert(java_lang_ref_Reference::next(iter.obj()) == NULL, "enqueued FinalReference");
+    java_lang_ref_Reference::set_next_raw(iter.obj(), iter.obj());
+
     iter.enqueue();
-    log_develop_trace(gc, ref)("Adding %sreference (" INTPTR_FORMAT ": %s) as pending",
-                               clear_referent ? "cleared " : "", p2i(iter.obj()), iter.obj()->klass()->internal_name());
-    assert(oopDesc::is_oop(iter.obj(), UseConcMarkSweepGC), "Adding a bad reference");
+    log_enqueued_ref(iter, "Final");
     iter.next();
   }
   iter.complete_enqueue();
   // Close the reachable set
   complete_gc->do_void();
-  // Clear the list.
-  refs_list.set_head(NULL);
-  refs_list.set_length(0);
+  refs_list.clear();
+
+  assert(iter.removed() == 0, "This phase does not remove anything.");
+  return iter.removed();
+}
+
+size_t ReferenceProcessor::process_phantom_refs_work(DiscoveredList&    refs_list,
+                                          BoolObjectClosure* is_alive,
+                                          OopClosure*        keep_alive,
+                                          VoidClosure*       complete_gc) {
+  DiscoveredListIterator iter(refs_list, keep_alive, is_alive);
+  while (iter.has_next()) {
+    iter.load_ptrs(DEBUG_ONLY(!discovery_is_atomic() /* allow_null_referent */));
+
+    oop const referent = iter.referent();
+
+    if (referent == NULL || iter.is_referent_alive()) {
+      iter.make_referent_alive();
+      iter.remove();
+      iter.move_to_next();
+    } else {
+      iter.clear_referent();
+      iter.enqueue();
+      log_enqueued_ref(iter, "cleared Phantom");
+      iter.next();
+    }
+  }
+  iter.complete_enqueue();
+  // Close the reachable set; needed for collectors whose keep_alive closure does
+  // not immediately complete its work.
+  complete_gc->do_void();
+  refs_list.clear();
+
+  return iter.removed();
 }
 
 void
@@ -450,8 +477,7 @@
     next = java_lang_ref_Reference::discovered(obj);
     java_lang_ref_Reference::set_discovered_raw(obj, NULL);
   }
-  refs_list.set_head(NULL);
-  refs_list.set_length(0);
+  refs_list.clear();
 }
 
 void ReferenceProcessor::abandon_partial_discovery() {
@@ -488,69 +514,107 @@
   return total_count(list);
 }
 
-class RefProcPhase1Task: public AbstractRefProcTaskExecutor::ProcessTask {
+class RefProcPhase1Task : public AbstractRefProcTaskExecutor::ProcessTask {
 public:
   RefProcPhase1Task(ReferenceProcessor&           ref_processor,
-                    DiscoveredList                refs_lists[],
-                    ReferencePolicy*              policy,
-                    bool                          marks_oops_alive,
-                    ReferenceProcessorPhaseTimes* phase_times)
-    : ProcessTask(ref_processor, refs_lists, marks_oops_alive, phase_times),
-      _policy(policy)
-  { }
-  virtual void work(unsigned int i, BoolObjectClosure& is_alive,
+                    ReferenceProcessorPhaseTimes* phase_times,
+                    ReferencePolicy*              policy)
+    : ProcessTask(ref_processor, true /* marks_oops_alive */, phase_times),
+      _policy(policy) { }
+
+  virtual void work(uint worker_id,
+                    BoolObjectClosure& is_alive,
                     OopClosure& keep_alive,
                     VoidClosure& complete_gc)
   {
-    RefProcWorkerTimeTracker tt(ReferenceProcessorPhaseTimes::RefPhase1, _phase_times, i);
-
-    _ref_processor.process_phase1(_refs_lists[i], _policy,
-                                  &is_alive, &keep_alive, &complete_gc);
+    RefProcSubPhasesWorkerTimeTracker tt(ReferenceProcessor::SoftRefSubPhase1, _phase_times, worker_id);
+    size_t const removed = _ref_processor.process_soft_ref_reconsider_work(_ref_processor._discoveredSoftRefs[worker_id],
+                                                                           _policy,
+                                                                           &is_alive,
+                                                                           &keep_alive,
+                                                                           &complete_gc);
+    _phase_times->add_ref_cleared(REF_SOFT, removed);
   }
 private:
   ReferencePolicy* _policy;
 };
 
 class RefProcPhase2Task: public AbstractRefProcTaskExecutor::ProcessTask {
+  void run_phase2(uint worker_id,
+                  DiscoveredList list[],
+                  BoolObjectClosure& is_alive,
+                  OopClosure& keep_alive,
+                  bool do_enqueue_and_clear,
+                  ReferenceType ref_type) {
+    size_t const removed = _ref_processor.process_soft_weak_final_refs_work(list[worker_id],
+                                                                            &is_alive,
+                                                                            &keep_alive,
+                                                                            do_enqueue_and_clear);
+    _phase_times->add_ref_cleared(ref_type, removed);
+  }
+
 public:
-  RefProcPhase2Task(ReferenceProcessor&           ref_processor,
-                    DiscoveredList                refs_lists[],
-                    bool                          marks_oops_alive,
+  RefProcPhase2Task(ReferenceProcessor& ref_processor,
                     ReferenceProcessorPhaseTimes* phase_times)
-    : ProcessTask(ref_processor, refs_lists, marks_oops_alive, phase_times)
-  { }
-  virtual void work(unsigned int i, BoolObjectClosure& is_alive,
+    : ProcessTask(ref_processor, false /* marks_oops_alive */, phase_times) { }
+
+  virtual void work(uint worker_id,
+                    BoolObjectClosure& is_alive,
                     OopClosure& keep_alive,
-                    VoidClosure& complete_gc)
-  {
-    RefProcWorkerTimeTracker tt(ReferenceProcessorPhaseTimes::RefPhase2, _phase_times, i);
-
-    _ref_processor.process_phase2(_refs_lists[i],
-                                  &is_alive, &keep_alive, &complete_gc);
+                    VoidClosure& complete_gc) {
+    RefProcWorkerTimeTracker t(_phase_times->phase2_worker_time_sec(), worker_id);
+    {
+      RefProcSubPhasesWorkerTimeTracker tt(ReferenceProcessor::SoftRefSubPhase2, _phase_times, worker_id);
+      run_phase2(worker_id, _ref_processor._discoveredSoftRefs, is_alive, keep_alive, true /* do_enqueue_and_clear */, REF_SOFT);
+    }
+    {
+      RefProcSubPhasesWorkerTimeTracker tt(ReferenceProcessor::WeakRefSubPhase2, _phase_times, worker_id);
+      run_phase2(worker_id, _ref_processor._discoveredWeakRefs, is_alive, keep_alive, true /* do_enqueue_and_clear */, REF_WEAK);
+    }
+    {
+      RefProcSubPhasesWorkerTimeTracker tt(ReferenceProcessor::FinalRefSubPhase2, _phase_times, worker_id);
+      run_phase2(worker_id, _ref_processor._discoveredFinalRefs, is_alive, keep_alive, false /* do_enqueue_and_clear */, REF_FINAL);
+    }
+    // Close the reachable set; needed for collectors whose keep_alive closure does
+    // not immediately complete its work.
+    complete_gc.do_void();
   }
 };
 
 class RefProcPhase3Task: public AbstractRefProcTaskExecutor::ProcessTask {
 public:
   RefProcPhase3Task(ReferenceProcessor&           ref_processor,
-                    DiscoveredList                refs_lists[],
-                    bool                         clear_referent,
-                    bool                          marks_oops_alive,
                     ReferenceProcessorPhaseTimes* phase_times)
-    : ProcessTask(ref_processor, refs_lists, marks_oops_alive, phase_times),
-      _clear_referent(clear_referent)
-  { }
-  virtual void work(unsigned int i, BoolObjectClosure& is_alive,
+    : ProcessTask(ref_processor, true /* marks_oops_alive */, phase_times) { }
+
+  virtual void work(uint worker_id,
+                    BoolObjectClosure& is_alive,
                     OopClosure& keep_alive,
                     VoidClosure& complete_gc)
   {
-    RefProcWorkerTimeTracker tt(ReferenceProcessorPhaseTimes::RefPhase3, _phase_times, i);
+    RefProcSubPhasesWorkerTimeTracker tt(ReferenceProcessor::FinalRefSubPhase3, _phase_times, worker_id);
+    _ref_processor.process_final_keep_alive_work(_ref_processor._discoveredFinalRefs[worker_id], &keep_alive, &complete_gc);
+  }
+};
+
+class RefProcPhase4Task: public AbstractRefProcTaskExecutor::ProcessTask {
+public:
+  RefProcPhase4Task(ReferenceProcessor&           ref_processor,
+                    ReferenceProcessorPhaseTimes* phase_times)
+    : ProcessTask(ref_processor, false /* marks_oops_alive */, phase_times) { }
 
-    _ref_processor.process_phase3(_refs_lists[i], _clear_referent,
-                                  &is_alive, &keep_alive, &complete_gc);
+  virtual void work(uint worker_id,
+                    BoolObjectClosure& is_alive,
+                    OopClosure& keep_alive,
+                    VoidClosure& complete_gc)
+  {
+    RefProcSubPhasesWorkerTimeTracker tt(ReferenceProcessor::PhantomRefSubPhase4, _phase_times, worker_id);
+    size_t const removed = _ref_processor.process_phantom_refs_work(_ref_processor._discoveredPhantomRefs[worker_id],
+                                                                    &is_alive,
+                                                                    &keep_alive,
+                                                                    &complete_gc);
+    _phase_times->add_ref_cleared(REF_PHANTOM, removed);
   }
-private:
-  bool _clear_referent;
 };
 
 void ReferenceProcessor::log_reflist(const char* prefix, DiscoveredList list[], uint num_active_queues) {
@@ -614,6 +678,12 @@
   }
 }
 
+void ReferenceProcessor::maybe_balance_queues(DiscoveredList refs_lists[]) {
+  if (_processing_is_mt && need_balance_queues(refs_lists)) {
+    balance_queues(refs_lists);
+  }
+}
+
 // Balances reference queues.
 // Move entries from all queues[0, 1, ..., _max_num_q-1] to
 // queues[0, 1, ..., _num_q-1] because only the first _num_q
@@ -698,77 +768,175 @@
 #endif
 }
 
-void ReferenceProcessor::process_discovered_reflist(
-  DiscoveredList                refs_lists[],
-  ReferencePolicy*              policy,
-  bool                          clear_referent,
-  BoolObjectClosure*            is_alive,
-  OopClosure*                   keep_alive,
-  VoidClosure*                  complete_gc,
-  AbstractRefProcTaskExecutor*  task_executor,
-  ReferenceProcessorPhaseTimes* phase_times)
-{
-  bool mt_processing = task_executor != NULL && _processing_is_mt;
+void ReferenceProcessor::process_soft_ref_reconsider(BoolObjectClosure* is_alive,
+                                                     OopClosure* keep_alive,
+                                                     VoidClosure* complete_gc,
+                                                     AbstractRefProcTaskExecutor*  task_executor,
+                                                     ReferenceProcessorPhaseTimes* phase_times) {
+  assert(!_processing_is_mt || task_executor != NULL, "Task executor must not be NULL when mt processing is set.");
+
+  phase_times->set_ref_discovered(REF_SOFT, total_count(_discoveredSoftRefs));
+
+  if (_current_soft_ref_policy == NULL) {
+    return;
+  }
+
+  phase_times->set_processing_is_mt(_processing_is_mt);
+
+  {
+    RefProcBalanceQueuesTimeTracker tt(RefPhase1, phase_times);
+    maybe_balance_queues(_discoveredSoftRefs);
+  }
+
+  RefProcPhaseTimeTracker tt(RefPhase1, phase_times);
+
+  log_reflist("Phase1 Soft before", _discoveredSoftRefs, _max_num_queues);
+  if (_processing_is_mt) {
+    RefProcPhase1Task phase1(*this, phase_times, _current_soft_ref_policy);
+    task_executor->execute(phase1);
+  } else {
+    size_t removed = 0;
 
-  phase_times->set_processing_is_mt(mt_processing);
+    RefProcSubPhasesWorkerTimeTracker tt2(SoftRefSubPhase1, phase_times, 0);
+    for (uint i = 0; i < _max_num_queues; i++) {
+      removed += process_soft_ref_reconsider_work(_discoveredSoftRefs[i], _current_soft_ref_policy,
+                                                  is_alive, keep_alive, complete_gc);
+    }
+
+    phase_times->add_ref_cleared(REF_SOFT, removed);
+  }
+  log_reflist("Phase1 Soft after", _discoveredSoftRefs, _max_num_queues);
+}
 
-  if (mt_processing && need_balance_queues(refs_lists)) {
-    RefProcBalanceQueuesTimeTracker tt(phase_times);
-    balance_queues(refs_lists);
+void ReferenceProcessor::process_soft_weak_final_refs(BoolObjectClosure* is_alive,
+                                                      OopClosure* keep_alive,
+                                                      VoidClosure* complete_gc,
+                                                      AbstractRefProcTaskExecutor*  task_executor,
+                                                      ReferenceProcessorPhaseTimes* phase_times) {
+  assert(!_processing_is_mt || task_executor != NULL, "Task executor must not be NULL when mt processing is set.");
+
+  phase_times->set_ref_discovered(REF_WEAK, total_count(_discoveredWeakRefs));
+  phase_times->set_ref_discovered(REF_FINAL, total_count(_discoveredFinalRefs));
+
+  phase_times->set_processing_is_mt(_processing_is_mt);
+
+  {
+    RefProcBalanceQueuesTimeTracker tt(RefPhase2, phase_times);
+    maybe_balance_queues(_discoveredSoftRefs);
+    maybe_balance_queues(_discoveredWeakRefs);
+    maybe_balance_queues(_discoveredFinalRefs);
   }
 
-  // Phase 1 (soft refs only):
-  // . Traverse the list and remove any SoftReferences whose
-  //   referents are not alive, but that should be kept alive for
-  //   policy reasons. Keep alive the transitive closure of all
-  //   such referents.
-  if (policy != NULL) {
-    RefProcParPhaseTimeTracker tt(ReferenceProcessorPhaseTimes::RefPhase1, phase_times);
+  RefProcPhaseTimeTracker tt(RefPhase2, phase_times);
 
-    if (mt_processing) {
-      RefProcPhase1Task phase1(*this, refs_lists, policy, true /*marks_oops_alive*/, phase_times);
-      task_executor->execute(phase1);
-    } else {
+  log_reflist("Phase2 Soft before", _discoveredSoftRefs, _max_num_queues);
+  log_reflist("Phase2 Weak before", _discoveredWeakRefs, _max_num_queues);
+  log_reflist("Phase2 Final before", _discoveredFinalRefs, _max_num_queues);
+  if (_processing_is_mt) {
+    RefProcPhase2Task phase2(*this, phase_times);
+    task_executor->execute(phase2);
+  } else {
+    RefProcWorkerTimeTracker t(phase_times->phase2_worker_time_sec(), 0);
+    {
+      size_t removed = 0;
+
+      RefProcSubPhasesWorkerTimeTracker tt2(SoftRefSubPhase2, phase_times, 0);
       for (uint i = 0; i < _max_num_queues; i++) {
-        process_phase1(refs_lists[i], policy,
-                       is_alive, keep_alive, complete_gc);
+        removed += process_soft_weak_final_refs_work(_discoveredSoftRefs[i], is_alive, keep_alive, true /* do_enqueue */);
       }
+
+      phase_times->add_ref_cleared(REF_SOFT, removed);
     }
-  } else { // policy == NULL
-    assert(refs_lists != _discoveredSoftRefs,
-           "Policy must be specified for soft references.");
+    {
+      size_t removed = 0;
+
+      RefProcSubPhasesWorkerTimeTracker tt2(WeakRefSubPhase2, phase_times, 0);
+      for (uint i = 0; i < _max_num_queues; i++) {
+        removed += process_soft_weak_final_refs_work(_discoveredWeakRefs[i], is_alive, keep_alive, true /* do_enqueue */);
+      }
+
+      phase_times->add_ref_cleared(REF_WEAK, removed);
+    }
+    {
+      size_t removed = 0;
+
+      RefProcSubPhasesWorkerTimeTracker tt2(FinalRefSubPhase2, phase_times, 0);
+      for (uint i = 0; i < _max_num_queues; i++) {
+        removed += process_soft_weak_final_refs_work(_discoveredFinalRefs[i], is_alive, keep_alive, false /* do_enqueue */);
+      }
+
+      phase_times->add_ref_cleared(REF_FINAL, removed);
+    }
+    complete_gc->do_void();
   }
+  verify_total_count_zero(_discoveredSoftRefs, "SoftReference");
+  verify_total_count_zero(_discoveredWeakRefs, "WeakReference");
+  log_reflist("Phase2 Final after", _discoveredFinalRefs, _max_num_queues);
+}
 
-  // Phase 2:
-  // . Traverse the list and remove any refs whose referents are alive.
+void ReferenceProcessor::process_final_keep_alive(OopClosure* keep_alive,
+                                                  VoidClosure* complete_gc,
+                                                  AbstractRefProcTaskExecutor*  task_executor,
+                                                  ReferenceProcessorPhaseTimes* phase_times) {
+  assert(!_processing_is_mt || task_executor != NULL, "Task executor must not be NULL when mt processing is set.");
+
+  phase_times->set_processing_is_mt(_processing_is_mt);
+
   {
-    RefProcParPhaseTimeTracker tt(ReferenceProcessorPhaseTimes::RefPhase2, phase_times);
-
-    if (mt_processing) {
-      RefProcPhase2Task phase2(*this, refs_lists, !discovery_is_atomic() /*marks_oops_alive*/, phase_times);
-      task_executor->execute(phase2);
-    } else {
-      for (uint i = 0; i < _max_num_queues; i++) {
-        process_phase2(refs_lists[i], is_alive, keep_alive, complete_gc);
-      }
-    }
+    RefProcBalanceQueuesTimeTracker tt(RefPhase3, phase_times);
+    maybe_balance_queues(_discoveredFinalRefs);
   }
 
   // Phase 3:
-  // . Traverse the list and process referents as appropriate.
-  {
-    RefProcParPhaseTimeTracker tt(ReferenceProcessorPhaseTimes::RefPhase3, phase_times);
+  // . Traverse referents of final references and keep them and followers alive.
+  RefProcPhaseTimeTracker tt(RefPhase3, phase_times);
 
-    if (mt_processing) {
-      RefProcPhase3Task phase3(*this, refs_lists, clear_referent, true /*marks_oops_alive*/, phase_times);
-      task_executor->execute(phase3);
-    } else {
-      for (uint i = 0; i < _max_num_queues; i++) {
-        process_phase3(refs_lists[i], clear_referent,
-                       is_alive, keep_alive, complete_gc);
-      }
+  if (_processing_is_mt) {
+    RefProcPhase3Task phase3(*this, phase_times);
+    task_executor->execute(phase3);
+  } else {
+    RefProcSubPhasesWorkerTimeTracker tt2(FinalRefSubPhase3, phase_times, 0);
+    for (uint i = 0; i < _max_num_queues; i++) {
+      process_final_keep_alive_work(_discoveredFinalRefs[i], keep_alive, complete_gc);
     }
   }
+  verify_total_count_zero(_discoveredFinalRefs, "FinalReference");
+}
+
+void ReferenceProcessor::process_phantom_refs(BoolObjectClosure* is_alive,
+                                              OopClosure* keep_alive,
+                                              VoidClosure* complete_gc,
+                                              AbstractRefProcTaskExecutor* task_executor,
+                                              ReferenceProcessorPhaseTimes* phase_times) {
+  assert(!_processing_is_mt || task_executor != NULL, "Task executor must not be NULL when mt processing is set.");
+
+  phase_times->set_ref_discovered(REF_PHANTOM, total_count(_discoveredPhantomRefs));
+
+  phase_times->set_processing_is_mt(_processing_is_mt);
+
+  {
+    RefProcBalanceQueuesTimeTracker tt(RefPhase4, phase_times);
+    maybe_balance_queues(_discoveredPhantomRefs);
+  }
+
+  // Phase 4: Drop phantom references whose referent is NULL or still alive; clear and enqueue the rest.
+  RefProcPhaseTimeTracker tt(RefPhase4, phase_times);
+
+  log_reflist("Phase4 Phantom before", _discoveredPhantomRefs, _max_num_queues);
+  if (_processing_is_mt) {
+    RefProcPhase4Task phase4(*this, phase_times);
+    task_executor->execute(phase4);
+  } else {
+    size_t removed = 0;
+
+    RefProcSubPhasesWorkerTimeTracker tt(PhantomRefSubPhase4, phase_times, 0);
+    for (uint i = 0; i < _max_num_queues; i++) {
+      removed += process_phantom_refs_work(_discoveredPhantomRefs[i], is_alive, keep_alive, complete_gc);
+    }
+
+    phase_times->add_ref_cleared(REF_PHANTOM, removed);
+  }
+  verify_total_count_zero(_discoveredPhantomRefs, "PhantomReference");
 }
 
 inline DiscoveredList* ReferenceProcessor::get_discovered_list(ReferenceType rt) {
@@ -1119,12 +1287,10 @@
   // Close the reachable set
   complete_gc->do_void();
 
-  NOT_PRODUCT(
-    if (iter.processed() > 0) {
-      log_develop_trace(gc, ref)(" Dropped " SIZE_FORMAT " Refs out of " SIZE_FORMAT " Refs in discovered list " INTPTR_FORMAT,
-        iter.removed(), iter.processed(), p2i(&refs_list));
-    }
-  )
+  if (iter.processed() > 0) {
+    log_develop_trace(gc, ref)(" Dropped " SIZE_FORMAT " Refs out of " SIZE_FORMAT " Refs in discovered list " INTPTR_FORMAT,
+                               iter.removed(), iter.processed(), p2i(&refs_list));
+  }
   return false;
 }
 
--- a/src/hotspot/share/gc/shared/referenceProcessor.hpp	Mon Jun 18 08:07:15 2018 +0530
+++ b/src/hotspot/share/gc/shared/referenceProcessor.hpp	Tue May 29 09:26:00 2018 +0200
@@ -27,28 +27,14 @@
 
 #include "gc/shared/referenceDiscoverer.hpp"
 #include "gc/shared/referencePolicy.hpp"
-#include "gc/shared/referenceProcessorPhaseTimes.hpp"
 #include "gc/shared/referenceProcessorStats.hpp"
 #include "memory/referenceType.hpp"
 #include "oops/instanceRefKlass.hpp"
 
+class AbstractRefProcTaskExecutor;
 class GCTimer;
-
-// ReferenceProcessor class encapsulates the per-"collector" processing
-// of java.lang.Reference objects for GC. The interface is useful for supporting
-// a generational abstraction, in particular when there are multiple
-// generations that are being independently collected -- possibly
-// concurrently and/or incrementally.
-// ReferenceProcessor class abstracts away from a generational setting
-// by using a closure that determines whether a given reference or referent are
-// subject to this ReferenceProcessor's discovery, thus allowing its use in a
-// straightforward manner in a general, non-generational, non-contiguous generation
-// (or heap) setting.
-//
-
-// forward references
 class ReferencePolicy;
-class AbstractRefProcTaskExecutor;
+class ReferenceProcessorPhaseTimes;
 
 // List of discovered references.
 class DiscoveredList {
@@ -65,6 +51,8 @@
   void   set_length(size_t len) { _len = len;  }
   void   inc_length(size_t inc) { _len += inc; assert(_len > 0, "Error"); }
   void   dec_length(size_t dec) { _len -= dec; }
+
+  inline void clear();
 private:
   // Set value depending on UseCompressedOops. This could be a template class
   // but then we have to fix all the instantiations and declarations that use this class.
@@ -93,10 +81,8 @@
   oop                _first_seen; // cyclic linked list check
   )
 
-  NOT_PRODUCT(
   size_t             _processed;
   size_t             _removed;
-  )
 
 public:
   inline DiscoveredListIterator(DiscoveredList&    refs_list,
@@ -153,10 +139,8 @@
   void clear_referent();
 
   // Statistics
-  NOT_PRODUCT(
   inline size_t processed() const { return _processed; }
-  inline size_t removed() const   { return _removed; }
-  )
+  inline size_t removed() const { return _removed; }
 
   inline void move_to_next() {
     if (_current_discovered == _next_discovered) {
@@ -166,12 +150,50 @@
       _current_discovered = _next_discovered;
     }
     assert(_current_discovered != _first_seen, "cyclic ref_list found");
-    NOT_PRODUCT(_processed++);
+    _processed++;
   }
 };
 
+// The ReferenceProcessor class encapsulates the per-"collector" processing
+// of java.lang.Reference objects for GC. The interface is useful for supporting
+// a generational abstraction, in particular when there are multiple
+// generations that are being independently collected -- possibly
+// concurrently and/or incrementally.
+// ReferenceProcessor class abstracts away from a generational setting
+// by using a closure that determines whether a given reference or referent are
+// subject to this ReferenceProcessor's discovery, thus allowing its use in a
+// straightforward manner in a general, non-generational, non-contiguous generation
+// (or heap) setting.
 class ReferenceProcessor : public ReferenceDiscoverer {
+  friend class RefProcPhase1Task;
+  friend class RefProcPhase2Task;
+  friend class RefProcPhase3Task;
+  friend class RefProcPhase4Task;
+public:
+  // Names of sub-phases of reference processing. Indicates the type of the reference
+  // processed and the associated phase number at the end.
+  enum RefProcSubPhases {
+    SoftRefSubPhase1,
+    SoftRefSubPhase2,
+    WeakRefSubPhase2,
+    FinalRefSubPhase2,
+    FinalRefSubPhase3,
+    PhantomRefSubPhase4,
+    RefSubPhaseMax
+  };
+
+  // Main phases of reference processing.
+  enum RefProcPhases {
+    RefPhase1,
+    RefPhase2,
+    RefPhase3,
+    RefPhase4,
+    RefPhaseMax
+  };
+
+private:
   size_t total_count(DiscoveredList lists[]) const;
+  void verify_total_count_zero(DiscoveredList lists[], const char* type) NOT_DEBUG_RETURN;
 
   // The SoftReference master timestamp clock
   static jlong _soft_ref_timestamp_clock;
@@ -222,15 +244,72 @@
   DiscoveredList* _discoveredFinalRefs;
   DiscoveredList* _discoveredPhantomRefs;
 
- public:
+  // Phase 1: Re-evaluate soft ref policy.
+  void process_soft_ref_reconsider(BoolObjectClosure* is_alive,
+                                   OopClosure* keep_alive,
+                                   VoidClosure* complete_gc,
+                                   AbstractRefProcTaskExecutor*  task_executor,
+                                   ReferenceProcessorPhaseTimes* phase_times);
+
+  // Phase 2: Drop Soft/Weak/Final references with a NULL or live referent, and clear
+  // and enqueue non-Final references.
+  void process_soft_weak_final_refs(BoolObjectClosure* is_alive,
+                                    OopClosure* keep_alive,
+                                    VoidClosure* complete_gc,
+                                    AbstractRefProcTaskExecutor*  task_executor,
+                                    ReferenceProcessorPhaseTimes* phase_times);
+
+  // Phase 3: Keep alive followers of Final references, and enqueue.
+  void process_final_keep_alive(OopClosure* keep_alive,
+                                VoidClosure* complete_gc,
+                                AbstractRefProcTaskExecutor*  task_executor,
+                                ReferenceProcessorPhaseTimes* phase_times);
+
+  // Phase 4: Drop and keep alive live Phantom references, or clear and enqueue if dead.
+  void process_phantom_refs(BoolObjectClosure* is_alive,
+                            OopClosure* keep_alive,
+                            VoidClosure* complete_gc,
+                            AbstractRefProcTaskExecutor*  task_executor,
+                            ReferenceProcessorPhaseTimes* phase_times);
+
+  // Work methods used by the process_* methods. All methods return the number of
+  // removed elements.
+
+  // (SoftReferences only) Traverse the list and remove any SoftReferences whose
+  // referents are not alive, but that should be kept alive for policy reasons.
+  // Keep alive the transitive closure of all such referents.
+  size_t process_soft_ref_reconsider_work(DiscoveredList&     refs_list,
+                                          ReferencePolicy*    policy,
+                                          BoolObjectClosure*  is_alive,
+                                          OopClosure*         keep_alive,
+                                          VoidClosure*        complete_gc);
+
+  // Traverse the list and remove any Refs whose referents are alive,
+  // or NULL if discovery is not atomic. Enqueue and clear the reference for
+  // others if do_enqueue_and_clear is set.
+  size_t process_soft_weak_final_refs_work(DiscoveredList&    refs_list,
+                                           BoolObjectClosure* is_alive,
+                                           OopClosure*        keep_alive,
+                                           bool               do_enqueue_and_clear);
+
+  // Keep alive followers of referents for FinalReferences. Must only be called for
+  // those.
+  size_t process_final_keep_alive_work(DiscoveredList&    refs_list,
+                                       OopClosure*        keep_alive,
+                                       VoidClosure*       complete_gc);
+
+  size_t process_phantom_refs_work(DiscoveredList&    refs_list,
+                                   BoolObjectClosure* is_alive,
+                                   OopClosure*        keep_alive,
+                                   VoidClosure*       complete_gc);
+
+public:
   static int number_of_subclasses_of_ref() { return (REF_PHANTOM - REF_OTHER); }
 
   uint num_queues() const                  { return _num_queues; }
   uint max_num_queues() const              { return _max_num_queues; }
   void set_active_mt_degree(uint v);
 
-  DiscoveredList* discovered_refs()        { return _discovered_refs; }
-
   ReferencePolicy* setup_policy(bool always_clear) {
     _current_soft_ref_policy = always_clear ?
       _always_clear_soft_ref_policy : _default_soft_ref_policy;
@@ -238,38 +317,6 @@
     return _current_soft_ref_policy;
   }
 
-  // Process references with a certain reachability level.
-  void process_discovered_reflist(DiscoveredList                refs_lists[],
-                                  ReferencePolicy*              policy,
-                                  bool                          clear_referent,
-                                  BoolObjectClosure*            is_alive,
-                                  OopClosure*                   keep_alive,
-                                  VoidClosure*                  complete_gc,
-                                  AbstractRefProcTaskExecutor*  task_executor,
-                                  ReferenceProcessorPhaseTimes* phase_times);
-
-  // Work methods used by the method process_discovered_reflist
-  // Phase1: keep alive all those referents that are otherwise
-  // dead but which must be kept alive by policy (and their closure).
-  void process_phase1(DiscoveredList&     refs_list,
-                      ReferencePolicy*    policy,
-                      BoolObjectClosure*  is_alive,
-                      OopClosure*         keep_alive,
-                      VoidClosure*        complete_gc);
-  // Phase2: remove all those references whose referents are
-  // reachable.
-  void process_phase2(DiscoveredList&    refs_list,
-                      BoolObjectClosure* is_alive,
-                      OopClosure*        keep_alive,
-                      VoidClosure*       complete_gc);
-  // Phase3: process the referents by either clearing them
-  // or keeping them alive (and their closure), and enqueuing them.
-  void process_phase3(DiscoveredList&    refs_list,
-                      bool               clear_referent,
-                      BoolObjectClosure* is_alive,
-                      OopClosure*        keep_alive,
-                      VoidClosure*       complete_gc);
-
   // "Preclean" all the discovered reference lists by removing references that
   // are active (e.g. due to the mutator calling enqueue()) or with NULL or
   // strongly reachable referents.
@@ -285,11 +332,11 @@
                                       YieldClosure*      yield,
                                       GCTimer*           gc_timer);
 
+private:
   // Returns the name of the discovered reference list
   // occupying the i / _num_queues slot.
   const char* list_name(uint i);
 
-private:
   // "Preclean" the given discovered reference list by removing references with
   // the attributes mentioned in preclean_discovered_references().
   // Supports both normal and fine grain yielding.
@@ -323,6 +370,9 @@
   void balance_queues(DiscoveredList refs_lists[]);
   bool need_balance_queues(DiscoveredList refs_lists[]);
 
+  // If the given queues need to be balanced, do so.
+  void maybe_balance_queues(DiscoveredList refs_lists[]);
+
   // Update (advance) the soft ref master clock field.
   void update_soft_ref_master_clock();
 
@@ -346,7 +396,6 @@
 
   static void init_statics();
 
- public:
   // get and set "is_alive_non_header" field
   BoolObjectClosure* is_alive_non_header() {
     return _is_alive_non_header;
@@ -576,7 +625,6 @@
   }
 };
 
-
 // This class is an interface used to implement task execution for the
 // reference processing.
 class AbstractRefProcTaskExecutor {
@@ -595,30 +643,27 @@
 // Abstract reference processing task to execute.
 class AbstractRefProcTaskExecutor::ProcessTask {
 protected:
-  ProcessTask(ReferenceProcessor&           ref_processor,
-              DiscoveredList                refs_lists[],
-              bool                          marks_oops_alive,
+  ReferenceProcessor&           _ref_processor;
+  // Indicates whether the phase could generate work that should be balanced across
+  // threads after execution.
+  bool                          _marks_oops_alive;
+  ReferenceProcessorPhaseTimes* _phase_times;
+
+  ProcessTask(ReferenceProcessor& ref_processor,
+              bool marks_oops_alive,
               ReferenceProcessorPhaseTimes* phase_times)
     : _ref_processor(ref_processor),
-      _refs_lists(refs_lists),
-      _phase_times(phase_times),
-      _marks_oops_alive(marks_oops_alive)
+      _marks_oops_alive(marks_oops_alive),
+      _phase_times(phase_times)
   { }
 
 public:
-  virtual void work(unsigned int work_id, BoolObjectClosure& is_alive,
+  virtual void work(uint worker_id,
+                    BoolObjectClosure& is_alive,
                     OopClosure& keep_alive,
                     VoidClosure& complete_gc) = 0;
 
-  // Returns true if a task marks some oops as alive.
-  bool marks_oops_alive() const
-  { return _marks_oops_alive; }
-
-protected:
-  ReferenceProcessor&           _ref_processor;
-  DiscoveredList*               _refs_lists;
-  ReferenceProcessorPhaseTimes* _phase_times;
-  const bool                    _marks_oops_alive;
+  bool marks_oops_alive() const { return _marks_oops_alive; }
 };
 
 #endif // SHARE_VM_GC_SHARED_REFERENCEPROCESSOR_HPP
--- a/src/hotspot/share/gc/shared/referenceProcessor.inline.hpp	Mon Jun 18 08:07:15 2018 +0530
+++ b/src/hotspot/share/gc/shared/referenceProcessor.inline.hpp	Tue May 29 09:26:00 2018 +0200
@@ -47,6 +47,11 @@
  return head() == NULL;
 }
 
+void DiscoveredList::clear() {
+  set_head(NULL);
+  set_length(0);
+}
+
 DiscoveredListIterator::DiscoveredListIterator(DiscoveredList&    refs_list,
                                                OopClosure*        keep_alive,
                                                BoolObjectClosure* is_alive):
@@ -57,10 +62,8 @@
 #ifdef ASSERT
   _first_seen(refs_list.head()),
 #endif
-#ifndef PRODUCT
   _processed(0),
   _removed(0),
-#endif
   _next_discovered(NULL),
   _keep_alive(keep_alive),
   _is_alive(is_alive) {
--- a/src/hotspot/share/gc/shared/referenceProcessorPhaseTimes.cpp	Mon Jun 18 08:07:15 2018 +0530
+++ b/src/hotspot/share/gc/shared/referenceProcessorPhaseTimes.cpp	Tue May 29 09:26:00 2018 +0200
@@ -31,61 +31,96 @@
 #include "logging/logStream.hpp"
 #include "memory/allocation.inline.hpp"
 
-RefProcWorkerTimeTracker::RefProcWorkerTimeTracker(ReferenceProcessorPhaseTimes::RefProcPhaseNumbers number,
-                                                   ReferenceProcessorPhaseTimes* phase_times,
-                                                   uint worker_id) :
-  _worker_time(NULL), _start_time(os::elapsedTime()), _worker_id(worker_id) {
-  assert (phase_times != NULL, "Invariant");
+#define ASSERT_REF_TYPE(ref_type) assert((ref_type) >= REF_SOFT && (ref_type) <= REF_PHANTOM, \
+                                         "Invariant (%d)", (int)(ref_type))
+
+#define ASSERT_PHASE(phase) assert((phase) >= ReferenceProcessor::RefPhase1 && \
+                                   (phase) < ReferenceProcessor::RefPhaseMax,  \
+                                   "Invariant (%d)", (int)(phase));  // half-open range: RefPhaseMax is excluded
+
+#define ASSERT_SUB_PHASE(phase) assert((phase) >= ReferenceProcessor::SoftRefSubPhase1 && \
+                                       (phase) < ReferenceProcessor::RefSubPhaseMax, \
+                                       "Invariant (%d)", (int)(phase));  // half-open range: RefSubPhaseMax is excluded
+
+static const char* SubPhasesParWorkTitle[ReferenceProcessor::RefSubPhaseMax] = {
+       "SoftRef (ms):",
+       "SoftRef (ms):",
+       "WeakRef (ms):",
+       "FinalRef (ms):",
+       "FinalRef (ms):",
+       "PhantomRef (ms):"
+       };
+
+static const char* Phase2ParWorkTitle = "Total (ms):";
 
-  _worker_time = phase_times->worker_time_sec(phase_times->par_phase(number));
+static const char* SubPhasesSerWorkTitle[ReferenceProcessor::RefSubPhaseMax] = {
+       "SoftRef:",
+       "SoftRef:",
+       "WeakRef:",
+       "FinalRef:",
+       "FinalRef:",
+       "PhantomRef:"
+       };
+
+static const char* Phase2SerWorkTitle = "Total:";
+
+static const char* Indents[6] = {"", "  ", "    ", "      ", "        ", "          "};
+
+static const char* PhaseNames[ReferenceProcessor::RefPhaseMax] = {
+       "Reconsider SoftReferences",
+       "Notify Soft/WeakReferences",
+       "Notify and keep alive finalizable",
+       "Notify PhantomReferences"
+       };
+
+static const char* ReferenceTypeNames[REF_PHANTOM + 1] = {
+       "None", "Other", "SoftReference", "WeakReference", "FinalReference", "PhantomReference"
+       };
+
+STATIC_ASSERT((REF_PHANTOM + 1) == ARRAY_SIZE(ReferenceTypeNames));
+
+static const char* phase_enum_2_phase_string(ReferenceProcessor::RefProcPhases phase) {
+  assert(phase >= ReferenceProcessor::RefPhase1 && phase < ReferenceProcessor::RefPhaseMax,
+         "Invalid reference processing phase (%d)", phase);
+  return PhaseNames[phase];  // PhaseNames has exactly RefPhaseMax entries, so RefPhaseMax itself is out of bounds.
 }
 
-RefProcWorkerTimeTracker::RefProcWorkerTimeTracker(ReferenceProcessorPhaseTimes::RefProcParPhases phase,
-                                                   ReferenceProcessorPhaseTimes* phase_times,
-                                                   uint worker_id) :
-  _worker_time(NULL), _start_time(os::elapsedTime()), _worker_id(worker_id) {
-  assert (phase_times != NULL, "Invariant");
+static const char* ref_type_2_string(ReferenceType ref_type) {
+  ASSERT_REF_TYPE(ref_type);
+  return ReferenceTypeNames[ref_type];
+}
 
-  _worker_time = phase_times->worker_time_sec(phase);
+RefProcWorkerTimeTracker::RefProcWorkerTimeTracker(WorkerDataArray<double>* worker_time, uint worker_id) :
+  _worker_time(worker_time), _start_time(os::elapsedTime()), _worker_id(worker_id) {
+  assert(worker_time != NULL, "Invariant");
 }
 
 RefProcWorkerTimeTracker::~RefProcWorkerTimeTracker() {
-  _worker_time->set(_worker_id, os::elapsedTime() - _start_time);
+  double result = os::elapsedTime() - _start_time;
+  _worker_time->set(_worker_id, result);
+}
+
+RefProcSubPhasesWorkerTimeTracker::RefProcSubPhasesWorkerTimeTracker(ReferenceProcessor::RefProcSubPhases phase,
+                                                                     ReferenceProcessorPhaseTimes* phase_times,
+                                                                     uint worker_id) :
+  RefProcWorkerTimeTracker(phase_times->sub_phase_worker_time_sec(phase), worker_id) {
+}
+
+RefProcSubPhasesWorkerTimeTracker::~RefProcSubPhasesWorkerTimeTracker() {
 }
 
 RefProcPhaseTimeBaseTracker::RefProcPhaseTimeBaseTracker(const char* title,
+                                                         ReferenceProcessor::RefProcPhases phase_number,
                                                          ReferenceProcessorPhaseTimes* phase_times) :
-  _title(title), _phase_times(phase_times), _start_ticks(), _end_ticks() {
+  _phase_times(phase_times), _start_ticks(), _end_ticks(), _phase_number(phase_number) {
   assert(_phase_times != NULL, "Invariant");
 
   _start_ticks.stamp();
   if (_phase_times->gc_timer() != NULL) {
-    _phase_times->gc_timer()->register_gc_phase_start(_title, _start_ticks);
+    _phase_times->gc_timer()->register_gc_phase_start(title, _start_ticks);
   }
 }
 
-static const char* phase_enum_2_phase_string(ReferenceProcessorPhaseTimes::RefProcParPhases phase) {
-  switch(phase) {
-    case ReferenceProcessorPhaseTimes::SoftRefPhase1:
-      return "Phase1";
-    case ReferenceProcessorPhaseTimes::SoftRefPhase2:
-    case ReferenceProcessorPhaseTimes::WeakRefPhase2:
-    case ReferenceProcessorPhaseTimes::FinalRefPhase2:
-    case ReferenceProcessorPhaseTimes::PhantomRefPhase2:
-      return "Phase2";
-    case ReferenceProcessorPhaseTimes::SoftRefPhase3:
-    case ReferenceProcessorPhaseTimes::WeakRefPhase3:
-    case ReferenceProcessorPhaseTimes::FinalRefPhase3:
-    case ReferenceProcessorPhaseTimes::PhantomRefPhase3:
-      return "Phase3";
-    default:
-      ShouldNotReachHere();
-      return NULL;
-  }
-}
-
-static const char* Indents[6] = {"", "  ", "    ", "      ", "        ", "          "};
-
 Ticks RefProcPhaseTimeBaseTracker::end_ticks() {
   // If ASSERT is defined, the default value of Ticks will be -2.
   if (_end_ticks.value() <= 0) {
@@ -108,140 +143,83 @@
   }
 }
 
-RefProcBalanceQueuesTimeTracker::RefProcBalanceQueuesTimeTracker(ReferenceProcessorPhaseTimes* phase_times) :
-  RefProcPhaseTimeBaseTracker("Balance queues", phase_times) {}
+RefProcBalanceQueuesTimeTracker::RefProcBalanceQueuesTimeTracker(ReferenceProcessor::RefProcPhases phase_number,
+                                                                 ReferenceProcessorPhaseTimes* phase_times) :
+  RefProcPhaseTimeBaseTracker("Balance queues", phase_number, phase_times) {}
 
 RefProcBalanceQueuesTimeTracker::~RefProcBalanceQueuesTimeTracker() {
   double elapsed = elapsed_time();
-  phase_times()->set_balance_queues_time_ms(phase_times()->processing_ref_type(), elapsed);
+  phase_times()->set_balance_queues_time_ms(_phase_number, elapsed);
 }
 
-#define ASSERT_REF_TYPE(ref_type) assert(ref_type >= REF_SOFT && ref_type <= REF_PHANTOM, \
-                                         "Invariant (%d)", (int)ref_type)
-
-#define ASSERT_PHASE_NUMBER(phase_number) assert(phase_number >= ReferenceProcessorPhaseTimes::RefPhase1 && \
-                                                 phase_number <= ReferenceProcessorPhaseTimes::RefPhaseMax, \
-                                                 "Invariant (%d)", phase_number);
-
-static const char* phase_number_2_string(ReferenceProcessorPhaseTimes::RefProcPhaseNumbers phase_number) {
-  ASSERT_PHASE_NUMBER(phase_number);
-
-  switch(phase_number) {
-    case ReferenceProcessorPhaseTimes::RefPhase1:
-      return "Phase1";
-    case ReferenceProcessorPhaseTimes::RefPhase2:
-      return "Phase2";
-    case ReferenceProcessorPhaseTimes::RefPhase3:
-      return "Phase3";
-    default:
-      ShouldNotReachHere();
-      return NULL;
-  }
-}
-
-RefProcParPhaseTimeTracker::RefProcParPhaseTimeTracker(ReferenceProcessorPhaseTimes::RefProcPhaseNumbers phase_number,
+RefProcPhaseTimeTracker::RefProcPhaseTimeTracker(ReferenceProcessor::RefProcPhases phase_number,
                                                        ReferenceProcessorPhaseTimes* phase_times) :
-  _phase_number(phase_number),
-  RefProcPhaseTimeBaseTracker(phase_number_2_string(phase_number), phase_times) {}
-
-RefProcParPhaseTimeTracker::~RefProcParPhaseTimeTracker() {
-  double elapsed = elapsed_time();
-  ReferenceProcessorPhaseTimes::RefProcParPhases phase = phase_times()->par_phase(_phase_number);
-  phase_times()->set_par_phase_time_ms(phase, elapsed);
+  RefProcPhaseTimeBaseTracker(phase_enum_2_phase_string(phase_number), phase_number, phase_times) {
 }
 
-static const char* ref_type_2_string(ReferenceType ref_type) {
-  ASSERT_REF_TYPE(ref_type);
-
-  switch(ref_type) {
-    case REF_SOFT:
-      return "SoftReference";
-    case REF_WEAK:
-      return "WeakReference";
-    case REF_FINAL:
-      return "FinalReference";
-    case REF_PHANTOM:
-      return "PhantomReference";
-    default:
-      ShouldNotReachHere();
-      return NULL;
-  }
+RefProcPhaseTimeTracker::~RefProcPhaseTimeTracker() {
+  double elapsed = elapsed_time();
+  phase_times()->set_phase_time_ms(_phase_number, elapsed);
 }
 
-RefProcPhaseTimesTracker::RefProcPhaseTimesTracker(ReferenceType ref_type,
-                                                   ReferenceProcessorPhaseTimes* phase_times,
-                                                   ReferenceProcessor* rp) :
-  _rp(rp), RefProcPhaseTimeBaseTracker(ref_type_2_string(ref_type), phase_times) {
-  phase_times->set_processing_ref_type(ref_type);
-
-  size_t discovered = rp->total_reference_count(ref_type);
-  phase_times->set_ref_discovered(ref_type, discovered);
+RefProcTotalPhaseTimesTracker::RefProcTotalPhaseTimesTracker(ReferenceProcessor::RefProcPhases phase_number,
+                                                             ReferenceProcessorPhaseTimes* phase_times,
+                                                             ReferenceProcessor* rp) :  // base listed first: bases are always initialized before members
+  RefProcPhaseTimeBaseTracker(phase_enum_2_phase_string(phase_number), phase_number, phase_times), _rp(rp) {
 }
 
-RefProcPhaseTimesTracker::~RefProcPhaseTimesTracker() {
+RefProcTotalPhaseTimesTracker::~RefProcTotalPhaseTimesTracker() {
   double elapsed = elapsed_time();
-  ReferenceProcessorPhaseTimes* times = phase_times();
-  ReferenceType ref_type = times->processing_ref_type();
-  times->set_ref_proc_time_ms(ref_type, elapsed);
-
-  size_t after_count = _rp->total_reference_count(ref_type);
-  size_t discovered = times->ref_discovered(ref_type);
-  times->set_ref_cleared(ref_type, discovered - after_count);
+  phase_times()->set_phase_time_ms(_phase_number, elapsed);
 }
 
 ReferenceProcessorPhaseTimes::ReferenceProcessorPhaseTimes(GCTimer* gc_timer, uint max_gc_threads) :
   _gc_timer(gc_timer), _processing_is_mt(false) {
 
-  for (int i = 0; i < RefParPhaseMax; i++) {
-    _worker_time_sec[i] = new WorkerDataArray<double>(max_gc_threads, "Process lists (ms)");
-    _par_phase_time_ms[i] = uninitialized();
+  for (uint i = 0; i < ReferenceProcessor::RefSubPhaseMax; i++) {
+    _sub_phases_worker_time_sec[i] = new WorkerDataArray<double>(max_gc_threads, SubPhasesParWorkTitle[i]);
   }
+  _phase2_worker_time_sec = new WorkerDataArray<double>(max_gc_threads, Phase2ParWorkTitle);
 
-  for (int i = 0; i < number_of_subclasses_of_ref; i++) {
-    _ref_proc_time_ms[i] = uninitialized();
-    _balance_queues_time_ms[i] = uninitialized();
-    _ref_cleared[i] = 0;
-    _ref_discovered[i] = 0;
-    _ref_enqueued[i] = 0;
-  }
+  reset();
 }
 
 inline int ref_type_2_index(ReferenceType ref_type) {
   return ref_type - REF_SOFT;
 }
 
-#define ASSERT_PAR_PHASE(phase) assert(phase >= ReferenceProcessorPhaseTimes::SoftRefPhase1 && \
-                                       phase < ReferenceProcessorPhaseTimes::RefParPhaseMax, \
-                                       "Invariant (%d)", (int)phase);
-
-WorkerDataArray<double>* ReferenceProcessorPhaseTimes::worker_time_sec(RefProcParPhases par_phase) const {
-  ASSERT_PAR_PHASE(par_phase);
-  return _worker_time_sec[par_phase];
+WorkerDataArray<double>* ReferenceProcessorPhaseTimes::sub_phase_worker_time_sec(ReferenceProcessor::RefProcSubPhases sub_phase) const {
+  ASSERT_SUB_PHASE(sub_phase);
+  return _sub_phases_worker_time_sec[sub_phase];
 }
 
-double ReferenceProcessorPhaseTimes::par_phase_time_ms(RefProcParPhases par_phase) const {
-  ASSERT_PAR_PHASE(par_phase);
-  return _par_phase_time_ms[par_phase];
+double ReferenceProcessorPhaseTimes::phase_time_ms(ReferenceProcessor::RefProcPhases phase) const {
+  ASSERT_PHASE(phase);
+  return _phases_time_ms[phase];
 }
 
-void ReferenceProcessorPhaseTimes::set_par_phase_time_ms(RefProcParPhases par_phase,
-                                                         double par_phase_time_ms) {
-  ASSERT_PAR_PHASE(par_phase);
-  _par_phase_time_ms[par_phase] = par_phase_time_ms;
+void ReferenceProcessorPhaseTimes::set_phase_time_ms(ReferenceProcessor::RefProcPhases phase,
+                                                     double phase_time_ms) {
+  ASSERT_PHASE(phase);
+  _phases_time_ms[phase] = phase_time_ms;
 }
 
 void ReferenceProcessorPhaseTimes::reset() {
-  for (int i = 0; i < RefParPhaseMax; i++) {
-    _worker_time_sec[i]->reset();
-    _par_phase_time_ms[i] = uninitialized();
+  for (int i = 0; i < ReferenceProcessor::RefSubPhaseMax; i++) {
+    _sub_phases_worker_time_sec[i]->reset();
+    _sub_phases_total_time_ms[i] = uninitialized();
   }
 
+  for (int i = 0; i < ReferenceProcessor::RefPhaseMax; i++) {
+    _phases_time_ms[i] = uninitialized();
+    _balance_queues_time_ms[i] = uninitialized();
+  }
+
+  _phase2_worker_time_sec->reset();
+
   for (int i = 0; i < number_of_subclasses_of_ref; i++) {
-    _ref_proc_time_ms[i] = uninitialized();
-    _balance_queues_time_ms[i] = uninitialized();
     _ref_cleared[i] = 0;
     _ref_discovered[i] = 0;
-    _ref_enqueued[i] = 0;
   }
 
   _total_time_ms = uninitialized();
@@ -250,35 +228,26 @@
 }
 
 ReferenceProcessorPhaseTimes::~ReferenceProcessorPhaseTimes() {
-  for (int i = 0; i < RefParPhaseMax; i++) {
-    delete _worker_time_sec[i];
+  for (int i = 0; i < ReferenceProcessor::RefSubPhaseMax; i++) {
+    delete _sub_phases_worker_time_sec[i];
   }
-}
-
-double ReferenceProcessorPhaseTimes::ref_proc_time_ms(ReferenceType ref_type) const {
-  ASSERT_REF_TYPE(ref_type);
-  return _ref_proc_time_ms[ref_type_2_index(ref_type)];
+  delete _phase2_worker_time_sec;
 }
 
-void ReferenceProcessorPhaseTimes::set_ref_proc_time_ms(ReferenceType ref_type,
-                                                        double ref_proc_time_ms) {
-  ASSERT_REF_TYPE(ref_type);
-  _ref_proc_time_ms[ref_type_2_index(ref_type)] = ref_proc_time_ms;
+double ReferenceProcessorPhaseTimes::sub_phase_total_time_ms(ReferenceProcessor::RefProcSubPhases sub_phase) const {
+  ASSERT_SUB_PHASE(sub_phase);
+  return _sub_phases_total_time_ms[sub_phase];
 }
 
-size_t ReferenceProcessorPhaseTimes::ref_cleared(ReferenceType ref_type) const {
-  ASSERT_REF_TYPE(ref_type);
-  return _ref_cleared[ref_type_2_index(ref_type)];
+void ReferenceProcessorPhaseTimes::set_sub_phase_total_phase_time_ms(ReferenceProcessor::RefProcSubPhases sub_phase,
+                                                                     double time_ms) {
+  ASSERT_SUB_PHASE(sub_phase);
+  _sub_phases_total_time_ms[sub_phase] = time_ms;
 }
 
-void ReferenceProcessorPhaseTimes::set_ref_cleared(ReferenceType ref_type, size_t count) {
+void ReferenceProcessorPhaseTimes::add_ref_cleared(ReferenceType ref_type, size_t count) {
   ASSERT_REF_TYPE(ref_type);
-  _ref_cleared[ref_type_2_index(ref_type)] = count;
-}
-
-size_t ReferenceProcessorPhaseTimes::ref_discovered(ReferenceType ref_type) const {
-  ASSERT_REF_TYPE(ref_type);
-  return _ref_discovered[ref_type_2_index(ref_type)];
+  Atomic::add(count, &_ref_cleared[ref_type_2_index(ref_type)]);
 }
 
 void ReferenceProcessorPhaseTimes::set_ref_discovered(ReferenceType ref_type, size_t count) {
@@ -286,70 +255,14 @@
   _ref_discovered[ref_type_2_index(ref_type)] = count;
 }
 
-size_t ReferenceProcessorPhaseTimes::ref_enqueued(ReferenceType ref_type) const {
-  ASSERT_REF_TYPE(ref_type);
-  return _ref_enqueued[ref_type_2_index(ref_type)];
-}
-
-void ReferenceProcessorPhaseTimes::set_ref_enqueued(ReferenceType ref_type, size_t count) {
-  ASSERT_REF_TYPE(ref_type);
-  _ref_enqueued[ref_type_2_index(ref_type)] = count;
-}
-
-double ReferenceProcessorPhaseTimes::balance_queues_time_ms(ReferenceType ref_type) const {
-  ASSERT_REF_TYPE(ref_type);
-  return _balance_queues_time_ms[ref_type_2_index(ref_type)];
-}
-
-void ReferenceProcessorPhaseTimes::set_balance_queues_time_ms(ReferenceType ref_type, double time_ms) {
-  ASSERT_REF_TYPE(ref_type);
-  _balance_queues_time_ms[ref_type_2_index(ref_type)] = time_ms;
+double ReferenceProcessorPhaseTimes::balance_queues_time_ms(ReferenceProcessor::RefProcPhases phase) const {
+  ASSERT_PHASE(phase);
+  return _balance_queues_time_ms[phase];
 }
 
-ReferenceProcessorPhaseTimes::RefProcParPhases
-ReferenceProcessorPhaseTimes::par_phase(RefProcPhaseNumbers phase_number) const {
-  ASSERT_PHASE_NUMBER(phase_number);
-  ASSERT_REF_TYPE(_processing_ref_type);
-
-  int result = SoftRefPhase1;
-
-  switch(_processing_ref_type) {
-    case REF_SOFT:
-      result = (int)SoftRefPhase1;
-      result += phase_number;
-
-      assert((RefProcParPhases)result >= SoftRefPhase1 &&
-             (RefProcParPhases)result <= SoftRefPhase3,
-             "Invariant (%d)", result);
-      break;
-    case REF_WEAK:
-      result = (int)WeakRefPhase2;
-      result += (phase_number - 1);
-      assert((RefProcParPhases)result >= WeakRefPhase2 &&
-             (RefProcParPhases)result <= WeakRefPhase3,
-             "Invariant (%d)", result);
-      break;
-    case REF_FINAL:
-      result = (int)FinalRefPhase2;
-      result += (phase_number - 1);
-      assert((RefProcParPhases)result >= FinalRefPhase2 &&
-             (RefProcParPhases)result <= FinalRefPhase3,
-             "Invariant (%d)", result);
-      break;
-    case REF_PHANTOM:
-      result = (int)PhantomRefPhase2;
-      result += (phase_number - 1);
-      assert((RefProcParPhases)result >= PhantomRefPhase2 &&
-             (RefProcParPhases)result <= PhantomRefPhase3,
-             "Invariant (%d)", result);
-      break;
-    default:
-      ShouldNotReachHere();
-  }
-
-  ASSERT_PAR_PHASE(result);
-
-  return (RefProcParPhases)result;
+void ReferenceProcessorPhaseTimes::set_balance_queues_time_ms(ReferenceProcessor::RefProcPhases phase, double time_ms) {
+  ASSERT_PHASE(phase);
+  _balance_queues_time_ms[phase] = time_ms;
 }
 
 #define TIME_FORMAT "%.1lfms"
@@ -366,10 +279,16 @@
   }
 
   uint next_indent = base_indent + 1;
+  print_phase(ReferenceProcessor::RefPhase1, next_indent);
+  print_phase(ReferenceProcessor::RefPhase2, next_indent);
+  print_phase(ReferenceProcessor::RefPhase3, next_indent);
+  print_phase(ReferenceProcessor::RefPhase4, next_indent);
+
   print_reference(REF_SOFT, next_indent);
   print_reference(REF_WEAK, next_indent);
   print_reference(REF_FINAL, next_indent);
   print_reference(REF_PHANTOM, next_indent);
+
 }
 
 void ReferenceProcessorPhaseTimes::print_reference(ReferenceType ref_type, uint base_indent) const {
@@ -377,73 +296,95 @@
 
   if (lt.is_enabled()) {
     LogStream ls(lt);
-    uint next_indent = base_indent + 1;
     ResourceMark rm;
 
-    ls.print_cr("%s%s: " TIME_FORMAT,
-                Indents[base_indent], ref_type_2_string(ref_type), ref_proc_time_ms(ref_type));
+    ls.print_cr("%s%s:", Indents[base_indent], ref_type_2_string(ref_type));
+
+    uint const next_indent = base_indent + 1;
+    int const ref_type_index = ref_type_2_index(ref_type);
+
+    ls.print_cr("%sDiscovered: " SIZE_FORMAT, Indents[next_indent], _ref_discovered[ref_type_index]);
+    ls.print_cr("%sCleared: " SIZE_FORMAT, Indents[next_indent], _ref_cleared[ref_type_index]);
+  }
+}
+
+void ReferenceProcessorPhaseTimes::print_phase(ReferenceProcessor::RefProcPhases phase, uint indent) const {
+  double phase_time = phase_time_ms(phase);
 
-    double balance_time = balance_queues_time_ms(ref_type);
-    if (balance_time != uninitialized()) {
-      ls.print_cr("%s%s " TIME_FORMAT, Indents[next_indent], "Balance queues:", balance_time);
+  if (phase_time == uninitialized()) {
+    return;
+  }
+
+  LogTarget(Debug, gc, phases, ref) lt;
+  LogStream ls(lt);
+
+  ls.print_cr("%s%s%s " TIME_FORMAT,
+              Indents[indent],
+              phase_enum_2_phase_string(phase),
+              indent == 0 ? "" : ":", /* 0 indent logs don't need colon. */
+              phase_time);
+
+  LogTarget(Debug, gc, phases, ref) lt2;
+  if (lt2.is_enabled()) {
+    LogStream ls(lt2);
+
+    if (_processing_is_mt) {
+      print_balance_time(&ls, phase, indent + 1);
     }
 
-    switch(ref_type) {
-      case REF_SOFT:
-        print_phase(SoftRefPhase1, next_indent);
-        print_phase(SoftRefPhase2, next_indent);
-        print_phase(SoftRefPhase3, next_indent);
-        break;
-
-      case REF_WEAK:
-        print_phase(WeakRefPhase2, next_indent);
-        print_phase(WeakRefPhase3, next_indent);
+    switch (phase) {
+      case ReferenceProcessor::RefPhase1:
+        print_sub_phase(&ls, ReferenceProcessor::SoftRefSubPhase1, indent + 1);
         break;
-
-      case REF_FINAL:
-        print_phase(FinalRefPhase2, next_indent);
-        print_phase(FinalRefPhase3, next_indent);
+      case ReferenceProcessor::RefPhase2:
+        print_sub_phase(&ls, ReferenceProcessor::SoftRefSubPhase2, indent + 1);
+        print_sub_phase(&ls, ReferenceProcessor::WeakRefSubPhase2, indent + 1);
+        print_sub_phase(&ls, ReferenceProcessor::FinalRefSubPhase2, indent + 1);
         break;
-
-      case REF_PHANTOM:
-        print_phase(PhantomRefPhase2, next_indent);
-        print_phase(PhantomRefPhase3, next_indent);
+      case ReferenceProcessor::RefPhase3:
+        print_sub_phase(&ls, ReferenceProcessor::FinalRefSubPhase3, indent + 1);
         break;
-
+      case ReferenceProcessor::RefPhase4:
+        print_sub_phase(&ls, ReferenceProcessor::PhantomRefSubPhase4, indent + 1);
+        break;
       default:
         ShouldNotReachHere();
     }
-
-    ls.print_cr("%s%s " SIZE_FORMAT, Indents[next_indent], "Discovered:", ref_discovered(ref_type));
-    ls.print_cr("%s%s " SIZE_FORMAT, Indents[next_indent], "Cleared:", ref_cleared(ref_type));
-  }
-}
-
-void ReferenceProcessorPhaseTimes::print_phase(RefProcParPhases phase, uint indent) const {
-  double phase_time = par_phase_time_ms(phase);
-  if (phase_time != uninitialized()) {
-    LogTarget(Debug, gc, phases, ref) lt;
-
-    LogStream ls(lt);
-
-    ls.print_cr("%s%s%s " TIME_FORMAT,
-                Indents[indent],
-                phase_enum_2_phase_string(phase),
-                indent == 0 ? "" : ":", /* 0 indent logs don't need colon. */
-                phase_time);
-
-    LogTarget(Trace, gc, phases, ref) lt2;
-    if (_processing_is_mt && lt2.is_enabled()) {
-      LogStream ls(lt2);
-
-      ls.print("%s", Indents[indent + 1]);
-      // worker_time_sec is recorded in seconds but it will be printed in milliseconds.
-      worker_time_sec(phase)->print_summary_on(&ls, true);
+    if (phase == ReferenceProcessor::RefPhase2) {
+      print_worker_time(&ls, _phase2_worker_time_sec, Phase2SerWorkTitle, indent + 1);
     }
   }
 }
 
+void ReferenceProcessorPhaseTimes::print_balance_time(LogStream* ls, ReferenceProcessor::RefProcPhases phase, uint indent) const {
+  double balance_time = balance_queues_time_ms(phase);
+  if (balance_time != uninitialized()) {
+    ls->print_cr("%s%s " TIME_FORMAT, Indents[indent], "Balance queues:", balance_time);
+  }
+}
+
+void ReferenceProcessorPhaseTimes::print_sub_phase(LogStream* ls, ReferenceProcessor::RefProcSubPhases sub_phase, uint indent) const {
+  print_worker_time(ls, _sub_phases_worker_time_sec[sub_phase], SubPhasesSerWorkTitle[sub_phase], indent);
+}
+
+void ReferenceProcessorPhaseTimes::print_worker_time(LogStream* ls, WorkerDataArray<double>* worker_time, const char* ser_title, uint indent) const {
+  ls->print("%s", Indents[indent]);
+  if (_processing_is_mt) {
+    worker_time->print_summary_on(ls, true);
+    LogTarget(Trace, gc, phases, task) lt;
+    if (lt.is_enabled()) {
+      LogStream ls2(lt);
+      ls2.print("%s", Indents[indent]);
+      worker_time->print_details_on(&ls2);
+    }
+  } else {
+    ls->print_cr("%s " TIME_FORMAT,
+                 ser_title,
+                 worker_time->get(0) * MILLIUNITS);
+  }
+}
+
 #undef ASSERT_REF_TYPE
-#undef ASSERT_PHASE_NUMBER
-#undef ASSERT_PAR_PHASE
+#undef ASSERT_SUB_PHASE
+#undef ASSERT_PHASE
 #undef TIME_FORMAT
--- a/src/hotspot/share/gc/shared/referenceProcessorPhaseTimes.hpp	Mon Jun 18 08:07:15 2018 +0530
+++ b/src/hotspot/share/gc/shared/referenceProcessorPhaseTimes.hpp	Tue May 29 09:26:00 2018 +0200
@@ -25,108 +25,76 @@
 #ifndef SHARE_VM_GC_SHARED_REFERENCEPROCESSORPHASETIMES_HPP
 #define SHARE_VM_GC_SHARED_REFERENCEPROCESSORPHASETIMES_HPP
 
+#include "gc/shared/referenceProcessor.hpp"
 #include "gc/shared/referenceProcessorStats.hpp"
 #include "gc/shared/workerDataArray.hpp"
+#include "memory/allocation.hpp"
 #include "memory/referenceType.hpp"
 #include "utilities/ticks.hpp"
 
 class DiscoveredList;
 class GCTimer;
+class LogStream;
 
 class ReferenceProcessorPhaseTimes : public CHeapObj<mtGC> {
-public:
-  // Detailed phases that has parallel work.
-  enum RefProcParPhases {
-    SoftRefPhase1,
-    SoftRefPhase2,
-    SoftRefPhase3,
-    WeakRefPhase2,
-    WeakRefPhase3,
-    FinalRefPhase2,
-    FinalRefPhase3,
-    PhantomRefPhase2,
-    PhantomRefPhase3,
-    RefParPhaseMax
-  };
-
-  // Sub-phases that are used when processing each j.l.Reference types.
-  // Only SoftReference has RefPhase1.
-  enum RefProcPhaseNumbers {
-    RefPhase1,
-    RefPhase2,
-    RefPhase3,
-    RefPhaseMax
-  };
-
-private:
   static const int number_of_subclasses_of_ref = REF_PHANTOM - REF_OTHER; // 5 - 1 = 4
 
-  // Records per thread information of each phase.
-  WorkerDataArray<double>* _worker_time_sec[RefParPhaseMax];
-  // Records elapsed time of each phase.
-  double                   _par_phase_time_ms[RefParPhaseMax];
+  // Records per-thread time information for each sub phase.
+  WorkerDataArray<double>* _sub_phases_worker_time_sec[ReferenceProcessor::RefSubPhaseMax];
+  // Total time of each sub phase.
+  double                   _sub_phases_total_time_ms[ReferenceProcessor::RefSubPhaseMax];
 
-  // Total spent time for references.
-  // e.g. _ref_proc_time_ms[0] = _par_phase_time_ms[SoftRefPhase1] +
-  //                             _par_phase_time_ms[SoftRefPhase2] +
-  //                             _par_phase_time_ms[SoftRefPhase3] + extra time.
-  double                   _ref_proc_time_ms[number_of_subclasses_of_ref];
+  // Records total elapsed time for each phase.
+  double                   _phases_time_ms[ReferenceProcessor::RefPhaseMax];
+  // Records total queue balancing time for each phase.
+  double                   _balance_queues_time_ms[ReferenceProcessor::RefPhaseMax];
 
+  WorkerDataArray<double>* _phase2_worker_time_sec;
+
+  // Total time spent on reference processing.
   double                   _total_time_ms;
 
   size_t                   _ref_cleared[number_of_subclasses_of_ref];
   size_t                   _ref_discovered[number_of_subclasses_of_ref];
-  size_t                   _ref_enqueued[number_of_subclasses_of_ref];
-  double                   _balance_queues_time_ms[number_of_subclasses_of_ref];
 
   bool                     _processing_is_mt;
 
-  // Currently processing reference type.
-  ReferenceType            _processing_ref_type;
-
   GCTimer*                 _gc_timer;
 
-  double par_phase_time_ms(RefProcParPhases phase) const;
-  double ref_proc_time_ms(ReferenceType ref_type) const;
+  double phase_time_ms(ReferenceProcessor::RefProcPhases phase) const;
+  double sub_phase_total_time_ms(ReferenceProcessor::RefProcSubPhases sub_phase) const;
 
   double total_time_ms() const { return _total_time_ms; }
 
-  size_t ref_cleared(ReferenceType ref_type) const;
-  size_t ref_enqueued(ReferenceType ref_type) const;
-
-  double balance_queues_time_ms(ReferenceType ref_type) const;
+  double balance_queues_time_ms(ReferenceProcessor::RefProcPhases phase) const;
 
   void print_reference(ReferenceType ref_type, uint base_indent) const;
-  void print_phase(RefProcParPhases phase, uint indent) const;
 
+  void print_phase(ReferenceProcessor::RefProcPhases phase, uint indent) const;
+  void print_balance_time(LogStream* ls, ReferenceProcessor::RefProcPhases phase, uint indent) const;
+  void print_sub_phase(LogStream* ls, ReferenceProcessor::RefProcSubPhases sub_phase, uint indent) const;
+  void print_worker_time(LogStream* ls, WorkerDataArray<double>* worker_time, const char* ser_title, uint indent) const;
+
+  static double uninitialized() { return -1.0; }
 public:
   ReferenceProcessorPhaseTimes(GCTimer* gc_timer, uint max_gc_threads);
   ~ReferenceProcessorPhaseTimes();
 
-  static double uninitialized() { return -1.0; }
+  WorkerDataArray<double>* phase2_worker_time_sec() const { return _phase2_worker_time_sec; }
+  WorkerDataArray<double>* sub_phase_worker_time_sec(ReferenceProcessor::RefProcSubPhases phase) const;
+  void set_phase_time_ms(ReferenceProcessor::RefProcPhases phase, double par_phase_time_ms);
 
-  WorkerDataArray<double>* worker_time_sec(RefProcParPhases phase) const;
-  void set_par_phase_time_ms(RefProcParPhases phase, double par_phase_time_ms);
-
-  void set_ref_proc_time_ms(ReferenceType ref_type, double ref_proc_time_ms);
+  void set_sub_phase_total_phase_time_ms(ReferenceProcessor::RefProcSubPhases sub_phase, double ref_proc_time_ms);
 
   void set_total_time_ms(double total_time_ms) { _total_time_ms = total_time_ms; }
 
-  void set_ref_cleared(ReferenceType ref_type, size_t count);
-  size_t ref_discovered(ReferenceType ref_type) const;
+  void add_ref_cleared(ReferenceType ref_type, size_t count);
   void set_ref_discovered(ReferenceType ref_type, size_t count);
-  void set_ref_enqueued(ReferenceType ref_type, size_t count);
 
-  void set_balance_queues_time_ms(ReferenceType ref_type, double time_ms);
+  void set_balance_queues_time_ms(ReferenceProcessor::RefProcPhases phase, double time_ms);
 
   void set_processing_is_mt(bool processing_is_mt) { _processing_is_mt = processing_is_mt; }
 
-  ReferenceType processing_ref_type() const { return _processing_ref_type; }
-  void set_processing_ref_type(ReferenceType processing_ref_type) { _processing_ref_type = processing_ref_type; }
-
-  // Returns RefProcParPhases calculated from phase_number and _processing_ref_type.
-  RefProcParPhases par_phase(RefProcPhaseNumbers phase_number) const;
-
   GCTimer* gc_timer() const { return _gc_timer; }
 
   // Reset all fields. If not reset at next cycle, an assertion will fail.
@@ -135,38 +103,40 @@
   void print_all_references(uint base_indent = 0, bool print_total = true) const;
 };
 
-// Updates working time of each worker thread.
-class RefProcWorkerTimeTracker : public StackObj {
+class RefProcWorkerTimeTracker : public CHeapObj<mtGC> {
 protected:
   WorkerDataArray<double>* _worker_time;
   double                   _start_time;
   uint                     _worker_id;
-
 public:
-  RefProcWorkerTimeTracker(ReferenceProcessorPhaseTimes::RefProcPhaseNumbers number,
-                           ReferenceProcessorPhaseTimes* phase_times,
-                           uint worker_id);
-  RefProcWorkerTimeTracker(ReferenceProcessorPhaseTimes::RefProcParPhases phase,
-                           ReferenceProcessorPhaseTimes* phase_times,
-                           uint worker_id);
-  ~RefProcWorkerTimeTracker();
+  RefProcWorkerTimeTracker(WorkerDataArray<double>* worker_time, uint worker_id);
+  virtual ~RefProcWorkerTimeTracker();
+};
+
+// Updates working time of each worker thread for a given sub phase.
+class RefProcSubPhasesWorkerTimeTracker : public RefProcWorkerTimeTracker {
+public:
+  RefProcSubPhasesWorkerTimeTracker(ReferenceProcessor::RefProcSubPhases phase,
+                                    ReferenceProcessorPhaseTimes* phase_times,
+                                    uint worker_id);
+  ~RefProcSubPhasesWorkerTimeTracker();
 };
 
 class RefProcPhaseTimeBaseTracker : public StackObj {
 protected:
-  const char*                   _title;
   ReferenceProcessorPhaseTimes* _phase_times;
   Ticks                         _start_ticks;
   Ticks                         _end_ticks;
 
+  ReferenceProcessor::RefProcPhases _phase_number;
+
   Ticks end_ticks();
   double elapsed_time();
   ReferenceProcessorPhaseTimes* phase_times() const { return _phase_times; }
-  // Print phase elapsed time with each worker information if MT processed.
-  void print_phase(ReferenceProcessorPhaseTimes::RefProcParPhases phase, uint indent);
 
 public:
   RefProcPhaseTimeBaseTracker(const char* title,
+                              ReferenceProcessor::RefProcPhases _phase_number,
                               ReferenceProcessorPhaseTimes* phase_times);
   ~RefProcPhaseTimeBaseTracker();
 };
@@ -175,30 +145,27 @@
 // save it into GCTimer.
 class RefProcBalanceQueuesTimeTracker : public RefProcPhaseTimeBaseTracker {
 public:
-  RefProcBalanceQueuesTimeTracker(ReferenceProcessorPhaseTimes* phase_times);
+  RefProcBalanceQueuesTimeTracker(ReferenceProcessor::RefProcPhases phase_number,
+                                  ReferenceProcessorPhaseTimes* phase_times);
   ~RefProcBalanceQueuesTimeTracker();
 };
 
 // Updates phase time at ReferenceProcessorPhaseTimes and save it into GCTimer.
-class RefProcParPhaseTimeTracker : public RefProcPhaseTimeBaseTracker {
-  ReferenceProcessorPhaseTimes::RefProcPhaseNumbers _phase_number;
-
+class RefProcPhaseTimeTracker : public RefProcPhaseTimeBaseTracker {
 public:
-  RefProcParPhaseTimeTracker(ReferenceProcessorPhaseTimes::RefProcPhaseNumbers phase_number,
-                             ReferenceProcessorPhaseTimes* phase_times);
-  ~RefProcParPhaseTimeTracker();
+  RefProcPhaseTimeTracker(ReferenceProcessor::RefProcPhases phase_number,
+                          ReferenceProcessorPhaseTimes* phase_times);
+  ~RefProcPhaseTimeTracker();
 };
 
-// Updates phase time related information.
-// - Each phase processing time, cleared/discovered reference counts and stats for each working threads if MT processed.
-class RefProcPhaseTimesTracker : public RefProcPhaseTimeBaseTracker {
+// Highest level time tracker.
+class RefProcTotalPhaseTimesTracker : public RefProcPhaseTimeBaseTracker {
   ReferenceProcessor* _rp;
-
 public:
-  RefProcPhaseTimesTracker(ReferenceType ref_type,
-                           ReferenceProcessorPhaseTimes* phase_times,
-                           ReferenceProcessor* rp);
-  ~RefProcPhaseTimesTracker();
+  RefProcTotalPhaseTimesTracker(ReferenceProcessor::RefProcPhases phase_number,
+                                ReferenceProcessorPhaseTimes* phase_times,
+                                ReferenceProcessor* rp);
+  ~RefProcTotalPhaseTimesTracker();
 };
 
 #endif // SHARE_VM_GC_SHARED_REFERENCEPROCESSORPHASETIMES_HPP
--- a/test/hotspot/jtreg/gc/logging/TestPrintReferences.java	Mon Jun 18 08:07:15 2018 +0530
+++ b/test/hotspot/jtreg/gc/logging/TestPrintReferences.java	Tue May 29 09:26:00 2018 +0200
@@ -41,183 +41,209 @@
 import java.util.regex.Matcher;
 
 public class TestPrintReferences {
-  static String output;
-  static final String doubleRegex = "[0-9]+[.,][0-9]+";
-  static final String referenceProcessing = "Reference Processing";
-  static final String softReference = "SoftReference";
-  static final String weakReference = "WeakReference";
-  static final String finalReference = "FinalReference";
-  static final String phantomReference = "PhantomReference";
-  static final String phase1 = "Phase1";
-  static final String phase2 = "Phase2";
-  static final String phase3 = "Phase3";
-  static final String gcLogTimeRegex = ".* GC\\([0-9]+\\) ";
+    static String output;
+    static final String doubleRegex = "[0-9]+[.,][0-9]+";
+    static final String referenceProcessing = "Reference Processing";
+    static final String softReference = "SoftReference";
+    static final String weakReference = "WeakReference";
+    static final String finalReference = "FinalReference";
+    static final String phantomReference = "PhantomReference";
 
-  public static void main(String[] args) throws Exception {
-    test(true);
-    test(false);
-  }
+    static final String phaseReconsiderSoftReferences = "Reconsider SoftReferences";
+    static final String phaseNotifySoftWeakReferences = "Notify Soft/WeakReferences";
+    static final String phaseNotifyKeepAliveFinalizer = "Notify and keep alive finalizable";
+    static final String phaseNotifyPhantomReferences  = "Notify PhantomReferences";
+
+    static final String phase1 = "Phase1";
+    static final String phase2 = "Phase2";
+    static final String phase3 = "Phase3";
+    static final String gcLogTimeRegex = ".* GC\\([0-9]+\\) ";
 
-  static String indent(int count) {
-    return " {" + count + "}";
-  }
+    public static void main(String[] args) throws Exception {
+        testPhases(true);
+        testPhases(false);
+        testRefs();
+    }
+
+    static String indent(int count) {
+        return " {" + count + "}";
+    }
 
-  public static void test(boolean parallelRefProcEnabled) throws Exception {
-    ProcessBuilder pb_enabled = ProcessTools.createJavaProcessBuilder("-Xlog:gc+phases+ref=debug",
-                                                                      "-XX:+UseG1GC",
-                                                                      "-Xmx32M",
-                                                                      // Explicit thread setting is required to avoid using only 1 thread
-                                                                      "-XX:" + (parallelRefProcEnabled ? "+" : "-") + "ParallelRefProcEnabled",
-                                                                      "-XX:ParallelGCThreads=2",
-                                                                      GCTest.class.getName());
-    OutputAnalyzer output = new OutputAnalyzer(pb_enabled.start());
+    public static void testRefs() throws Exception {
+        ProcessBuilder pb_enabled = ProcessTools.createJavaProcessBuilder("-Xlog:gc+ref+phases=debug",
+                                                                          "-XX:+UseG1GC",
+                                                                          "-Xmx32M",
+                                                                          GCTest.class.getName());
+        OutputAnalyzer output = new OutputAnalyzer(pb_enabled.start());
 
-    checkLogFormat(output, parallelRefProcEnabled);
-    checkLogValue(output);
+        checkRefsLogFormat(output);
 
-    output.shouldHaveExitValue(0);
-  }
+        output.shouldHaveExitValue(0);
+    }
 
-  // Find the first Reference Processing log and check its format.
-  public static void checkLogFormat(OutputAnalyzer output, boolean parallelRefProcEnabled) {
-    String countRegex = "[0-9]+";
-    String timeRegex = doubleRegex + "ms";
-    String totalRegex = gcLogTimeRegex + indent(4) + referenceProcessing + ": " + timeRegex + "\n";
-    String balanceRegex = parallelRefProcEnabled ? gcLogTimeRegex + indent(8) + "Balance queues: " + timeRegex + "\n" : "";
-    String softRefRegex = gcLogTimeRegex + indent(6) + softReference + ": " + timeRegex + "\n";
-    String weakRefRegex = gcLogTimeRegex + indent(6) + weakReference + ": " + timeRegex + "\n";
-    String finalRefRegex = gcLogTimeRegex + indent(6) + finalReference + ": " + timeRegex + "\n";
-    String phantomRefRegex = gcLogTimeRegex + indent(6) + phantomReference + ": " + timeRegex + "\n";
-    String refDetailRegex = gcLogTimeRegex + indent(8) + phase2 + ": " + timeRegex + "\n" +
-                            gcLogTimeRegex + indent(8) + phase3 + ": " + timeRegex + "\n" +
-                            gcLogTimeRegex + indent(8) + "Discovered: " + countRegex + "\n" +
-                            gcLogTimeRegex + indent(8) + "Cleared: " + countRegex + "\n";
-    String softRefDetailRegex = gcLogTimeRegex + indent(8) + phase1 + ": " + timeRegex + "\n" + refDetailRegex;
+    private static String refRegex(String reftype) {
+        String countRegex = "[0-9]+";
+        return gcLogTimeRegex + indent(6) + reftype + ":\n" +
+               gcLogTimeRegex + indent(8) + "Discovered: " + countRegex + "\n" +
+               gcLogTimeRegex + indent(8) + "Cleared: " + countRegex + "\n";
+    }
+
+    private static void checkRefsLogFormat(OutputAnalyzer output) {
+        output.shouldMatch(refRegex("SoftReference") +
+                           refRegex("WeakReference") +
+                           refRegex("FinalReference") +
+                           refRegex("PhantomReference"));
+    }
 
-    output.shouldMatch(/* Total Reference processing time */
-                       totalRegex +
-                       /* SoftReference processing */
-                       softRefRegex + balanceRegex + softRefDetailRegex +
-                       /* WeakReference processing */
-                       weakRefRegex + balanceRegex + refDetailRegex +
-                       /* FinalReference processing */
-                       finalRefRegex + balanceRegex + refDetailRegex +
-                       /* PhantomReference processing */
-                       phantomRefRegex + balanceRegex + refDetailRegex
-                       );
-  }
+    public static void testPhases(boolean parallelRefProcEnabled) throws Exception {
+        ProcessBuilder pb_enabled = ProcessTools.createJavaProcessBuilder("-Xlog:gc+phases+ref=debug",
+                                                                          "-XX:+UseG1GC",
+                                                                          "-Xmx32M",
+                                                                          "-XX:" + (parallelRefProcEnabled ? "+" : "-") + "ParallelRefProcEnabled",
+                                                                          "-XX:-UseDynamicNumberOfGCThreads",
+                                                                          "-XX:ParallelGCThreads=2",
+                                                                          GCTest.class.getName());
+        OutputAnalyzer output = new OutputAnalyzer(pb_enabled.start());
+
+        checkLogFormat(output, parallelRefProcEnabled);
+        checkLogValue(output);
+
+        output.shouldHaveExitValue(0);
+    }
 
-  // After getting time value, update 'output' for next use.
-  public static BigDecimal getTimeValue(String name, int indentCount) {
-    // Pattern of 'name', 'value' and some extra strings.
-    String patternString = gcLogTimeRegex + indent(indentCount) + name + ": " + "(" + doubleRegex + ")";
-    Matcher m = Pattern.compile(patternString).matcher(output);
-     if (!m.find()) {
-      throw new RuntimeException("Could not find time log for " + patternString);
-     }
+    private static String phaseRegex(String phaseName) {
+        final String timeRegex = doubleRegex + "ms";
+        return indent(6) + phaseName + ": " + timeRegex + "\n";
+    }
 
-    String match = m.group();
-    String value = m.group(1);
-
-    double result = Double.parseDouble(value);
-
-    int index = output.indexOf(match);
-    if (index != -1) {
-      output = output.substring(index, output.length());
+    private static String subphaseRegex(String subphaseName, boolean parallelRefProcEnabled) {
+        final String timeRegex = "\\s+" + doubleRegex;
+        if (parallelRefProcEnabled) {
+            final String timeInParRegex = timeRegex +",\\s";
+            return gcLogTimeRegex + indent(8) + subphaseName +
+                   " \\(ms\\):\\s+Min: " + timeInParRegex + "Avg: " + timeInParRegex + "Max: " + timeInParRegex + "Diff: " + timeInParRegex + "Sum: " + timeInParRegex +
+                   "Workers: [0-9]+" + "\n";
+        } else {
+            return gcLogTimeRegex + indent(8) + subphaseName + ":" + timeRegex + "ms\n";
+        }
     }
 
-    // Convert to BigDecimal to control the precision of floating point arithmetic.
-    return BigDecimal.valueOf(result);
-  }
+    // Find the first Reference Processing log and check its format.
+    private static void checkLogFormat(OutputAnalyzer output, boolean parallelRefProcEnabled) {
+        String countRegex = "[0-9]+";
+        String timeRegex = doubleRegex + "ms";
 
-  // Reference log is printing 1 decimal place of elapsed time.
-  // So sum of each sub-phases could be slightly larger than the enclosing phase in some cases.
-  // e.g. If there are 3 sub-phases:
-  //      Actual value:  SoftReference(5.55) = phase1(1.85) + phase2(1.85) + phase3(1.85)
-  //      Log value:     SoftReference(5.6) = phase1(1.9) + phase2(1.9) + phase3(1.9)
-  //      When checked:  5.6 < 5.7 (sum of phase1~3)
-  // Because of this we need method to verify that our measurements and calculations are valid.
-  public static boolean greaterThanOrApproximatelyEqual(BigDecimal phaseTime, BigDecimal sumOfSubPhasesTime, BigDecimal tolerance) {
-    if (phaseTime.compareTo(sumOfSubPhasesTime) >= 0) {
-      // phaseTime is greater than or equal.
-      return true;
-    }
+        /* Total Reference processing time */
+        String totalRegex = gcLogTimeRegex + indent(4) + referenceProcessing + ": " + timeRegex + "\n";
+
+        String balanceRegex = parallelRefProcEnabled ? gcLogTimeRegex + indent(8) + "Balance queues: " + timeRegex + "\n" : "";
+
+        final boolean p = parallelRefProcEnabled;
 
-    BigDecimal diff = sumOfSubPhasesTime.subtract(phaseTime);
-    if (diff.compareTo(tolerance) <= 0) {
-      // Difference is within tolerance, so approximately equal.
-      return true;
-    }
-
-    // sumOfSubPhasesTime is greater than phaseTime and not within tolerance.
-    return false;
-  }
+        String phase1Regex = gcLogTimeRegex + phaseRegex(phaseReconsiderSoftReferences) + balanceRegex + subphaseRegex("SoftRef", p);
+        String phase2Regex = gcLogTimeRegex + phaseRegex(phaseNotifySoftWeakReferences) +
+                             balanceRegex +
+                             subphaseRegex("SoftRef", p) +
+                             subphaseRegex("WeakRef", p) +
+                             subphaseRegex("FinalRef", p) +
+                             subphaseRegex("Total", p);
+        String phase3Regex = gcLogTimeRegex + phaseRegex(phaseNotifyKeepAliveFinalizer) + balanceRegex + subphaseRegex("FinalRef", p);
+        String phase4Regex = gcLogTimeRegex + phaseRegex(phaseNotifyPhantomReferences) + balanceRegex + subphaseRegex("PhantomRef", p);
 
-  public static BigDecimal checkPhaseTime(String refType) {
-    BigDecimal phaseTime = getTimeValue(refType, 2);
-    BigDecimal sumOfSubPhasesTime = BigDecimal.valueOf(0.0);
-
-    if (softReference.equals(refType)) {
-      sumOfSubPhasesTime = sumOfSubPhasesTime.add(getTimeValue(phase1, 4));
-    }
-    sumOfSubPhasesTime = sumOfSubPhasesTime.add(getTimeValue(phase2, 4));
-    sumOfSubPhasesTime = sumOfSubPhasesTime.add(getTimeValue(phase3, 4));
-
-    // If there are 3 sub-phases, we should allow 0.1 tolerance.
-    final BigDecimal toleranceFor3SubPhases = BigDecimal.valueOf(0.1);
-    if (!greaterThanOrApproximatelyEqual(phaseTime, sumOfSubPhasesTime, toleranceFor3SubPhases)) {
-      throw new RuntimeException(refType +" time(" + phaseTime +
-                                 "ms) is less than the sum(" + sumOfSubPhasesTime + "ms) of each phases");
+        output.shouldMatch(totalRegex +
+                           phase1Regex +
+                           phase2Regex +
+                           phase3Regex +
+                           phase4Regex);
     }
 
-    return phaseTime;
-  }
+    // Returns the time logged for 'name', then trims 'output' to start at that match for the next lookup.
+    private static BigDecimal getTimeValue(String name, int indentCount) {
+        // gcLogTimeRegex prefix, indentation, then "name: value"; the value is read back via group(1).
+        String patternString = gcLogTimeRegex + indent(indentCount) + name + ": " + "(" + doubleRegex + ")";
+        Matcher m = Pattern.compile(patternString).matcher(output);
+        if (!m.find()) {
+            throw new RuntimeException("Could not find time log for " + patternString);
+        }
 
-  // Find the first concurrent Reference Processing log and compare phase time vs. sum of sub-phases.
-  public static void checkLogValue(OutputAnalyzer out) {
-    output = out.getStdout();
+        String match = m.group();
+        String value = m.group(1);
+
+        double result = Double.parseDouble(value);
+
+        int index = output.indexOf(match);
+        if (index != -1) {
+            output = output.substring(index, output.length());
+        }
 
-    String patternString = gcLogTimeRegex + indent(0) +
-                           referenceProcessing + ": " + "[0-9]+[.,][0-9]+";
-    Matcher m = Pattern.compile(patternString).matcher(output);
-    if (m.find()) {
-        int start = m.start();
-        int end = output.length();
-        // If there's another concurrent Reference Processing log, ignore it.
-        if (m.find()) {
-            end = m.start();
+        // Convert to BigDecimal to control the precision of floating point arithmetic.
+        return BigDecimal.valueOf(result);
+   }
+
+    // The reference log prints elapsed times with one decimal place, so the sum of
+    // the sub-phase times can be slightly larger than the enclosing phase time.
+    // This method therefore also accepts a sum that exceeds the phase time by at most the given tolerance.
+    private static boolean greaterThanOrApproximatelyEqual(BigDecimal phaseTime, BigDecimal sumOfSubPhasesTime, BigDecimal tolerance) {
+        if (phaseTime.compareTo(sumOfSubPhasesTime) >= 0) {
+            // phaseTime is greater than or equal.
+            return true;
         }
-        if (start != -1) {
-            output = output.substring(start, end);
-            checkTrimmedLogValue();
+
+        BigDecimal diff = sumOfSubPhasesTime.subtract(phaseTime);
+        if (diff.compareTo(tolerance) <= 0) {
+            // Difference is within tolerance, so approximately equal.
+            return true;
         }
-     }
-  }
+
+        // sumOfSubPhasesTime is greater than phaseTime and not within tolerance.
+        return false;
+    }
 
-  public static void checkTrimmedLogValue() {
-    BigDecimal refProcTime = getTimeValue(referenceProcessing, 0);
+    // Find the first concurrent Reference Processing log and compare phase time vs. sum of sub-phases.
+    public static void checkLogValue(OutputAnalyzer out) {
+        output = out.getStdout();
 
-    BigDecimal sumOfSubPhasesTime = checkPhaseTime(softReference);
-    sumOfSubPhasesTime = sumOfSubPhasesTime.add(checkPhaseTime(weakReference));
-    sumOfSubPhasesTime = sumOfSubPhasesTime.add(checkPhaseTime(finalReference));
-    sumOfSubPhasesTime = sumOfSubPhasesTime.add(checkPhaseTime(phantomReference));
-
-    // If there are 4 sub-phases, we should allow 0.2 tolerance.
-    final BigDecimal toleranceFor4SubPhases = BigDecimal.valueOf(0.2);
-    if (!greaterThanOrApproximatelyEqual(refProcTime, sumOfSubPhasesTime, toleranceFor4SubPhases)) {
-      throw new RuntimeException("Reference Processing time(" + refProcTime + "ms) is less than the sum("
-                                 + sumOfSubPhasesTime + "ms) of each phases");
+        String patternString = gcLogTimeRegex + indent(0) +
+                               referenceProcessing + ": " + "[0-9]+[.,][0-9]+";
+        Matcher m = Pattern.compile(patternString).matcher(output);
+        if (m.find()) {
+            int start = m.start();
+            int end = output.length();
+            // If there's another concurrent Reference Processing log, ignore it.
+            if (m.find()) {
+                end = m.start();
+            }
+            if (start != -1) {
+                output = output.substring(start, end);
+                checkTrimmedLogValue();
+            }
+        }
     }
-  }
+
+    private static void checkTrimmedLogValue() {
+        BigDecimal refProcTime = getTimeValue(referenceProcessing, 0);
 
-  static class GCTest {
-    static final int SIZE = 512 * 1024;
-    static Object[] dummy = new Object[SIZE];
+        BigDecimal sumOfSubPhasesTime = getTimeValue(phaseReconsiderSoftReferences, 2);
+        sumOfSubPhasesTime = sumOfSubPhasesTime.add(getTimeValue(phaseNotifySoftWeakReferences, 2));
+        sumOfSubPhasesTime = sumOfSubPhasesTime.add(getTimeValue(phaseNotifyKeepAliveFinalizer, 2));
+        sumOfSubPhasesTime = sumOfSubPhasesTime.add(getTimeValue(phaseNotifyPhantomReferences, 2));
 
-    public static void main(String [] args) {
-      for (int i = 0; i < SIZE; i++) {
-        dummy[i] = new SoftReference<>(new Object());
-      }
+        // If there are 4 phases, we should allow 0.2 tolerance.
+        final BigDecimal toleranceFor4SubPhases = BigDecimal.valueOf(0.2);
+        if (!greaterThanOrApproximatelyEqual(refProcTime, sumOfSubPhasesTime, toleranceFor4SubPhases)) {
+            throw new RuntimeException("Reference Processing time(" + refProcTime + "ms) is less than the sum("
+                                       + sumOfSubPhasesTime + "ms) of each phases");
+        }
     }
-  }
+
+    static class GCTest {
+        static final int SIZE = 512 * 1024;
+        static Object[] dummy = new Object[SIZE];
+
+        public static void main(String [] args) {
+             for (int i = 0; i < SIZE; i++) {
+                  dummy[i] = new SoftReference<>(new Object());
+             }
+        }
+    }
 }