8230706: Waiting on completion of strong nmethod processing causes long pause times with G1
author tschatzl
Thu, 24 Oct 2019 11:08:16 +0200
changeset 58777 18c246ad2ff9
parent 58776 ea153023d832
child 58778 f8ffc59c2812
8230706: Waiting on completion of strong nmethod processing causes long pause times with G1
Summary: Instead of globally waiting for completion of strong nmethod processing during evacuation, synchronize nmethod processing on a per-nmethod basis so that only one thread processes a given nmethod at a time, using a per-nmethod state. This state indicates what work (strong/weak processing) still needs to be done and what has already been done.
Reviewed-by: sjohanss, kbarrett
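
Before the file list and diff, a minimal sketch of the claiming scheme the summary describes, reduced to standard C++: the low two bits of the per-nmethod link pointer hold the claim state, and workers race with a single compare-and-swap instead of waiting on a global barrier. This is illustration only, not code from the changeset; FakeNMethod, kTagMask and try_claim_weak_request are hypothetical names, and std::atomic stands in for HotSpot's Atomic:: wrappers.

// Sketch only: the tagged-link claim protocol in standard C++; all names here
// are hypothetical stand-ins, not HotSpot identifiers.
#include <atomic>
#include <cassert>
#include <cstdint>

struct FakeNMethod {
  // Low two bits of the link encode the claim state (cf. nmethod.hpp below):
  // 0 = weak request, 1 = weak done, 2 = strong request, 3 = strong done.
  static const uintptr_t kTagMask = 0x3;

  // 0 (NULL) means "unclaimed"; otherwise the upper bits link already visited
  // nmethods into a list whose last element points to itself.
  std::atomic<uintptr_t> oops_do_mark_link{0};

  static uintptr_t make_link(FakeNMethod* nm, uintptr_t tag) {
    assert(tag <= 3 && (reinterpret_cast<uintptr_t>(nm) & kTagMask) == 0);
    return reinterpret_cast<uintptr_t>(nm) | tag;
  }

  // Unclaimed -> N|weak_request: exactly one GC worker wins the CAS; the losers
  // simply move on to the next nmethod instead of waiting, which is what removes
  // the global "Wait For Strong Roots" barrier this change deletes.
  bool try_claim_weak_request() {
    uintptr_t expected = 0;  // expected "unclaimed" state
    return oops_do_mark_link.compare_exchange_strong(expected, make_link(this, 0));
  }
};
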
src/hotspot/share/code/nmethod.cpp
src/hotspot/share/code/nmethod.hpp
src/hotspot/share/gc/g1/g1CodeBlobClosure.cpp
src/hotspot/share/gc/g1/g1CodeBlobClosure.hpp
src/hotspot/share/gc/g1/g1FullCollector.cpp
src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp
src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp
src/hotspot/share/gc/g1/g1RootClosures.cpp
src/hotspot/share/gc/g1/g1RootClosures.hpp
src/hotspot/share/gc/g1/g1RootProcessor.cpp
src/hotspot/share/gc/g1/g1RootProcessor.hpp
src/hotspot/share/gc/g1/g1SharedClosures.hpp
src/hotspot/share/gc/z/zRootsIterator.cpp
src/hotspot/share/memory/iterator.cpp
test/hotspot/jtreg/gc/g1/TestGCLogMessages.java
test/jdk/jdk/jfr/event/gc/collection/TestG1ParallelPhases.java
--- a/src/hotspot/share/code/nmethod.cpp	Thu Oct 24 15:53:20 2019 +0800
+++ b/src/hotspot/share/code/nmethod.cpp	Thu Oct 24 11:08:16 2019 +0200
@@ -1826,57 +1826,183 @@
   }
 }
 
-#define NMETHOD_SENTINEL ((nmethod*)badAddress)
-
 nmethod* volatile nmethod::_oops_do_mark_nmethods;
 
-// An nmethod is "marked" if its _mark_link is set non-null.
-// Even if it is the end of the linked list, it will have a non-null link value,
-// as long as it is on the list.
-// This code must be MP safe, because it is used from parallel GC passes.
-bool nmethod::test_set_oops_do_mark() {
-  assert(nmethod::oops_do_marking_is_active(), "oops_do_marking_prologue must be called");
-  if (_oops_do_mark_link == NULL) {
-    // Claim this nmethod for this thread to mark.
-    if (Atomic::replace_if_null(NMETHOD_SENTINEL, &_oops_do_mark_link)) {
-      // Atomically append this nmethod (now claimed) to the head of the list:
-      nmethod* observed_mark_nmethods = _oops_do_mark_nmethods;
-      for (;;) {
-        nmethod* required_mark_nmethods = observed_mark_nmethods;
-        _oops_do_mark_link = required_mark_nmethods;
-        observed_mark_nmethods =
-          Atomic::cmpxchg(this, &_oops_do_mark_nmethods, required_mark_nmethods);
-        if (observed_mark_nmethods == required_mark_nmethods)
-          break;
-      }
-      // Mark was clear when we first saw this guy.
-      LogTarget(Trace, gc, nmethod) lt;
-      if (lt.is_enabled()) {
-        LogStream ls(lt);
-        CompileTask::print(&ls, this, "oops_do, mark", /*short_form:*/ true);
-      }
-      return false;
+void nmethod::oops_do_log_change(const char* state) {
+  LogTarget(Trace, gc, nmethod) lt;
+  if (lt.is_enabled()) {
+    LogStream ls(lt);
+    CompileTask::print(&ls, this, state, true /* short_form */);
+  }
+}
+
+bool nmethod::oops_do_try_claim() {
+  if (oops_do_try_claim_weak_request()) {
+    nmethod* result = oops_do_try_add_to_list_as_weak_done();
+    assert(result == NULL, "adding to global list as weak done must always succeed.");
+    return true;
+  }
+  return false;
+}
+
+bool nmethod::oops_do_try_claim_weak_request() {
+  assert(SafepointSynchronize::is_at_safepoint(), "only at safepoint");
+
+  if ((_oops_do_mark_link == NULL) &&
+      (Atomic::replace_if_null(mark_link(this, claim_weak_request_tag), &_oops_do_mark_link))) {
+    oops_do_log_change("oops_do, mark weak request");
+    return true;
+  }
+  return false;
+}
+
+void nmethod::oops_do_set_strong_done(nmethod* old_head) {
+  _oops_do_mark_link = mark_link(old_head, claim_strong_done_tag);
+}
+
+nmethod::oops_do_mark_link* nmethod::oops_do_try_claim_strong_done() {
+  assert(SafepointSynchronize::is_at_safepoint(), "only at safepoint");
+
+  oops_do_mark_link* old_next = Atomic::cmpxchg(mark_link(this, claim_strong_done_tag), &_oops_do_mark_link, mark_link(NULL, claim_weak_request_tag));
+  if (old_next == NULL) {
+    oops_do_log_change("oops_do, mark strong done");
+  }
+  return old_next;
+}
+
+nmethod::oops_do_mark_link* nmethod::oops_do_try_add_strong_request(nmethod::oops_do_mark_link* next) {
+  assert(SafepointSynchronize::is_at_safepoint(), "only at safepoint");
+  assert(next == mark_link(this, claim_weak_request_tag), "Should be claimed as weak");
+
+  oops_do_mark_link* old_next = Atomic::cmpxchg(mark_link(this, claim_strong_request_tag), &_oops_do_mark_link, next);
+  if (old_next == next) {
+    oops_do_log_change("oops_do, mark strong request");
+  }
+  return old_next;
+}
+
+bool nmethod::oops_do_try_claim_weak_done_as_strong_done(nmethod::oops_do_mark_link* next) {
+  assert(SafepointSynchronize::is_at_safepoint(), "only at safepoint");
+  assert(extract_state(next) == claim_weak_done_tag, "Should be claimed as weak done");
+
+  oops_do_mark_link* old_next = Atomic::cmpxchg(mark_link(extract_nmethod(next), claim_strong_done_tag), &_oops_do_mark_link, next);
+  if (old_next == next) {
+    oops_do_log_change("oops_do, mark weak done -> mark strong done");
+    return true;
+  }
+  return false;
+}
+
+nmethod* nmethod::oops_do_try_add_to_list_as_weak_done() {
+  assert(SafepointSynchronize::is_at_safepoint(), "only at safepoint");
+
+  assert(extract_state(_oops_do_mark_link) == claim_weak_request_tag ||
+         extract_state(_oops_do_mark_link) == claim_strong_request_tag,
+         "must be but is nmethod " PTR_FORMAT " %u", p2i(extract_nmethod(_oops_do_mark_link)), extract_state(_oops_do_mark_link));
+
+  nmethod* old_head = Atomic::xchg(this, &_oops_do_mark_nmethods);
+  // Self-loop if needed.
+  if (old_head == NULL) {
+    old_head = this;
+  }
+  // Try to install end of list and weak done tag.
+  if (Atomic::cmpxchg(mark_link(old_head, claim_weak_done_tag), &_oops_do_mark_link, mark_link(this, claim_weak_request_tag)) == mark_link(this, claim_weak_request_tag)) {
+    oops_do_log_change("oops_do, mark weak done");
+    return NULL;
+  } else {
+    return old_head;
+  }
+}
+
+void nmethod::oops_do_add_to_list_as_strong_done() {
+  assert(SafepointSynchronize::is_at_safepoint(), "only at safepoint");
+
+  nmethod* old_head = Atomic::xchg(this, &_oops_do_mark_nmethods);
+  // Self-loop if needed.
+  if (old_head == NULL) {
+    old_head = this;
+  }
+  assert(_oops_do_mark_link == mark_link(this, claim_strong_done_tag), "must be but is nmethod " PTR_FORMAT " state %u",
+         p2i(extract_nmethod(_oops_do_mark_link)), extract_state(_oops_do_mark_link));
+
+  oops_do_set_strong_done(old_head);
+}
+
+void nmethod::oops_do_process_weak(OopsDoProcessor* p) {
+  if (!oops_do_try_claim_weak_request()) {
+    // Failed to claim for weak processing.
+    oops_do_log_change("oops_do, mark weak request fail");
+    return;
+  }
+
+  p->do_regular_processing(this);
+
+  nmethod* old_head = oops_do_try_add_to_list_as_weak_done();
+  if (old_head == NULL) {
+    return;
+  }
+  oops_do_log_change("oops_do, mark weak done fail");
+  // Adding to global list failed, another thread added a strong request.
+  assert(extract_state(_oops_do_mark_link) == claim_strong_request_tag,
+         "must be but is %u", extract_state(_oops_do_mark_link));
+
+  oops_do_log_change("oops_do, mark weak request -> mark strong done");
+
+  oops_do_set_strong_done(old_head);
+  // Do missing strong processing.
+  p->do_remaining_strong_processing(this);
+}
+
+void nmethod::oops_do_process_strong(OopsDoProcessor* p) {
+  oops_do_mark_link* next_raw = oops_do_try_claim_strong_done();
+  if (next_raw == NULL) {
+    p->do_regular_processing(this);
+    oops_do_add_to_list_as_strong_done();
+    return;
+  }
+  // Claim failed. Figure out why and handle it.
+  if (oops_do_has_weak_request(next_raw)) {
+    oops_do_mark_link* old = next_raw;
+    // Claim failed because being weak processed (state == "weak request").
+    // Try to request deferred strong processing.
+    next_raw = oops_do_try_add_strong_request(old);
+    if (next_raw == old) {
+      // Successfully requested deferred strong processing.
+      return;
     }
+    // Failed because of a concurrent transition. No longer in "weak request" state.
   }
-  // On fall through, another racing thread marked this nmethod before we did.
-  return true;
+  if (oops_do_has_any_strong_state(next_raw)) {
+    // Already claimed for strong processing or requested for such.
+    return;
+  }
+  if (oops_do_try_claim_weak_done_as_strong_done(next_raw)) {
+    // Successfully claimed "weak done" as "strong done". Do the missing marking.
+    p->do_remaining_strong_processing(this);
+    return;
+  }
+  // Claim failed, some other thread got it.
 }
 
 void nmethod::oops_do_marking_prologue() {
+  assert_at_safepoint();
+
   log_trace(gc, nmethod)("oops_do_marking_prologue");
-  assert(_oops_do_mark_nmethods == NULL, "must not call oops_do_marking_prologue twice in a row");
-  // We use cmpxchg instead of regular assignment here because the user
-  // may fork a bunch of threads, and we need them all to see the same state.
-  nmethod* observed = Atomic::cmpxchg(NMETHOD_SENTINEL, &_oops_do_mark_nmethods, (nmethod*)NULL);
-  guarantee(observed == NULL, "no races in this sequential code");
+  assert(_oops_do_mark_nmethods == NULL, "must be empty");
 }
 
 void nmethod::oops_do_marking_epilogue() {
-  assert(_oops_do_mark_nmethods != NULL, "must not call oops_do_marking_epilogue twice in a row");
-  nmethod* cur = _oops_do_mark_nmethods;
-  while (cur != NMETHOD_SENTINEL) {
-    assert(cur != NULL, "not NULL-terminated");
-    nmethod* next = cur->_oops_do_mark_link;
+  assert_at_safepoint();
+
+  nmethod* next = _oops_do_mark_nmethods;
+  _oops_do_mark_nmethods = NULL;
+  if (next == NULL) {
+    return;
+  }
+  nmethod* cur;
+  do {
+    cur = next;
+    next = extract_nmethod(cur->_oops_do_mark_link);
     cur->_oops_do_mark_link = NULL;
     DEBUG_ONLY(cur->verify_oop_relocations());
 
@@ -1885,11 +2011,8 @@
       LogStream ls(lt);
       CompileTask::print(&ls, cur, "oops_do, unmark", /*short_form:*/ true);
     }
-    cur = next;
-  }
-  nmethod* required = _oops_do_mark_nmethods;
-  nmethod* observed = Atomic::cmpxchg((nmethod*)NULL, &_oops_do_mark_nmethods, required);
-  guarantee(observed == required, "no races in this sequential code");
+    // End if self-loop has been detected.
+  } while (cur != next);
   log_trace(gc, nmethod)("oops_do_marking_epilogue");
 }
 
@@ -2262,6 +2385,8 @@
   assert(voc.ok(), "embedded oops must be OK");
   Universe::heap()->verify_nmethod(this);
 
+  assert(_oops_do_mark_link == NULL, "_oops_do_mark_link for %s should be NULL but is " PTR_FORMAT,
+         nm->method()->external_name(), p2i(_oops_do_mark_link));
   verify_scopes();
 
   CompiledICLocker nm_verify(this);
--- a/src/hotspot/share/code/nmethod.hpp	Thu Oct 24 15:53:20 2019 +0800
+++ b/src/hotspot/share/code/nmethod.hpp	Thu Oct 24 11:08:16 2019 +0200
@@ -67,8 +67,8 @@
   friend class NMethodSweeper;
   friend class CodeCache;  // scavengable oops
   friend class JVMCINMethodData;
+
  private:
-
   // Shared fields for all nmethod's
   int       _entry_bci;        // != InvocationEntryBci if this nmethod is an on-stack replacement method
   jmethodID _jmethod_id;       // Cache of method()->jmethod_id()
@@ -76,8 +76,119 @@
   // To support simple linked-list chaining of nmethods:
   nmethod*  _osr_link;         // from InstanceKlass::osr_nmethods_head
 
+  // STW two-phase nmethod root processing helpers.
+  //
+  // When determining liveness of a given nmethod to do code cache unloading,
+  // some collectors need to do different things depending on whether the nmethods
+  // need to absolutely be kept alive during root processing; "strong"ly reachable
+  // nmethods are known to be kept alive at root processing, but the liveness of
+  // "weak"ly reachable ones is to be determined later.
+  //
+  // We want to allow strong and weak processing of nmethods by different threads
+  // at the same time without heavy synchronization. Additional constraints are
+  // to make sure that every nmethod is processed only a minimal number of times, and
+  // that any given nmethod is iterated by at most one thread at a time.
+  //
+  // Note that strong processing work must be a superset of weak processing work
+  // for this code to work.
+  //
+  // We store state and claim information in the _oops_do_mark_link member, using
+  // the two LSBs for the state and the remaining upper bits for linking together
+  // nmethods that were already visited.
+  // The last element is self-looped, i.e. points to itself to avoid some special
+  // "end-of-list" sentinel value.
+  //
+  // _oops_do_mark_link special values:
+  //
+  //   _oops_do_mark_link == NULL: the nmethod has not been visited at all yet, i.e.
+  //      is Unclaimed.
+  //
+  // For other values, the lowest two bits indicate the following states of the nmethod:
+  //
+  //   weak_request (WR): the nmethod has been claimed by a thread for weak processing
+  //   weak_done (WD): weak processing has been completed for this nmethod.
+  //   strong_request (SR): the nmethod has been found to need strong processing while
+  //       being weak processed.
+  //   strong_done (SD): strong processing has been completed for this nmethod.
+  //
+  // The following shows the _only_ possible progressions of the _oops_do_mark_link
+  // pointer.
+  //
+  // Given
+  //   N as the nmethod
+  //   X the current next value of _oops_do_mark_link
+  //
+  // Unclaimed (C)-> N|WR (C)-> X|WD: the nmethod has been processed weakly by
+  //   a single thread.
+  // Unclaimed (C)-> N|WR (C)-> X|WD (O)-> X|SD: after weak processing has been
+  //   completed (as above) another thread found that the nmethod needs strong
+  //   processing after all.
+  // Unclaimed (C)-> N|WR (O)-> N|SR (C)-> X|SD: during weak processing another
+  //   thread finds that the nmethod needs strong processing, marks it as such and
+  //   terminates. The original thread completes strong processing.
+  // Unclaimed (C)-> N|SD (C)-> X|SD: the nmethod has been processed strongly from
+  //   the beginning by a single thread.
+  //
+  // "|" describes the concatentation of bits in _oops_do_mark_link.
+  //
+  // The diagram also describes the threads responsible for changing the nmethod to
+  // the next state by marking the _transition_ with (C) and (O), which mean "current"
+  // and "other" thread respectively.
+  //
+  struct oops_do_mark_link; // Opaque data type.
+
+  // States used for claiming nmethods during root processing.
+  static const uint claim_weak_request_tag = 0;
+  static const uint claim_weak_done_tag = 1;
+  static const uint claim_strong_request_tag = 2;
+  static const uint claim_strong_done_tag = 3;
+
+  static oops_do_mark_link* mark_link(nmethod* nm, uint tag) {
+    assert(tag <= claim_strong_done_tag, "invalid tag %u", tag);
+    assert(is_aligned(nm, 4), "nmethod pointer must have zero lower two LSB");
+    return (oops_do_mark_link*)(((uintptr_t)nm & ~0x3) | tag);
+  }
+
+  static uint extract_state(oops_do_mark_link* link) {
+    return (uint)((uintptr_t)link & 0x3);
+  }
+
+  static nmethod* extract_nmethod(oops_do_mark_link* link) {
+    return (nmethod*)((uintptr_t)link & ~0x3);
+  }
+
+  void oops_do_log_change(const char* state);
+
+  static bool oops_do_has_weak_request(oops_do_mark_link* next) {
+    return extract_state(next) == claim_weak_request_tag;
+  }
+
+  static bool oops_do_has_any_strong_state(oops_do_mark_link* next) {
+    return extract_state(next) >= claim_strong_request_tag;
+  }
+
+  // Attempt Unclaimed -> N|WR transition. Returns true if successful.
+  bool oops_do_try_claim_weak_request();
+
+  // Attempt Unclaimed -> N|SD transition. Returns the current link.
+  oops_do_mark_link* oops_do_try_claim_strong_done();
+  // Attempt N|WR -> X|WD transition. Returns NULL if successful, X otherwise.
+  nmethod* oops_do_try_add_to_list_as_weak_done();
+
+  // Attempt X|WD -> N|SR transition. Returns the current link.
+  oops_do_mark_link* oops_do_try_add_strong_request(oops_do_mark_link* next);
+  // Attempt X|WD -> X|SD transition. Returns true if successful.
+  bool oops_do_try_claim_weak_done_as_strong_done(oops_do_mark_link* next);
+
+  // Do the N|SD -> X|SD transition.
+  void oops_do_add_to_list_as_strong_done();
+
+  // Sets this nmethod as strongly claimed (as part of N|SD -> X|SD and N|SR -> X|SD
+  // transitions).
+  void oops_do_set_strong_done(nmethod* old_head);
+
   static nmethod* volatile _oops_do_mark_nmethods;
-  nmethod*        volatile _oops_do_mark_link;
+  oops_do_mark_link* volatile _oops_do_mark_link;
 
   // offsets for entry points
   address _entry_point;                      // entry point with class check
@@ -480,11 +591,30 @@
   void oops_do(OopClosure* f) { oops_do(f, false); }
   void oops_do(OopClosure* f, bool allow_dead);
 
-  bool test_set_oops_do_mark();
+  // All-in-one claiming of nmethods: returns true if the caller successfully claimed that
+  // nmethod.
+  bool oops_do_try_claim();
+
+  // Class containing callbacks for the oops_do_process_weak/strong() methods
+  // below.
+  class OopsDoProcessor {
+  public:
+    // Process the oops of the given nmethod based on whether it has been called
+    // in a weak or strong processing context, i.e. apply either weak or strong
+    // work on it.
+    virtual void do_regular_processing(nmethod* nm) = 0;
+    // Assuming that the oops of the given nmethod have already had their weak
+    // processing applied, apply the remaining strong processing part.
+    virtual void do_remaining_strong_processing(nmethod* nm) = 0;
+  };
+
+  // The following two methods do the work corresponding to weak/strong nmethod
+  // processing.
+  void oops_do_process_weak(OopsDoProcessor* p);
+  void oops_do_process_strong(OopsDoProcessor* p);
+
   static void oops_do_marking_prologue();
   static void oops_do_marking_epilogue();
-  static bool oops_do_marking_is_active() { return _oops_do_mark_nmethods != NULL; }
-  bool test_oops_do_mark() { return _oops_do_mark_link != NULL; }
 
  private:
   ScopeDesc* scope_desc_in(address begin, address end);
--- a/src/hotspot/share/gc/g1/g1CodeBlobClosure.cpp	Thu Oct 24 15:53:20 2019 +0800
+++ b/src/hotspot/share/gc/g1/g1CodeBlobClosure.cpp	Thu Oct 24 11:08:16 2019 +0200
@@ -26,6 +26,7 @@
 #include "code/nmethod.hpp"
 #include "gc/g1/g1CodeBlobClosure.hpp"
 #include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1ConcurrentMark.inline.hpp"
 #include "gc/g1/heapRegion.hpp"
 #include "gc/g1/heapRegionRemSet.hpp"
 #include "oops/access.inline.hpp"
@@ -52,14 +53,62 @@
   do_oop_work(o);
 }
 
-void G1CodeBlobClosure::do_code_blob(CodeBlob* cb) {
-  nmethod* nm = cb->as_nmethod_or_null();
-  if (nm != NULL) {
-    if (!nm->test_set_oops_do_mark()) {
-      _oc.set_nm(nm);
-      nm->oops_do(&_oc);
-      nm->fix_oop_relocations();
-    }
+template<typename T>
+void G1CodeBlobClosure::MarkingOopClosure::do_oop_work(T* p) {
+  T oop_or_narrowoop = RawAccess<>::oop_load(p);
+  if (!CompressedOops::is_null(oop_or_narrowoop)) {
+    oop o = CompressedOops::decode_not_null(oop_or_narrowoop);
+    _cm->mark_in_next_bitmap(_worker_id, o);
   }
 }
 
+G1CodeBlobClosure::MarkingOopClosure::MarkingOopClosure(uint worker_id) :
+  _cm(G1CollectedHeap::heap()->concurrent_mark()), _worker_id(worker_id) { }
+
+void G1CodeBlobClosure::MarkingOopClosure::do_oop(oop* o) {
+  do_oop_work(o);
+}
+
+void G1CodeBlobClosure::MarkingOopClosure::do_oop(narrowOop* o) {
+  do_oop_work(o);
+}
+
+void G1CodeBlobClosure::do_evacuation_and_fixup(nmethod* nm) {
+  _oc.set_nm(nm);
+  nm->oops_do(&_oc);
+  nm->fix_oop_relocations();
+}
+
+void G1CodeBlobClosure::do_marking(nmethod* nm) {
+  nm->oops_do(&_marking_oc);
+}
+
+class G1NmethodProcessor : public nmethod::OopsDoProcessor {
+  G1CodeBlobClosure* _cl;
+
+public:
+  G1NmethodProcessor(G1CodeBlobClosure* cl) : _cl(cl) { }
+
+  void do_regular_processing(nmethod* nm) {
+    _cl->do_evacuation_and_fixup(nm);
+  }
+
+  void do_remaining_strong_processing(nmethod* nm) {
+    _cl->do_marking(nm);
+  }
+};
+
+void G1CodeBlobClosure::do_code_blob(CodeBlob* cb) {
+  nmethod* nm = cb->as_nmethod_or_null();
+  if (nm == NULL) {
+    return;
+  }
+
+  G1NmethodProcessor cl(this);
+
+  if (_strong) {
+    nm->oops_do_process_strong(&cl);
+  } else {
+    nm->oops_do_process_weak(&cl);
+  }
+}
--- a/src/hotspot/share/gc/g1/g1CodeBlobClosure.hpp	Thu Oct 24 15:53:20 2019 +0800
+++ b/src/hotspot/share/gc/g1/g1CodeBlobClosure.hpp	Thu Oct 24 11:08:16 2019 +0200
@@ -28,9 +28,11 @@
 #include "gc/g1/g1CollectedHeap.hpp"
 #include "memory/iterator.hpp"
 
+class G1ConcurrentMark;
 class nmethod;
 
 class G1CodeBlobClosure : public CodeBlobClosure {
+  // Gather nmethod remembered set entries.
   class HeapRegionGatheringOopClosure : public OopClosure {
     G1CollectedHeap* _g1h;
     OopClosure* _work;
@@ -50,9 +52,35 @@
     }
   };
 
+  // Mark all oops below TAMS.
+  class MarkingOopClosure : public OopClosure {
+    G1ConcurrentMark* _cm;
+    uint _worker_id;
+
+    template <typename T>
+    void do_oop_work(T* p);
+
+  public:
+    MarkingOopClosure(uint worker_id);
+
+    void do_oop(oop* o);
+    void do_oop(narrowOop* o);
+  };
+
   HeapRegionGatheringOopClosure _oc;
+  MarkingOopClosure _marking_oc;
+
+  bool _strong;
+
+  void do_code_blob_weak(CodeBlob* cb);
+  void do_code_blob_strong(CodeBlob* cb);
+
 public:
-  G1CodeBlobClosure(OopClosure* oc) : _oc(oc) {}
+  G1CodeBlobClosure(uint worker_id, OopClosure* oc, bool strong) :
+    _oc(oc), _marking_oc(worker_id), _strong(strong) { }
+
+  void do_evacuation_and_fixup(nmethod* nm);
+  void do_marking(nmethod* nm);
 
   void do_code_blob(CodeBlob* cb);
 };
--- a/src/hotspot/share/gc/g1/g1FullCollector.cpp	Thu Oct 24 15:53:20 2019 +0800
+++ b/src/hotspot/share/gc/g1/g1FullCollector.cpp	Thu Oct 24 11:08:16 2019 +0200
@@ -198,13 +198,17 @@
   // Recursively traverse all live objects and mark them.
   GCTraceTime(Info, gc, phases) info("Phase 1: Mark live objects", scope()->timer());
 
-  // Do the actual marking.
-  G1FullGCMarkTask marking_task(this);
-  run_task(&marking_task);
+  {
+    // Do the actual marking.
+    G1FullGCMarkTask marking_task(this);
+    run_task(&marking_task);
+  }
 
-  // Process references discovered during marking.
-  G1FullGCReferenceProcessingExecutor reference_processing(this);
-  reference_processing.execute(scope()->timer(), scope()->tracer());
+  {
+    // Process references discovered during marking.
+    G1FullGCReferenceProcessingExecutor reference_processing(this);
+    reference_processing.execute(scope()->timer(), scope()->tracer());
+  }
 
   // Weak oops cleanup.
   {
--- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp	Thu Oct 24 15:53:20 2019 +0800
+++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp	Thu Oct 24 11:08:16 2019 +0200
@@ -62,7 +62,6 @@
   _gc_par_phases[JVMTIRoots] = new WorkerDataArray<double>(max_gc_threads, "JVMTI Roots (ms):");
   AOT_ONLY(_gc_par_phases[AOTCodeRoots] = new WorkerDataArray<double>(max_gc_threads, "AOT Root Scan (ms):");)
   _gc_par_phases[CMRefRoots] = new WorkerDataArray<double>(max_gc_threads, "CM RefProcessor Roots (ms):");
-  _gc_par_phases[WaitForStrongRoots] = new WorkerDataArray<double>(max_gc_threads, "Wait For Strong Roots (ms):");
 
   _gc_par_phases[MergeER] = new WorkerDataArray<double>(max_gc_threads, "Eager Reclaim (ms):");
 
@@ -566,7 +565,6 @@
       "JVMTIRoots",
       AOT_ONLY("AOTCodeRoots" COMMA)
       "CMRefRoots",
-      "WaitForStrongRoots",
       "MergeER",
       "MergeRS",
       "OptMergeRS",
--- a/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp	Thu Oct 24 15:53:20 2019 +0800
+++ b/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp	Thu Oct 24 11:08:16 2019 +0200
@@ -57,7 +57,6 @@
     JVMTIRoots,
     AOT_ONLY(AOTCodeRoots COMMA)
     CMRefRoots,
-    WaitForStrongRoots,
     MergeER,
     MergeRS,
     OptMergeRS,
@@ -83,7 +82,7 @@
   };
 
   static const GCParPhases ExtRootScanSubPhasesFirst = ThreadRoots;
-  static const GCParPhases ExtRootScanSubPhasesLast = WaitForStrongRoots;
+  static const GCParPhases ExtRootScanSubPhasesLast = CMRefRoots;
 
   enum GCMergeRSWorkTimes {
     MergeRSMergedSparse,
--- a/src/hotspot/share/gc/g1/g1RootClosures.cpp	Thu Oct 24 15:53:20 2019 +0800
+++ b/src/hotspot/share/gc/g1/g1RootClosures.cpp	Thu Oct 24 11:08:16 2019 +0200
@@ -45,8 +45,6 @@
 
   CodeBlobClosure* strong_codeblobs()      { return &_closures._codeblobs; }
   CodeBlobClosure* weak_codeblobs()        { return &_closures._codeblobs; }
-
-  bool trace_metadata()         { return false; }
 };
 
 // Closures used during initial mark.
@@ -71,10 +69,6 @@
 
   CodeBlobClosure* strong_codeblobs()      { return &_strong._codeblobs; }
   CodeBlobClosure* weak_codeblobs()        { return &_weak._codeblobs; }
-
-  // If we are not marking all weak roots then we are tracing
-  // which metadata is alive.
-  bool trace_metadata()         { return MarkWeak == G1MarkPromotedFromRoot; }
 };
 
 G1EvacuationRootClosures* G1EvacuationRootClosures::create_root_closures(G1ParScanThreadState* pss, G1CollectedHeap* g1h) {
--- a/src/hotspot/share/gc/g1/g1RootClosures.hpp	Thu Oct 24 15:53:20 2019 +0800
+++ b/src/hotspot/share/gc/g1/g1RootClosures.hpp	Thu Oct 24 11:08:16 2019 +0200
@@ -52,9 +52,6 @@
   // Applied to code blobs treated as weak roots.
   virtual CodeBlobClosure* weak_codeblobs() = 0;
 
-  // Is this closure used for tracing metadata?
-  virtual bool trace_metadata() = 0;
-
   static G1EvacuationRootClosures* create_root_closures(G1ParScanThreadState* pss, G1CollectedHeap* g1h);
 };
 
--- a/src/hotspot/share/gc/g1/g1RootProcessor.cpp	Thu Oct 24 15:53:20 2019 +0800
+++ b/src/hotspot/share/gc/g1/g1RootProcessor.cpp	Thu Oct 24 11:08:16 2019 +0200
@@ -45,34 +45,10 @@
 #include "services/management.hpp"
 #include "utilities/macros.hpp"
 
-void G1RootProcessor::worker_has_discovered_all_strong_nmethods() {
-  assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading");
-
-  uint new_value = (uint)Atomic::add(1, &_n_workers_discovered_strong_classes);
-  if (new_value == n_workers()) {
-    // This thread is last. Notify the others.
-    MonitorLocker ml(&_lock, Mutex::_no_safepoint_check_flag);
-    _lock.notify_all();
-  }
-}
-
-void G1RootProcessor::wait_until_all_strong_nmethods_discovered() {
-  assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading");
-
-  if ((uint)_n_workers_discovered_strong_classes != n_workers()) {
-    MonitorLocker ml(&_lock, Mutex::_no_safepoint_check_flag);
-    while ((uint)_n_workers_discovered_strong_classes != n_workers()) {
-      ml.wait(0);
-    }
-  }
-}
-
 G1RootProcessor::G1RootProcessor(G1CollectedHeap* g1h, uint n_workers) :
     _g1h(g1h),
     _process_strong_tasks(G1RP_PS_NumElements),
-    _srs(n_workers),
-    _lock(Mutex::leaf, "G1 Root Scan barrier lock", false, Mutex::_safepoint_check_never),
-    _n_workers_discovered_strong_classes(0) {}
+    _srs(n_workers) {}
 
 void G1RootProcessor::evacuate_roots(G1ParScanThreadState* pss, uint worker_id) {
   G1GCPhaseTimes* phase_times = _g1h->phase_times();
@@ -80,7 +56,7 @@
   G1EvacPhaseTimesTracker timer(phase_times, pss, G1GCPhaseTimes::ExtRootScan, worker_id);
 
   G1EvacuationRootClosures* closures = pss->closures();
-  process_java_roots(closures, phase_times, worker_id, closures->trace_metadata() /* notify_claimed_nmethods_done */);
+  process_java_roots(closures, phase_times, worker_id);
 
   process_vm_roots(closures, phase_times, worker_id);
 
@@ -96,12 +72,6 @@
     }
   }
 
-  if (closures->trace_metadata()) {
-    G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::WaitForStrongRoots, worker_id);
-    // Wait to make sure all workers passed the strong nmethods phase.
-    wait_until_all_strong_nmethods_discovered();
-  }
-
   _process_strong_tasks.all_tasks_completed(n_workers());
 }
 
@@ -171,8 +141,7 @@
 
 void G1RootProcessor::process_java_roots(G1RootClosures* closures,
                                          G1GCPhaseTimes* phase_times,
-                                         uint worker_id,
-                                         bool notify_claimed_nmethods_done) {
+                                         uint worker_id) {
   // We need to make sure that the "strong" nmethods are processed first
   // using the strong closure. Only after that we process the weakly reachable
   // nmethods.
@@ -197,12 +166,6 @@
                                        closures->strong_codeblobs());
   }
 
-  // This is the point where this worker thread will not find more strong nmethods.
-  // Report this so G1 can synchronize the strong and weak CLDs/nmethods processing.
-  if (notify_claimed_nmethods_done) {
-    worker_has_discovered_all_strong_nmethods();
-  }
-
   {
     G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::CLDGRoots, worker_id);
     if (_process_strong_tasks.try_claim_task(G1RP_PS_ClassLoaderDataGraph_oops_do)) {
--- a/src/hotspot/share/gc/g1/g1RootProcessor.hpp	Thu Oct 24 15:53:20 2019 +0800
+++ b/src/hotspot/share/gc/g1/g1RootProcessor.hpp	Thu Oct 24 11:08:16 2019 +0200
@@ -50,10 +50,6 @@
   SubTasksDone _process_strong_tasks;
   StrongRootsScope _srs;
 
-  // Used to implement the Thread work barrier.
-  Monitor _lock;
-  volatile jint _n_workers_discovered_strong_classes;
-
   enum G1H_process_roots_tasks {
     G1RP_PS_Universe_oops_do,
     G1RP_PS_JNIHandles_oops_do,
@@ -69,13 +65,9 @@
     G1RP_PS_NumElements
   };
 
-  void worker_has_discovered_all_strong_nmethods();
-  void wait_until_all_strong_nmethods_discovered();
-
   void process_java_roots(G1RootClosures* closures,
                           G1GCPhaseTimes* phase_times,
-                          uint worker_id,
-                          bool notify_claimed_nmethods_done = false);
+                          uint worker_id);
 
   void process_vm_roots(G1RootClosures* closures,
                         G1GCPhaseTimes* phase_times,
--- a/src/hotspot/share/gc/g1/g1SharedClosures.hpp	Thu Oct 24 15:53:20 2019 +0800
+++ b/src/hotspot/share/gc/g1/g1SharedClosures.hpp	Thu Oct 24 11:08:16 2019 +0200
@@ -32,6 +32,11 @@
 // Simple holder object for a complete set of closures used by the G1 evacuation code.
 template <G1Mark Mark>
 class G1SharedClosures {
+  static bool needs_strong_processing() {
+    // Request strong code root processing when G1MarkFromRoot is passed in during
+    // initial mark.
+    return Mark == G1MarkFromRoot;
+  }
 public:
   G1ParCopyClosure<G1BarrierNone, Mark> _oops;
   G1ParCopyClosure<G1BarrierCLD,  Mark> _oops_in_cld;
@@ -43,5 +48,5 @@
     _oops(g1h, pss),
     _oops_in_cld(g1h, pss),
     _clds(&_oops_in_cld, process_only_dirty),
-    _codeblobs(&_oops) {}
+    _codeblobs(pss->worker_id(), &_oops, needs_strong_processing()) {}
 };
--- a/src/hotspot/share/gc/z/zRootsIterator.cpp	Thu Oct 24 15:53:20 2019 +0800
+++ b/src/hotspot/share/gc/z/zRootsIterator.cpp	Thu Oct 24 11:08:16 2019 +0200
@@ -149,7 +149,7 @@
 
   virtual void do_code_blob(CodeBlob* cb) {
     nmethod* const nm = cb->as_nmethod_or_null();
-    if (nm != NULL && !nm->test_set_oops_do_mark()) {
+    if (nm != NULL && nm->oops_do_try_claim()) {
       CodeBlobToOopClosure::do_code_blob(cb);
       _bs->disarm(nm);
     }
--- a/src/hotspot/share/memory/iterator.cpp	Thu Oct 24 15:53:20 2019 +0800
+++ b/src/hotspot/share/memory/iterator.cpp	Thu Oct 24 11:08:16 2019 +0200
@@ -59,7 +59,7 @@
 
 void MarkingCodeBlobClosure::do_code_blob(CodeBlob* cb) {
   nmethod* nm = cb->as_nmethod_or_null();
-  if (nm != NULL && !nm->test_set_oops_do_mark()) {
+  if (nm != NULL && nm->oops_do_try_claim()) {
     do_nmethod(nm);
   }
 }
--- a/test/hotspot/jtreg/gc/g1/TestGCLogMessages.java	Thu Oct 24 15:53:20 2019 +0800
+++ b/test/hotspot/jtreg/gc/g1/TestGCLogMessages.java	Thu Oct 24 11:08:16 2019 +0200
@@ -128,7 +128,6 @@
         new LogMessageWithLevel("CLDG Roots", Level.TRACE),
         new LogMessageWithLevel("JVMTI Roots", Level.TRACE),
         new LogMessageWithLevel("CM RefProcessor Roots", Level.TRACE),
-        new LogMessageWithLevel("Wait For Strong Roots", Level.TRACE),
         // Redirty Cards
         new LogMessageWithLevel("Redirty Cards", Level.DEBUG),
         new LogMessageWithLevel("Parallel Redirty", Level.TRACE),
--- a/test/jdk/jdk/jfr/event/gc/collection/TestG1ParallelPhases.java	Thu Oct 24 15:53:20 2019 +0800
+++ b/test/jdk/jdk/jfr/event/gc/collection/TestG1ParallelPhases.java	Thu Oct 24 11:08:16 2019 +0200
@@ -98,7 +98,6 @@
             "CLDGRoots",
             "JVMTIRoots",
             "CMRefRoots",
-            "WaitForStrongRoots",
             "MergeER",
             "MergeHCC",
             "MergeRS",