8232588: G1 concurrent System.gc can return early or late
author kbarrett
date Wed, 13 Nov 2019 18:00:30 -0500
changeset 59067 f080b08daace
parent 59066 439a147b2c0c
child 59068 dc45ed0ab083
8232588: G1 concurrent System.gc can return early or late
8233279: G1: GCLocker GC with +GCLockerInvokesConcurrent spins while cycle in progress
Summary: Refactor G1CH::try_collect and fix bugs with concurrent collections.
Reviewed-by: tschatzl, sjohanss
src/hotspot/share/gc/g1/g1CollectedHeap.cpp
src/hotspot/share/gc/g1/g1CollectedHeap.hpp
src/hotspot/share/gc/g1/g1ConcurrentMarkThread.cpp
src/hotspot/share/gc/g1/g1VMOperations.cpp
src/hotspot/share/gc/g1/g1VMOperations.hpp
src/hotspot/share/gc/g1/g1YoungRemSetSamplingThread.cpp
src/hotspot/share/runtime/mutexLocker.cpp
src/hotspot/share/runtime/mutexLocker.hpp
src/hotspot/share/runtime/vmOperations.hpp
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp	Wed Nov 13 14:08:04 2019 -0800
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp	Wed Nov 13 18:00:30 2019 -0500
@@ -2009,7 +2009,7 @@
 }
 
 bool G1CollectedHeap::should_upgrade_to_full_gc(GCCause::Cause cause) {
-  if(policy()->force_upgrade_to_full()) {
+  if (policy()->force_upgrade_to_full()) {
     return true;
   } else if (should_do_concurrent_full_gc(_gc_cause)) {
     return false;
@@ -2056,7 +2056,7 @@
 }
 
 void G1CollectedHeap::increment_old_marking_cycles_completed(bool concurrent) {
-  MonitorLocker x(FullGCCount_lock, Mutex::_no_safepoint_check_flag);
+  MonitorLocker ml(G1OldGCCount_lock, Mutex::_no_safepoint_check_flag);
 
   // We assume that if concurrent == true, then the caller is a
  // concurrent thread that has joined the Suspendible Thread
@@ -2096,91 +2096,211 @@
     _cm_thread->set_idle();
   }
 
-  // This notify_all() will ensure that a thread that called
-  // System.gc() with (with ExplicitGCInvokesConcurrent set or not)
-  // and it's waiting for a full GC to finish will be woken up. It is
-  // waiting in VM_G1CollectForAllocation::doit_epilogue().
-  FullGCCount_lock->notify_all();
+  // Notify threads waiting in System.gc() (with ExplicitGCInvokesConcurrent)
+  // for a full GC to finish that their wait is over.
+  ml.notify_all();
 }
 
 void G1CollectedHeap::collect(GCCause::Cause cause) {
-  try_collect(cause, true);
+  try_collect(cause);
+}
+
+// Return true if (x < y) with allowance for wraparound.
+static bool gc_counter_less_than(uint x, uint y) {
+  return (x - y) > (UINT_MAX/2);
 }
 
-bool G1CollectedHeap::try_collect(GCCause::Cause cause, bool retry_on_gc_failure) {
+// LOG_COLLECT_CONCURRENTLY(cause, msg, args...)
+// Macro so msg printing is format-checked.
+#define LOG_COLLECT_CONCURRENTLY(cause, ...)                            \
+  do {                                                                  \
+    LogTarget(Trace, gc) LOG_COLLECT_CONCURRENTLY_lt;                   \
+    if (LOG_COLLECT_CONCURRENTLY_lt.is_enabled()) {                     \
+      ResourceMark rm; /* For thread name. */                           \
+      LogStream LOG_COLLECT_CONCURRENTLY_s(&LOG_COLLECT_CONCURRENTLY_lt); \
+      LOG_COLLECT_CONCURRENTLY_s.print("%s: Try Collect Concurrently (%s): ", \
+                                       Thread::current()->name(),       \
+                                       GCCause::to_string(cause));      \
+      LOG_COLLECT_CONCURRENTLY_s.print(__VA_ARGS__);                    \
+    }                                                                   \
+  } while (0)
+
+#define LOG_COLLECT_CONCURRENTLY_COMPLETE(cause, result) \
+  LOG_COLLECT_CONCURRENTLY(cause, "complete %s", BOOL_TO_STR(result))
+
+bool G1CollectedHeap::try_collect_concurrently(GCCause::Cause cause,
+                                               uint gc_counter,
+                                               uint old_marking_started_before) {
   assert_heap_not_locked();
-
-  bool gc_succeeded;
-  bool should_retry_gc;
-
-  do {
-    should_retry_gc = false;
-
-    uint gc_count_before;
-    uint old_marking_count_before;
-    uint full_gc_count_before;
-
+  assert(should_do_concurrent_full_gc(cause),
+         "Non-concurrent cause %s", GCCause::to_string(cause));
+
+  for (uint i = 1; true; ++i) {
+    // Try to schedule an initial-mark evacuation pause that will
+    // start a concurrent cycle.
+    LOG_COLLECT_CONCURRENTLY(cause, "attempt %u", i);
+    VM_G1TryInitiateConcMark op(gc_counter,
+                                cause,
+                                policy()->max_pause_time_ms());
+    VMThread::execute(&op);
+
+    // Request is trivially finished.
+    if (cause == GCCause::_g1_periodic_collection) {
+      LOG_COLLECT_CONCURRENTLY_COMPLETE(cause, op.gc_succeeded());
+      return op.gc_succeeded();
+    }
+
+    // Lock to get consistent set of values.
+    uint old_marking_started_after;
+    uint old_marking_completed_after;
     {
       MutexLocker ml(Heap_lock);
-
-      // Read the GC count while holding the Heap_lock
-      gc_count_before = total_collections();
-      full_gc_count_before = total_full_collections();
-      old_marking_count_before = _old_marking_cycles_started;
+      // Update gc_counter for retrying VMOp if needed. Captured here to be
+      // consistent with the values we use below for termination tests.  If
+      // a retry is needed after a possible wait, and another collection
+      // occurs in the meantime, it will cause our retry to be skipped and
+      // we'll recheck for termination with updated conditions from that
+      // more recent collection.  That's what we want, rather than having
+      // our retry possibly perform an unnecessary collection.
+      gc_counter = total_collections();
+      old_marking_started_after = _old_marking_cycles_started;
+      old_marking_completed_after = _old_marking_cycles_completed;
     }
 
-    if (should_do_concurrent_full_gc(cause)) {
-      // Schedule an initial-mark evacuation pause that will start a
-      // concurrent cycle. We're setting word_size to 0 which means that
-      // we are not requesting a post-GC allocation.
-      VM_G1CollectForAllocation op(0,     /* word_size */
-                                   gc_count_before,
-                                   cause,
-                                   true,  /* should_initiate_conc_mark */
-                                   policy()->max_pause_time_ms());
-      VMThread::execute(&op);
-      gc_succeeded = op.gc_succeeded();
-      if (!gc_succeeded && retry_on_gc_failure) {
-        if (old_marking_count_before == _old_marking_cycles_started) {
-          should_retry_gc = op.should_retry_gc();
-        } else {
-          // A Full GC happened while we were trying to schedule the
-          // concurrent cycle. No point in starting a new cycle given
-          // that the whole heap was collected anyway.
+    if (!GCCause::is_user_requested_gc(cause)) {
+      // For an "automatic" (not user-requested) collection, we just need to
+      // ensure that progress is made.
+      //
+      // Request is finished if any of
+      // (1) the VMOp successfully performed a GC,
+      // (2) a concurrent cycle was already in progress,
+      // (3) a new cycle was started (by this thread or some other), or
+      // (4) a Full GC was performed.
+      // Cases (3) and (4) are detected together by a change to
+      // _old_marking_cycles_started.
+      //
+      // Note that (1) does not imply (3).  If we're still in the mixed
+      // phase of an earlier concurrent collection, the request to make the
+      // collection an initial-mark won't be honored.  If we don't check for
+      // both conditions we'll spin doing back-to-back collections.
+      if (op.gc_succeeded() ||
+          op.cycle_already_in_progress() ||
+          (old_marking_started_before != old_marking_started_after)) {
+        LOG_COLLECT_CONCURRENTLY_COMPLETE(cause, true);
+        return true;
+      }
+    } else {                    // User-requested GC.
+      // For a user-requested collection, we want to ensure that a complete
+      // full collection has been performed before returning, but without
+      // waiting for more than needed.
+
+      // For user-requested GCs (unlike non-user-requested), a successful VMOp
+      // implies a new cycle was started.  That's good, because it's not clear
+      // what we should do otherwise.  Trying again just does back-to-back GCs.
+      // Can't wait for someone else to start a cycle.  And returning fails
+      // to meet the goal of ensuring a full collection was performed.
+      assert(!op.gc_succeeded() ||
+             (old_marking_started_before != old_marking_started_after),
+             "invariant: succeeded %s, started before %u, started after %u",
+             BOOL_TO_STR(op.gc_succeeded()),
+             old_marking_started_before, old_marking_started_after);
+
+      // Request is finished if a full collection (concurrent or STW)
+      // was started after this request and has completed, e.g.
+      // started_before < completed_after.
+      if (gc_counter_less_than(old_marking_started_before,
+                               old_marking_completed_after)) {
+        LOG_COLLECT_CONCURRENTLY_COMPLETE(cause, true);
+        return true;
+      }
+
+      if (old_marking_started_after != old_marking_completed_after) {
+        // If there is an in-progress cycle (possibly started by us), then
+        // wait for that cycle to complete, e.g.
+        // while completed_now < started_after.
+        LOG_COLLECT_CONCURRENTLY(cause, "wait");
+        MonitorLocker ml(G1OldGCCount_lock);
+        while (gc_counter_less_than(_old_marking_cycles_completed,
+                                    old_marking_started_after)) {
+          ml.wait();
         }
-
-        if (should_retry_gc && GCLocker::is_active_and_needs_gc()) {
-          GCLocker::stall_until_clear();
+        // Request is finished if the collection we just waited for was
+        // started after this request.
+        if (old_marking_started_before != old_marking_started_after) {
+          LOG_COLLECT_CONCURRENTLY(cause, "complete after wait");
+          return true;
         }
       }
-    } else if (GCLocker::should_discard(cause, gc_count_before)) {
-      // Return false to be consistent with VMOp failure due to
-      // another collection slipping in after our gc_count but before
-      // our request is processed.  _gc_locker collections upgraded by
-      // GCLockerInvokesConcurrent are handled above and never discarded.
-      return false;
-    } else {
-      if (cause == GCCause::_gc_locker || cause == GCCause::_wb_young_gc
-          DEBUG_ONLY(|| cause == GCCause::_scavenge_alot)) {
-
-        // Schedule a standard evacuation pause. We're setting word_size
-        // to 0 which means that we are not requesting a post-GC allocation.
-        VM_G1CollectForAllocation op(0,     /* word_size */
-                                     gc_count_before,
-                                     cause,
-                                     false, /* should_initiate_conc_mark */
-                                     policy()->max_pause_time_ms());
-        VMThread::execute(&op);
-        gc_succeeded = op.gc_succeeded();
-      } else {
-        // Schedule a Full GC.
-        VM_G1CollectFull op(gc_count_before, full_gc_count_before, cause);
-        VMThread::execute(&op);
-        gc_succeeded = op.gc_succeeded();
+
+      // If VMOp was successful then it started a new cycle that the above
+      // wait etc. should have recognized as finishing this request.  This
+      // differs from a non-user-request, where gc_succeeded does not imply
+      // a new cycle was started.
+      assert(!op.gc_succeeded(), "invariant");
+
+      // If VMOp failed because a cycle was already in progress, it is now
+      // complete.  But it didn't finish this user-requested GC, so try
+      // again.
+      if (op.cycle_already_in_progress()) {
+        LOG_COLLECT_CONCURRENTLY(cause, "retry after in-progress");
+        continue;
       }
     }
-  } while (should_retry_gc);
-  return gc_succeeded;
+
+    // Collection failed and should be retried.
+    assert(op.transient_failure(), "invariant");
+
+    // If GCLocker is active, wait until clear before retrying.
+    if (GCLocker::is_active_and_needs_gc()) {
+      LOG_COLLECT_CONCURRENTLY(cause, "gc-locker stall");
+      GCLocker::stall_until_clear();
+    }
+
+    LOG_COLLECT_CONCURRENTLY(cause, "retry");
+  }
+}
+
+bool G1CollectedHeap::try_collect(GCCause::Cause cause) {
+  assert_heap_not_locked();
+
+  // Lock to get consistent set of values.
+  uint gc_count_before;
+  uint full_gc_count_before;
+  uint old_marking_started_before;
+  {
+    MutexLocker ml(Heap_lock);
+    gc_count_before = total_collections();
+    full_gc_count_before = total_full_collections();
+    old_marking_started_before = _old_marking_cycles_started;
+  }
+
+  if (should_do_concurrent_full_gc(cause)) {
+    return try_collect_concurrently(cause,
+                                    gc_count_before,
+                                    old_marking_started_before);
+  } else if (GCLocker::should_discard(cause, gc_count_before)) {
+    // Indicate failure to be consistent with VMOp failure due to
+    // another collection slipping in after our gc_count but before
+    // our request is processed.  _gc_locker collections upgraded by
+    // GCLockerInvokesConcurrent are handled above and never discarded.
+    return false;
+  } else if (cause == GCCause::_gc_locker || cause == GCCause::_wb_young_gc
+             DEBUG_ONLY(|| cause == GCCause::_scavenge_alot)) {
+
+    // Schedule a standard evacuation pause. We're setting word_size
+    // to 0 which means that we are not requesting a post-GC allocation.
+    VM_G1CollectForAllocation op(0,     /* word_size */
+                                 gc_count_before,
+                                 cause,
+                                 policy()->max_pause_time_ms());
+    VMThread::execute(&op);
+    return op.gc_succeeded();
+  } else {
+    // Schedule a Full GC.
+    VM_G1CollectFull op(gc_count_before, full_gc_count_before, cause);
+    VMThread::execute(&op);
+    return op.gc_succeeded();
+  }
 }
 
 bool G1CollectedHeap::is_in(const void* p) const {
@@ -2611,7 +2731,6 @@
   VM_G1CollectForAllocation op(word_size,
                                gc_count_before,
                                gc_cause,
-                               false, /* should_initiate_conc_mark */
                                policy()->max_pause_time_ms());
   VMThread::execute(&op);
 
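The termination tests above depend on gc_counter_less_than(), which compares unsigned counters with allowance for wraparound: unsigned subtraction is modular, so (x - y) lands in the upper half of the value range exactly when x trails y by less than UINT_MAX/2. A minimal standalone sketch of the same trick (the harness below is illustrative only, not part of this change):

#include <cassert>
#include <climits>

// Same idea as gc_counter_less_than above: x < y with allowance for
// wraparound, valid while the two counters stay within UINT_MAX/2 of
// each other. Unsigned subtraction wraps modulo UINT_MAX+1.
static bool counter_less_than(unsigned x, unsigned y) {
  return (x - y) > (UINT_MAX / 2);
}

int main() {
  assert(counter_less_than(5u, 7u));        // ordinary ordering
  assert(!counter_less_than(7u, 5u));
  assert(!counter_less_than(7u, 7u));       // strictly less, not equal
  assert(counter_less_than(UINT_MAX, 1u));  // y wrapped past UINT_MAX
  assert(!counter_less_than(1u, UINT_MAX));
  return 0;
}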
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp	Wed Nov 13 14:08:04 2019 -0800
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp	Wed Nov 13 18:00:30 2019 -0500
@@ -133,6 +133,7 @@
   friend class VM_CollectForMetadataAllocation;
   friend class VM_G1CollectForAllocation;
   friend class VM_G1CollectFull;
+  friend class VM_G1TryInitiateConcMark;
   friend class VMStructs;
   friend class MutatorAllocRegion;
   friend class G1FullCollector;
@@ -259,16 +260,22 @@
 
   G1HRPrinter _hr_printer;
 
-  // It decides whether an explicit GC should start a concurrent cycle
-  // instead of doing a STW GC. Currently, a concurrent cycle is
-  // explicitly started if:
-  // (a) cause == _gc_locker and +GCLockerInvokesConcurrent, or
-  // (b) cause == _g1_humongous_allocation
-  // (c) cause == _java_lang_system_gc and +ExplicitGCInvokesConcurrent.
-  // (d) cause == _dcmd_gc_run and +ExplicitGCInvokesConcurrent.
-  // (e) cause == _wb_conc_mark
+  // Return true if an explicit GC should start a concurrent cycle instead
+  // of doing a STW full GC. A concurrent cycle should be started if:
+  // (a) cause == _gc_locker and +GCLockerInvokesConcurrent,
+  // (b) cause == _g1_humongous_allocation,
+  // (c) cause == _java_lang_system_gc and +ExplicitGCInvokesConcurrent,
+  // (d) cause == _dcmd_gc_run and +ExplicitGCInvokesConcurrent,
+  // (e) cause == _wb_conc_mark,
+  // (f) cause == _g1_periodic_collection and +G1PeriodicGCInvokesConcurrent.
   bool should_do_concurrent_full_gc(GCCause::Cause cause);
 
+  // Attempt to start a concurrent cycle with the indicated cause.
+  // precondition: should_do_concurrent_full_gc(cause)
+  bool try_collect_concurrently(GCCause::Cause cause,
+                                uint gc_counter,
+                                uint old_marking_started_before);
+
   // Return true if should upgrade to full gc after an incremental one.
   bool should_upgrade_to_full_gc(GCCause::Cause cause);
 
@@ -630,7 +637,7 @@
   // Full GC). If concurrent is true, the caller is the outer caller
   // in this nesting (i.e., the concurrent cycle). Further nesting is
   // not currently supported. The end of this call also notifies
-  // the FullGCCount_lock in case a Java thread is waiting for a full
+  // the G1OldGCCount_lock in case a Java thread is waiting for a full
   // GC to happen (e.g., it called System.gc() with
   // +ExplicitGCInvokesConcurrent).
   void increment_old_marking_cycles_completed(bool concurrent);
@@ -1088,10 +1095,9 @@
   // "CollectedHeap" supports.
   virtual void collect(GCCause::Cause cause);
 
-  // Perform a collection of the heap with the given cause; if the VM operation
-  // fails to execute for any reason, retry only if retry_on_gc_failure is set.
+  // Perform a collection of the heap with the given cause.
   // Returns whether this collection actually executed.
-  bool try_collect(GCCause::Cause cause, bool retry_on_gc_failure);
+  bool try_collect(GCCause::Cause cause);
 
   // True iff an evacuation has failed in the most-recent collection.
   bool evacuation_failed() { return _evacuation_failed; }
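As a reading aid for cases (a) through (f) documented on should_do_concurrent_full_gc above, the predicate can be restated roughly as follows; the Cause enum and Flags struct are illustrative stand-ins, not the real GCCause or flag declarations:

#include <cassert>

// Stand-ins for the GCCause values and -XX flags named in the comment.
enum class Cause {
  gc_locker, g1_humongous_allocation, java_lang_system_gc,
  dcmd_gc_run, wb_conc_mark, g1_periodic_collection, other
};

struct Flags {
  bool GCLockerInvokesConcurrent;
  bool ExplicitGCInvokesConcurrent;
  bool G1PeriodicGCInvokesConcurrent;
};

static bool should_do_concurrent_full_gc(Cause c, const Flags& f) {
  switch (c) {
    case Cause::gc_locker:               return f.GCLockerInvokesConcurrent;     // (a)
    case Cause::g1_humongous_allocation: return true;                            // (b)
    case Cause::java_lang_system_gc:     return f.ExplicitGCInvokesConcurrent;   // (c)
    case Cause::dcmd_gc_run:             return f.ExplicitGCInvokesConcurrent;   // (d)
    case Cause::wb_conc_mark:            return true;                            // (e)
    case Cause::g1_periodic_collection:  return f.G1PeriodicGCInvokesConcurrent; // (f)
    default:                             return false;
  }
}

int main() {
  Flags f{false, true, false};  // only ExplicitGCInvokesConcurrent set
  assert(should_do_concurrent_full_gc(Cause::java_lang_system_gc, f));  // (c)
  assert(!should_do_concurrent_full_gc(Cause::gc_locker, f));           // (a) off
  return 0;
}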
--- a/src/hotspot/share/gc/g1/g1ConcurrentMarkThread.cpp	Wed Nov 13 14:08:04 2019 -0800
+++ b/src/hotspot/share/gc/g1/g1ConcurrentMarkThread.cpp	Wed Nov 13 18:00:30 2019 -0500
@@ -393,7 +393,7 @@
     }
 
     // Update the number of full collections that have been
-    // completed. This will also notify the FullGCCount_lock in case a
+    // completed. This will also notify the G1OldGCCount_lock in case a
     // Java thread is waiting for a full GC to happen (e.g., it
     // called System.gc() with +ExplicitGCInvokesConcurrent).
     {
--- a/src/hotspot/share/gc/g1/g1VMOperations.cpp	Wed Nov 13 14:08:04 2019 -0800
+++ b/src/hotspot/share/gc/g1/g1VMOperations.cpp	Wed Nov 13 18:00:30 2019 -0500
@@ -40,17 +40,61 @@
   _gc_succeeded = g1h->do_full_collection(true /* explicit_gc */, false /* clear_all_soft_refs */);
 }
 
+VM_G1TryInitiateConcMark::VM_G1TryInitiateConcMark(uint gc_count_before,
+                                                   GCCause::Cause gc_cause,
+                                                   double target_pause_time_ms) :
+  VM_GC_Operation(gc_count_before, gc_cause),
+  _target_pause_time_ms(target_pause_time_ms),
+  _transient_failure(false),
+  _cycle_already_in_progress(false),
+  _gc_succeeded(false)
+{}
+
+bool VM_G1TryInitiateConcMark::doit_prologue() {
+  bool result = VM_GC_Operation::doit_prologue();
+  // The prologue can fail for a couple of reasons. The first is that another GC
+  // got scheduled and prevented the scheduling of the initial mark GC. The
+  // second is that the GC locker may be active and the heap can't be expanded.
+  // In both cases we want to retry the GC so that the initial mark pause is
+  // actually scheduled. In the second case, however, we should stall until
+  // the GC locker is no longer active and then retry the initial mark GC.
+  if (!result) _transient_failure = true;
+  return result;
+}
+
+void VM_G1TryInitiateConcMark::doit() {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  GCCauseSetter x(g1h, _gc_cause);
+  if (!g1h->policy()->force_initial_mark_if_outside_cycle(_gc_cause)) {
+    // Failure to force the next GC pause to be an initial mark indicates
+    // there is already a concurrent marking cycle in progress.  Set flag
+    // to notify the caller and return immediately.
+    _cycle_already_in_progress = true;
+  } else if (!g1h->do_collection_pause_at_safepoint(_target_pause_time_ms)) {
+    // Failure to perform the collection at all occurs because GCLocker is
+    // active, and we have the bad luck to be the collection request that
+    // makes a later _gc_locker collection needed.  (Else we would have hit
+    // the GCLocker check in the prologue.)
+    _transient_failure = true;
+  } else if (g1h->should_upgrade_to_full_gc(_gc_cause)) {
+    // GC ran, but we're still in trouble and need a full GC.
+    log_info(gc, ergo)("Attempting maximally compacting collection");
+    _gc_succeeded = g1h->do_full_collection(false, /* explicit gc */
+                                            true /* clear_all_soft_refs */);
+    guarantee(_gc_succeeded, "Elevated collections during the safepoint must always succeed");
+  } else {
+    _gc_succeeded = true;
+  }
+}
+
 VM_G1CollectForAllocation::VM_G1CollectForAllocation(size_t         word_size,
                                                      uint           gc_count_before,
                                                      GCCause::Cause gc_cause,
-                                                     bool           should_initiate_conc_mark,
                                                      double         target_pause_time_ms) :
   VM_CollectForAllocation(word_size, gc_count_before, gc_cause),
   _gc_succeeded(false),
-  _should_initiate_conc_mark(should_initiate_conc_mark),
-  _should_retry_gc(false),
-  _target_pause_time_ms(target_pause_time_ms),
-  _old_marking_cycles_completed_before(0) {
+  _target_pause_time_ms(target_pause_time_ms) {
 
   guarantee(target_pause_time_ms > 0.0,
             "target_pause_time_ms = %1.6lf should be positive",
@@ -58,26 +102,8 @@
   _gc_cause = gc_cause;
 }
 
-bool VM_G1CollectForAllocation::doit_prologue() {
-  bool res = VM_CollectForAllocation::doit_prologue();
-  if (!res) {
-    if (_should_initiate_conc_mark) {
-      // The prologue can fail for a couple of reasons. The first is that another GC
-      // got scheduled and prevented the scheduling of the initial mark GC. The
-      // second is that the GC locker may be active and the heap can't be expanded.
-      // In both cases we want to retry the GC so that the initial mark pause is
-      // actually scheduled. In the second case, however, we should stall until
-      // until the GC locker is no longer active and then retry the initial mark GC.
-      _should_retry_gc = true;
-    }
-  }
-  return res;
-}
-
 void VM_G1CollectForAllocation::doit() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  assert(!_should_initiate_conc_mark || g1h->should_do_concurrent_full_gc(_gc_cause),
-      "only a GC locker, a System.gc(), stats update, whitebox, or a hum allocation induced GC should start a cycle");
 
   if (_word_size > 0) {
     // An allocation has been requested. So, try to do that first.
@@ -92,44 +118,6 @@
   }
 
   GCCauseSetter x(g1h, _gc_cause);
-  if (_should_initiate_conc_mark) {
-    // It's safer to read old_marking_cycles_completed() here, given
-    // that noone else will be updating it concurrently. Since we'll
-    // only need it if we're initiating a marking cycle, no point in
-    // setting it earlier.
-    _old_marking_cycles_completed_before = g1h->old_marking_cycles_completed();
-
-    // At this point we are supposed to start a concurrent cycle. We
-    // will do so if one is not already in progress.
-    bool res = g1h->policy()->force_initial_mark_if_outside_cycle(_gc_cause);
-
-    // The above routine returns true if we were able to force the
-    // next GC pause to be an initial mark; it returns false if a
-    // marking cycle is already in progress.
-    //
-    // If a marking cycle is already in progress just return and skip the
-    // pause below - if the reason for requesting this initial mark pause
-    // was due to a System.gc() then the requesting thread should block in
-    // doit_epilogue() until the marking cycle is complete.
-    //
-    // If this initial mark pause was requested as part of a humongous
-    // allocation then we know that the marking cycle must just have
-    // been started by another thread (possibly also allocating a humongous
-    // object) as there was no active marking cycle when the requesting
-    // thread checked before calling collect() in
-    // attempt_allocation_humongous(). Retrying the GC, in this case,
-    // will cause the requesting thread to spin inside collect() until the
-    // just started marking cycle is complete - which may be a while. So
-    // we do NOT retry the GC.
-    if (!res) {
-      assert(_word_size == 0, "Concurrent Full GC/Humongous Object IM shouldn't be allocating");
-      if (_gc_cause != GCCause::_g1_humongous_allocation) {
-        _should_retry_gc = true;
-      }
-      return;
-    }
-  }
-
   // Try a partial collection of some kind.
   _gc_succeeded = g1h->do_collection_pause_at_safepoint(_target_pause_time_ms);
 
@@ -138,66 +126,15 @@
       // An allocation had been requested. Do it, eventually trying a stronger
       // kind of GC.
       _result = g1h->satisfy_failed_allocation(_word_size, &_gc_succeeded);
-    } else {
-      bool should_upgrade_to_full = g1h->should_upgrade_to_full_gc(_gc_cause);
-
-      if (should_upgrade_to_full) {
-        // There has been a request to perform a GC to free some space. We have no
-        // information on how much memory has been asked for. In case there are
-        // absolutely no regions left to allocate into, do a maximally compacting full GC.
-        log_info(gc, ergo)("Attempting maximally compacting collection");
-        _gc_succeeded = g1h->do_full_collection(false, /* explicit gc */
-                                                   true   /* clear_all_soft_refs */);
-      }
+    } else if (g1h->should_upgrade_to_full_gc(_gc_cause)) {
+      // There has been a request to perform a GC to free some space. We have no
+      // information on how much memory has been asked for. In case there are
+      // absolutely no regions left to allocate into, do a maximally compacting full GC.
+      log_info(gc, ergo)("Attempting maximally compacting collection");
+      _gc_succeeded = g1h->do_full_collection(false, /* explicit gc */
+                                              true   /* clear_all_soft_refs */);
     }
     guarantee(_gc_succeeded, "Elevated collections during the safepoint must always succeed.");
-  } else {
-    assert(_result == NULL, "invariant");
-    // The only reason for the pause to not be successful is that, the GC locker is
-    // active (or has become active since the prologue was executed). In this case
-    // we should retry the pause after waiting for the GC locker to become inactive.
-    _should_retry_gc = true;
-  }
-}
-
-void VM_G1CollectForAllocation::doit_epilogue() {
-  VM_CollectForAllocation::doit_epilogue();
-
-  // If the pause was initiated by a System.gc() and
-  // +ExplicitGCInvokesConcurrent, we have to wait here for the cycle
-  // that just started (or maybe one that was already in progress) to
-  // finish.
-  if (GCCause::is_user_requested_gc(_gc_cause) &&
-      _should_initiate_conc_mark) {
-    assert(ExplicitGCInvokesConcurrent,
-           "the only way to be here is if ExplicitGCInvokesConcurrent is set");
-
-    G1CollectedHeap* g1h = G1CollectedHeap::heap();
-
-    // In the doit() method we saved g1h->old_marking_cycles_completed()
-    // in the _old_marking_cycles_completed_before field. We have to
-    // wait until we observe that g1h->old_marking_cycles_completed()
-    // has increased by at least one. This can happen if a) we started
-    // a cycle and it completes, b) a cycle already in progress
-    // completes, or c) a Full GC happens.
-
-    // If the condition has already been reached, there's no point in
-    // actually taking the lock and doing the wait.
-    if (g1h->old_marking_cycles_completed() <=
-                                          _old_marking_cycles_completed_before) {
-      // The following is largely copied from CMS
-
-      Thread* thr = Thread::current();
-      assert(thr->is_Java_thread(), "invariant");
-      JavaThread* jt = (JavaThread*)thr;
-      ThreadToNativeFromVM native(jt);
-
-      MonitorLocker ml(FullGCCount_lock, Mutex::_no_safepoint_check_flag);
-      while (g1h->old_marking_cycles_completed() <=
-                                          _old_marking_cycles_completed_before) {
-        ml.wait();
-      }
-    }
   }
 }
 
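The new VM op reports three outcomes that try_collect_concurrently branches on: gc_succeeded(), cycle_already_in_progress(), and transient_failure(). A compact sketch of that caller-side classification (Outcome and Next are hypothetical names; the real loop also rechecks the cycle counters, elided here):

#include <cassert>

// Mirrors the three flags exposed by VM_G1TryInitiateConcMark.
struct Outcome {
  bool gc_succeeded;
  bool cycle_already_in_progress;
  bool transient_failure;
};

enum class Next {
  Done,            // request satisfied; return to caller
  WaitThenRecheck, // user request: wait on G1OldGCCount_lock, then re-test
  StallThenRetry   // transient failure: stall on GCLocker if active, retry
};

static Next classify(const Outcome& op, bool user_requested) {
  if (op.gc_succeeded) {
    return Next::Done;  // pause ran; for user requests a cycle started too
  }
  if (op.cycle_already_in_progress) {
    // Progress for an automatic request; a user request must wait for
    // the in-progress cycle to finish and then try again.
    return user_requested ? Next::WaitThenRecheck : Next::Done;
  }
  return Next::StallThenRetry;  // op.transient_failure holds here
}

int main() {
  Outcome o{false, true, false};
  assert(classify(o, true) == Next::WaitThenRecheck);
  assert(classify(o, false) == Next::Done);
  return 0;
}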
--- a/src/hotspot/share/gc/g1/g1VMOperations.hpp	Wed Nov 13 14:08:04 2019 -0800
+++ b/src/hotspot/share/gc/g1/g1VMOperations.hpp	Wed Nov 13 18:00:30 2019 -0500
@@ -45,29 +45,39 @@
     _gc_succeeded(false) { }
   virtual VMOp_Type type() const { return VMOp_G1CollectFull; }
   virtual void doit();
-  bool gc_succeeded() { return _gc_succeeded; }
+  bool gc_succeeded() const { return _gc_succeeded; }
+};
+
+class VM_G1TryInitiateConcMark : public VM_GC_Operation {
+  double _target_pause_time_ms;
+  bool _transient_failure;
+  bool _cycle_already_in_progress;
+  bool _gc_succeeded;
+
+public:
+  VM_G1TryInitiateConcMark(uint gc_count_before,
+                           GCCause::Cause gc_cause,
+                           double target_pause_time_ms);
+  virtual VMOp_Type type() const { return VMOp_G1TryInitiateConcMark; }
+  virtual bool doit_prologue();
+  virtual void doit();
+  bool transient_failure() const { return _transient_failure; }
+  bool cycle_already_in_progress() const { return _cycle_already_in_progress; }
+  bool gc_succeeded() const { return _gc_succeeded; }
 };
 
 class VM_G1CollectForAllocation : public VM_CollectForAllocation {
   bool _gc_succeeded;
-
-  bool _should_initiate_conc_mark;
-  bool _should_retry_gc;
   double _target_pause_time_ms;
-  uint  _old_marking_cycles_completed_before;
 
 public:
   VM_G1CollectForAllocation(size_t         word_size,
                             uint           gc_count_before,
                             GCCause::Cause gc_cause,
-                            bool           should_initiate_conc_mark,
                             double         target_pause_time_ms);
   virtual VMOp_Type type() const { return VMOp_G1CollectForAllocation; }
-  virtual bool doit_prologue();
   virtual void doit();
-  virtual void doit_epilogue();
-  bool should_retry_gc() const { return _should_retry_gc; }
-  bool gc_succeeded() { return _gc_succeeded; }
+  bool gc_succeeded() const { return _gc_succeeded; }
 };
 
 // Concurrent G1 stop-the-world operations such as remark and cleanup.
--- a/src/hotspot/share/gc/g1/g1YoungRemSetSamplingThread.cpp	Wed Nov 13 14:08:04 2019 -0800
+++ b/src/hotspot/share/gc/g1/g1YoungRemSetSamplingThread.cpp	Wed Nov 13 18:00:30 2019 -0500
@@ -90,8 +90,7 @@
   if ((os::elapsedTime() - _last_periodic_gc_attempt_s) > (G1PeriodicGCInterval / 1000.0)) {
     log_debug(gc, periodic)("Checking for periodic GC.");
     if (should_start_periodic_gc()) {
-      if (!G1CollectedHeap::heap()->try_collect(GCCause::_g1_periodic_collection,
-                                                    false /* retry_on_vmop_failure */)) {
+      if (!G1CollectedHeap::heap()->try_collect(GCCause::_g1_periodic_collection)) {
         log_debug(gc, periodic)("GC request denied. Skipping.");
       }
     }
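The surrounding context also shows the rate limit for periodic collections: os::elapsedTime() yields seconds while G1PeriodicGCInterval is in milliseconds, hence the division by 1000.0. A self-contained analogue of that gating logic, with local stand-in names rather than HotSpot APIs:

#include <chrono>

// Seconds since first call, standing in for os::elapsedTime().
static double elapsed_seconds() {
  using namespace std::chrono;
  static const auto start = steady_clock::now();
  return duration<double>(steady_clock::now() - start).count();
}

struct PeriodicGcGate {
  double interval_ms;           // plays the role of G1PeriodicGCInterval
  double last_attempt_s = 0.0;  // plays the role of _last_periodic_gc_attempt_s

  // True when the interval has elapsed since the last attempt; the
  // attempt time is recorded whether or not a GC is ultimately started.
  bool due_for_check() {
    double now = elapsed_seconds();
    if ((now - last_attempt_s) > (interval_ms / 1000.0)) {
      last_attempt_s = now;
      return true;
    }
    return false;
  }
};

int main() {
  PeriodicGcGate gate{100.0};   // 100 ms interval, for illustration
  (void)gate.due_for_check();   // not due until 100 ms have elapsed
  return 0;
}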
--- a/src/hotspot/share/runtime/mutexLocker.cpp	Wed Nov 13 14:08:04 2019 -0800
+++ b/src/hotspot/share/runtime/mutexLocker.cpp	Wed Nov 13 18:00:30 2019 -0500
@@ -72,6 +72,7 @@
 Monitor* CGC_lock                     = NULL;
 Monitor* STS_lock                     = NULL;
 Monitor* FullGCCount_lock             = NULL;
+Monitor* G1OldGCCount_lock            = NULL;
 Monitor* DirtyCardQ_CBL_mon           = NULL;
 Mutex*   Shared_DirtyCardQ_lock       = NULL;
 Mutex*   MarkStackFreeList_lock       = NULL;
@@ -203,6 +204,8 @@
 
   def(FullGCCount_lock             , PaddedMonitor, leaf,        true,  _safepoint_check_never);      // in support of ExplicitGCInvokesConcurrent
   if (UseG1GC) {
+    def(G1OldGCCount_lock          , PaddedMonitor, leaf,        true,  _safepoint_check_always);
+
     def(DirtyCardQ_CBL_mon         , PaddedMonitor, access,      true,  _safepoint_check_never);
     def(Shared_DirtyCardQ_lock     , PaddedMutex  , access + 1,  true,  _safepoint_check_never);
 
--- a/src/hotspot/share/runtime/mutexLocker.hpp	Wed Nov 13 14:08:04 2019 -0800
+++ b/src/hotspot/share/runtime/mutexLocker.hpp	Wed Nov 13 18:00:30 2019 -0500
@@ -68,6 +68,7 @@
                                                  // fore- & background GC threads.
 extern Monitor* STS_lock;                        // used for joining/leaving SuspendibleThreadSet.
 extern Monitor* FullGCCount_lock;                // in support of "concurrent" full gc
+extern Monitor* G1OldGCCount_lock;               // in support of "concurrent" full gc
 extern Monitor* DirtyCardQ_CBL_mon;              // Protects dirty card Q
                                                  // completed buffer queue.
 extern Mutex*   Shared_DirtyCardQ_lock;          // Lock protecting dirty card
--- a/src/hotspot/share/runtime/vmOperations.hpp	Wed Nov 13 14:08:04 2019 -0800
+++ b/src/hotspot/share/runtime/vmOperations.hpp	Wed Nov 13 18:00:30 2019 -0500
@@ -66,6 +66,7 @@
   template(G1CollectForAllocation)                \
   template(G1CollectFull)                         \
   template(G1Concurrent)                          \
+  template(G1TryInitiateConcMark)                 \
   template(ZMarkStart)                            \
   template(ZMarkEnd)                              \
   template(ZRelocateStart)                        \
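Finally, the wait protocol around the new G1OldGCCount_lock can be pictured with standard primitives: a requester that observed _old_marking_cycles_started blocks until _old_marking_cycles_completed catches up (wraparound-safely), while increment_old_marking_cycles_completed() bumps the counter and notifies. A hedged analogue using std::mutex and std::condition_variable in place of HotSpot's MonitorLocker:

#include <climits>
#include <condition_variable>
#include <mutex>
#include <thread>

static bool counter_less_than(unsigned x, unsigned y) {
  return (x - y) > (UINT_MAX / 2);  // as in gc_counter_less_than
}

static std::mutex old_gc_count_mutex;  // G1OldGCCount_lock analogue
static std::condition_variable old_gc_count_cv;
static unsigned old_marking_cycles_completed = 0;

// Requester side: block until the cycle observed as started has
// completed, as in the wait loop added to try_collect_concurrently.
static void wait_for_cycle(unsigned old_marking_started_after) {
  std::unique_lock<std::mutex> lk(old_gc_count_mutex);
  old_gc_count_cv.wait(lk, [&] {
    return !counter_less_than(old_marking_cycles_completed,
                              old_marking_started_after);
  });
}

// Marking-thread side: mirrors increment_old_marking_cycles_completed().
static void complete_cycle() {
  {
    std::lock_guard<std::mutex> lk(old_gc_count_mutex);
    ++old_marking_cycles_completed;
  }
  old_gc_count_cv.notify_all();
}

int main() {
  std::thread waiter([] { wait_for_cycle(1); });  // saw cycle 1 start
  complete_cycle();                               // cycle 1 completes
  waiter.join();
  return 0;
}

notify_all() rather than notify_one() matters here because several requesters may be waiting at once, each for a different observed started count.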