src/hotspot/share/runtime/safepoint.cpp
changeset 53775 5d20b085d893
parent 53646 043ae846819f
child 53895 b22d8ae270a2
--- a/src/hotspot/share/runtime/safepoint.cpp	Fri Feb 15 07:53:03 2019 -0500
+++ b/src/hotspot/share/runtime/safepoint.cpp	Fri Feb 15 14:15:10 2019 +0100
@@ -70,70 +70,63 @@
 #include "c1/c1_globals.hpp"
 #endif
 
-template <typename E>
-static void set_current_safepoint_id(E* event, int adjustment = 0) {
-  assert(event != NULL, "invariant");
-  event->set_safepointId(SafepointSynchronize::safepoint_counter() + adjustment);
-}
-
-static void post_safepoint_begin_event(EventSafepointBegin* event,
+static void post_safepoint_begin_event(EventSafepointBegin& event,
+                                       uint64_t safepoint_id,
                                        int thread_count,
                                        int critical_thread_count) {
-  assert(event != NULL, "invariant");
-  assert(event->should_commit(), "invariant");
-  set_current_safepoint_id(event);
-  event->set_totalThreadCount(thread_count);
-  event->set_jniCriticalThreadCount(critical_thread_count);
-  event->commit();
+  if (event.should_commit()) {
+    event.set_safepointId(safepoint_id);
+    event.set_totalThreadCount(thread_count);
+    event.set_jniCriticalThreadCount(critical_thread_count);
+    event.commit();
+  }
 }
 
-static void post_safepoint_cleanup_event(EventSafepointCleanup* event) {
-  assert(event != NULL, "invariant");
-  assert(event->should_commit(), "invariant");
-  set_current_safepoint_id(event);
-  event->commit();
-}
-
-static void post_safepoint_synchronize_event(EventSafepointStateSynchronization* event,
-                                             int initial_number_of_threads,
-                                             int threads_waiting_to_block,
-                                             unsigned int iterations) {
-  assert(event != NULL, "invariant");
-  if (event->should_commit()) {
-    // Group this event together with the ones committed after the counter is increased
-    set_current_safepoint_id(event, 1);
-    event->set_initialThreadCount(initial_number_of_threads);
-    event->set_runningThreadCount(threads_waiting_to_block);
-    event->set_iterations(iterations);
-    event->commit();
+static void post_safepoint_cleanup_event(EventSafepointCleanup& event, uint64_t safepoint_id) {
+  if (event.should_commit()) {
+    event.set_safepointId(safepoint_id);
+    event.commit();
   }
 }
 
-static void post_safepoint_wait_blocked_event(EventSafepointWaitBlocked* event,
-                                              int initial_threads_waiting_to_block) {
-  assert(event != NULL, "invariant");
-  assert(event->should_commit(), "invariant");
-  set_current_safepoint_id(event);
-  event->set_runningThreadCount(initial_threads_waiting_to_block);
-  event->commit();
-}
-
-static void post_safepoint_cleanup_task_event(EventSafepointCleanupTask* event,
-                                              const char* name) {
-  assert(event != NULL, "invariant");
-  if (event->should_commit()) {
-    set_current_safepoint_id(event);
-    event->set_name(name);
-    event->commit();
+static void post_safepoint_synchronize_event(EventSafepointStateSynchronization& event,
+                                             uint64_t safepoint_id,
+                                             int initial_number_of_threads,
+                                             int threads_waiting_to_block,
+                                             uint64_t iterations) {
+  if (event.should_commit()) {
+    event.set_safepointId(safepoint_id);
+    event.set_initialThreadCount(initial_number_of_threads);
+    event.set_runningThreadCount(threads_waiting_to_block);
+    event.set_iterations(iterations);
+    event.commit();
   }
 }
 
-static void post_safepoint_end_event(EventSafepointEnd* event) {
-  assert(event != NULL, "invariant");
-  if (event->should_commit()) {
-    // Group this event together with the ones committed before the counter increased
-    set_current_safepoint_id(event, -1);
-    event->commit();
+static void post_safepoint_wait_blocked_event(EventSafepointWaitBlocked& event,
+                                              uint64_t safepoint_id,
+                                              int initial_threads_waiting_to_block) {
+  if (event.should_commit()) {
+    event.set_safepointId(safepoint_id);
+    event.set_runningThreadCount(initial_threads_waiting_to_block);
+    event.commit();
+  }
+}
+
+static void post_safepoint_cleanup_task_event(EventSafepointCleanupTask& event,
+                                              uint64_t safepoint_id,
+                                              const char* name) {
+  if (event.should_commit()) {
+    event.set_safepointId(safepoint_id);
+    event.set_name(name);
+    event.commit();
+  }
+}
+
+static void post_safepoint_end_event(EventSafepointEnd& event, uint64_t safepoint_id) {
+  if (event.should_commit()) {
+    event.set_safepointId(safepoint_id);
+    event.commit();
   }
 }
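
A minimal standalone sketch of the shape shared by the helpers above: the event is taken by reference, an explicit safepoint id replaces the old counter-plus-adjustment scheme, and every field write plus the commit is guarded by should_commit(). The struct below is a hypothetical stand-in for a generated JFR event class, not HotSpot's.

#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for a generated JFR event type (illustrative only).
struct SketchEventSafepointBegin {
  uint64_t safepoint_id = 0;
  int total_thread_count = 0;
  bool should_commit() const { return true; }   // pretend recording is enabled
  void commit() const {
    std::printf("SafepointBegin: id=%llu threads=%d\n",
                (unsigned long long)safepoint_id, total_thread_count);
  }
};

// Same shape as the patched helpers: guard everything on should_commit(),
// stamp the caller-supplied safepoint id, fill the fields, commit.
static void post_begin_event(SketchEventSafepointBegin& event,
                             uint64_t safepoint_id,
                             int thread_count) {
  if (event.should_commit()) {
    event.safepoint_id = safepoint_id;
    event.total_thread_count = thread_count;
    event.commit();
  }
}

int main() {
  SketchEventSafepointBegin begin_event;
  post_begin_event(begin_event, 3 /* odd id: inside a safepoint */, 42);
  return 0;
}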
 
@@ -141,64 +134,170 @@
 // Implementation of Safepoint begin/end
 
 SafepointSynchronize::SynchronizeState volatile SafepointSynchronize::_state = SafepointSynchronize::_not_synchronized;
-volatile int  SafepointSynchronize::_waiting_to_block = 0;
+int SafepointSynchronize::_waiting_to_block = 0;
 volatile uint64_t SafepointSynchronize::_safepoint_counter = 0;
+const uint64_t SafepointSynchronize::InactiveSafepointCounter = 0;
 int SafepointSynchronize::_current_jni_active_count = 0;
-long  SafepointSynchronize::_end_of_last_safepoint = 0;
-int SafepointSynchronize::_defer_thr_suspend_loop_count = 4000;
-static const int safepoint_spin_before_yield = 2000;
-static volatile int PageArmed = 0 ;        // safepoint polling page is RO|RW vs PROT_NONE
-static volatile int TryingToBlock = 0 ;    // proximate value -- for advisory use only
+long SafepointSynchronize::_end_of_last_safepoint = 0;
+
+WaitBarrier* SafepointSynchronize::_wait_barrier;
+
+// We need a place to save the description since it is released before we need it.
+static char stopped_description[64] = "";
+static bool _vm_is_waiting = false;
+
+static volatile bool PageArmed = false;        // safepoint polling page is RO|RW vs PROT_NONE
 static bool timeout_error_printed = false;
 
-
-// Statistic related statics
+// Statistic related
 julong SafepointSynchronize::_coalesced_vmop_count = 0;
 static jlong _safepoint_begin_time = 0;
 static float _ts_of_current_safepoint = 0.0f;
 static volatile int _nof_threads_hit_polling_page = 0;
 
-// Roll all threads forward to a safepoint and suspend them all
-void SafepointSynchronize::begin() {
-  EventSafepointBegin begin_event;
-  Thread* myThread = Thread::current();
-  assert(myThread->is_VM_thread(), "Only VM thread may execute a safepoint");
+void SafepointSynchronize::init(Thread* vmthread) {
+  // WaitBarrier should never be destroyed since we will have
+  // threads waiting on it while exiting.
+  _wait_barrier = new WaitBarrier(vmthread);
+}
+
+void SafepointSynchronize::increment_jni_active_count() {
+  assert(Thread::current()->is_VM_thread(), "Only VM thread may increment");
+  ++_current_jni_active_count;
+}
+
+void SafepointSynchronize::decrement_waiting_to_block() {
+  assert(_waiting_to_block > 0, "sanity check");
+  assert(Thread::current()->is_VM_thread(), "Only VM thread may decrement");
+  --_waiting_to_block;
+}
+
+static bool thread_not_running(ThreadSafepointState *cur_state) {
+  if (!cur_state->is_running()) {
+    return true;
+  }
+  cur_state->examine_state_of_thread(SafepointSynchronize::safepoint_counter());
+  if (!cur_state->is_running()) {
+    return true;
+  }
+  LogTarget(Trace, safepoint) lt;
+  if (lt.is_enabled()) {
+    ResourceMark rm;
+    LogStream ls(lt);
+    cur_state->print_on(&ls);
+  }
+  return false;
+}
 
+#ifdef ASSERT
+static void assert_list_is_valid(const ThreadSafepointState* tss_head, int still_running) {
+  int a = 0;
+  const ThreadSafepointState *tmp_tss = tss_head;
+  while (tmp_tss != NULL) {
+    ++a;
+    assert(tmp_tss->is_running(), "Illegal initial state");
+    tmp_tss = tmp_tss->get_next();
+  }
+  assert(a == still_running, "Must be the same");
+}
+#endif // ASSERT
+
+static void back_off(int iteration) {
+  // iteration will be 1 the first time we enter this spin back-off.
+  // naked_short_nanosleep works in tenths of microseconds, so the exact
+  // nanosecond count is irrelevant below that granularity. We do twenty
+  // 1 ns sleeps, with a total cost of ~1 ms, then switch to 1 ms sleeps.
+  jlong sleep_ns = 1;
+  if (iteration > 20) {
+    sleep_ns = NANOUNITS / MILLIUNITS;  // 1 ms
+  }
+  os::naked_short_nanosleep(sleep_ns);
+}
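
A self-contained sketch of the same back-off schedule, with std::this_thread::sleep_for standing in for os::naked_short_nanosleep; it illustrates the schedule only, not the VM's sleep primitive.

#include <chrono>
#include <thread>

// Same schedule as back_off() above: request a 1 ns sleep for the first 20
// iterations, then fall back to 1 ms sleeps.
static void back_off_sketch(int iteration) {
  std::chrono::nanoseconds sleep_ns(1);
  if (iteration > 20) {
    sleep_ns = std::chrono::milliseconds(1);   // 1 ms
  }
  std::this_thread::sleep_for(sleep_ns);
}

int main() {
  for (int iteration = 1; iteration <= 25; ++iteration) {
    back_off_sketch(iteration);
  }
  return 0;
}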
+
+int SafepointSynchronize::synchronize_threads(jlong safepoint_limit_time, int nof_threads, int* initial_running)
+{
+  JavaThreadIteratorWithHandle jtiwh;
+
+#ifdef ASSERT
+  for (; JavaThread *cur = jtiwh.next(); ) {
+    assert(cur->safepoint_state()->is_running(), "Illegal initial state");
+  }
+  jtiwh.rewind();
+#endif // ASSERT
+
+  // Iterate through all threads until it has been determined how to stop them all at a safepoint.
+  int still_running = nof_threads;
+  ThreadSafepointState *tss_head = NULL;
+  ThreadSafepointState **p_prev = &tss_head;
+  for (; JavaThread *cur = jtiwh.next(); ) {
+    ThreadSafepointState *cur_tss = cur->safepoint_state();
+    assert(cur_tss->get_next() == NULL, "Must be NULL");
+    if (thread_not_running(cur_tss)) {
+      --still_running;
+    } else {
+      *p_prev = cur_tss;
+      p_prev = cur_tss->next_ptr();
+    }
+  }
+  *p_prev = NULL;
+
+  DEBUG_ONLY(assert_list_is_valid(tss_head, still_running);)
+
+  *initial_running = still_running;
   if (log_is_enabled(Debug, safepoint, stats)) {
-    _safepoint_begin_time = os::javaTimeNanos();
-    _ts_of_current_safepoint = tty->time_stamp().seconds();
-    _nof_threads_hit_polling_page = 0;
+    begin_statistics(nof_threads, still_running);
   }
 
-  Universe::heap()->safepoint_synchronize_begin();
-
-  // By getting the Threads_lock, we assure that no threads are about to start or
-  // exit. It is released again in SafepointSynchronize::end().
-  Threads_lock->lock();
+  int iterations = 1; // The first iteration is above.
 
-  assert( _state == _not_synchronized, "trying to safepoint synchronize with wrong state");
-
-  int nof_threads = Threads::number_of_threads();
-
-  log_debug(safepoint)("Safepoint synchronization initiated. (%d threads)", nof_threads);
-
-  RuntimeService::record_safepoint_begin();
+  while (still_running > 0) {
+    // Check if this has taken too long:
+    if (SafepointTimeout && safepoint_limit_time < os::javaTimeNanos()) {
+      print_safepoint_timeout(_spinning_timeout);
+    }
+    if (int(iterations) == -1) { // overflow - something is wrong.
+      // We can only overflow here when we are using global
+      // polling pages. We keep this guarantee in its original
+      // form so that searches of the bug database for this
+      // failure mode find the right bugs.
+      guarantee (!PageArmed, "invariant");
+    }
 
-  MutexLocker mu(Safepoint_lock);
-
-  // Reset the count of active JNI critical threads
-  _current_jni_active_count = 0;
+    p_prev = &tss_head;
+    ThreadSafepointState *cur_tss = tss_head;
+    while (cur_tss != NULL) {
+      assert(cur_tss->is_running(), "Illegal initial state");
+      if (thread_not_running(cur_tss)) {
+        --still_running;
+        *p_prev = NULL;
+        ThreadSafepointState *tmp = cur_tss;
+        cur_tss = cur_tss->get_next();
+        tmp->set_next(NULL);
+      } else {
+        *p_prev = cur_tss;
+        p_prev = cur_tss->next_ptr();
+        cur_tss = cur_tss->get_next();
+      }
+    }
 
-  // Set number of threads to wait for, before we initiate the callbacks
-  _waiting_to_block = nof_threads;
-  TryingToBlock     = 0 ;
-  int still_running = nof_threads;
+    DEBUG_ONLY(assert_list_is_valid(tss_head, still_running);)
+
+    if (still_running > 0) {
+      back_off(iterations);
+    }
+
+    iterations++;
+  }
 
-  // Save the starting time, so that it can be compared to see if this has taken
-  // too long to complete.
-  jlong safepoint_limit_time = 0;
-  timeout_error_printed = false;
+  assert(tss_head == NULL, "Must be empty");
 
+  if (log_is_enabled(Debug, safepoint, stats)) {
+    update_statistics_on_spin_end();
+  }
+  return iterations;
+}
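
synchronize_threads() walks the full thread list only once; after that it keeps an intrusive singly linked list of still-running ThreadSafepointStates and unlinks entries in place as threads become safe. Below is a standalone sketch of that unlink-via-pointer-to-pointer pattern over a plain struct; the names are illustrative, not HotSpot's.

#include <cassert>

// Minimal stand-in for ThreadSafepointState: a running flag plus an
// intrusive next pointer.
struct RunningState {
  bool running = true;
  RunningState* next = nullptr;
};

// One pruning pass: keep still-running entries linked, unlink the rest.
// p_prev always points at the link that should lead to the next kept node,
// mirroring the p_prev/next_ptr() walk above.
static int prune_not_running(RunningState** head) {
  int still_running = 0;
  RunningState** p_prev = head;
  RunningState* cur = *head;
  while (cur != nullptr) {
    RunningState* next = cur->next;
    if (!cur->running) {
      *p_prev = next;          // the previous link now skips cur
      cur->next = nullptr;
    } else {
      p_prev = &cur->next;     // keep cur; its next field is the link to patch
      ++still_running;
    }
    cur = next;
  }
  return still_running;
}

int main() {
  RunningState a, b, c;
  a.next = &b;
  b.next = &c;
  RunningState* head = &a;
  b.running = false;                      // "b" reached a safepoint-safe state
  int still_running = prune_not_running(&head);
  assert(still_running == 2 && head == &a && a.next == &c && c.next == nullptr);
  (void)still_running;
  return 0;
}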
+
+void SafepointSynchronize::arm_safepoint() {
   // Begin the process of bringing the system to a safepoint.
   // Java threads can be in several different states and are
   // stopped by different mechanisms:
@@ -216,7 +315,7 @@
   //     memory writes are serialized with respect to each other,
   //     the VM thread issues a memory barrier instruction.
   //  3. Running compiled Code
-  //     Compiled code reads a global (Safepoint Polling) page that
+  //     Compiled code reads the local polling page that
   //     is set to fault if we are trying to get to a safepoint.
   //  4. Blocked
   //     A thread which is blocked will not be allowed to return from the
@@ -226,275 +325,154 @@
   //     between states, the safepointing code will wait for the thread to
   //     block itself when it attempts transitions to a new state.
   //
-  {
-    EventSafepointStateSynchronization sync_event;
-    int initial_running = 0;
-
-    _state            = _synchronizing;
-
-    if (SafepointMechanism::uses_thread_local_poll()) {
-      // Arming the per thread poll while having _state != _not_synchronized means safepointing
-      log_trace(safepoint)("Setting thread local yield flag for threads");
-      OrderAccess::storestore(); // storestore, global state -> local state
-      for (JavaThreadIteratorWithHandle jtiwh; JavaThread *cur = jtiwh.next(); ) {
-        // Make sure the threads start polling, it is time to yield.
-        SafepointMechanism::arm_local_poll(cur);
-      }
-    }
-    OrderAccess::fence(); // storestore|storeload, global state -> local state
-
-    if (SafepointMechanism::uses_global_page_poll()) {
-      // Make interpreter safepoint aware
-      Interpreter::notice_safepoints();
-
-      // Make polling safepoint aware
-      guarantee (PageArmed == 0, "invariant") ;
-      PageArmed = 1 ;
-      os::make_polling_page_unreadable();
-    }
-
-    // Consider using active_processor_count() ... but that call is expensive.
-    int ncpus = os::processor_count() ;
-    unsigned int iterations = 0;
-
-    {
-      JavaThreadIteratorWithHandle jtiwh;
-#ifdef ASSERT
-      for (; JavaThread *cur = jtiwh.next(); ) {
-        assert(cur->safepoint_state()->is_running(), "Illegal initial state");
-        // Clear the visited flag to ensure that the critical counts are collected properly.
-        cur->set_visited_for_critical_count(false);
-      }
-#endif // ASSERT
 
-      if (SafepointTimeout)
-        safepoint_limit_time = os::javaTimeNanos() + (jlong)SafepointTimeoutDelay * MICROUNITS;
+  // We must never miss a thread with a correct safepoint id, so we must make sure
+  // we arm the wait barrier for the next safepoint id/counter.
+  // Arming must be done after resetting _current_jni_active_count and _waiting_to_block.
+  _wait_barrier->arm(static_cast<int>(_safepoint_counter + 1));
 
-      // Iterate through all threads until it have been determined how to stop them all at a safepoint
-      int steps = 0 ;
-      while(still_running > 0) {
-        jtiwh.rewind();
-        for (; JavaThread *cur = jtiwh.next(); ) {
-          assert(!cur->is_ConcurrentGC_thread(), "A concurrent GC thread is unexpectly being suspended");
-          ThreadSafepointState *cur_state = cur->safepoint_state();
-          if (cur_state->is_running()) {
-            cur_state->examine_state_of_thread();
-            if (!cur_state->is_running()) {
-              still_running--;
-              // consider adjusting steps downward:
-              //   steps = 0
-              //   steps -= NNN
-              //   steps >>= 1
-              //   steps = MIN(steps, 2000-100)
-              //   if (iterations != 0) steps -= NNN
-            }
-            LogTarget(Trace, safepoint) lt;
-            if (lt.is_enabled()) {
-              ResourceMark rm;
-              LogStream ls(lt);
-              cur_state->print_on(&ls);
-            }
-          }
-        }
-
-        if (iterations == 0) {
-          initial_running = still_running;
-          if (log_is_enabled(Debug, safepoint, stats)) {
-            begin_statistics(nof_threads, still_running);
-          }
-        }
-
-        if (still_running > 0) {
-          // Check for if it takes to long
-          if (SafepointTimeout && safepoint_limit_time < os::javaTimeNanos()) {
-            print_safepoint_timeout(_spinning_timeout);
-          }
+  assert((_safepoint_counter & 0x1) == 0, "must be even");
+  // The store to _safepoint_counter must happen after any stores in arming.
+  OrderAccess::release_store(&_safepoint_counter, _safepoint_counter + 1);
 
-          // Spin to avoid context switching.
-          // There's a tension between allowing the mutators to run (and rendezvous)
-          // vs spinning.  As the VM thread spins, wasting cycles, it consumes CPU that
-          // a mutator might otherwise use profitably to reach a safepoint.  Excessive
-          // spinning by the VM thread on a saturated system can increase rendezvous latency.
-          // Blocking or yielding incur their own penalties in the form of context switching
-          // and the resultant loss of $ residency.
-          //
-          // Further complicating matters is that yield() does not work as naively expected
-          // on many platforms -- yield() does not guarantee that any other ready threads
-          // will run.   As such we revert to naked_short_sleep() after some number of iterations.
-          // nakes_short_sleep() is implemented as a short unconditional sleep.
-          // Typical operating systems round a "short" sleep period up to 10 msecs, so sleeping
-          // can actually increase the time it takes the VM thread to detect that a system-wide
-          // stop-the-world safepoint has been reached.  In a pathological scenario such as that
-          // described in CR6415670 the VMthread may sleep just before the mutator(s) become safe.
-          // In that case the mutators will be stalled waiting for the safepoint to complete and the
-          // the VMthread will be sleeping, waiting for the mutators to rendezvous.  The VMthread
-          // will eventually wake up and detect that all mutators are safe, at which point
-          // we'll again make progress.
-          //
-          // Beware too that that the VMThread typically runs at elevated priority.
-          // Its default priority is higher than the default mutator priority.
-          // Obviously, this complicates spinning.
-          //
-          // Note too that on Windows XP SwitchThreadTo() has quite different behavior than Sleep(0).
-          // Sleep(0) will _not yield to lower priority threads, while SwitchThreadTo() will.
-          //
-          // See the comments in synchronizer.cpp for additional remarks on spinning.
-          //
-          // In the future we might:
-          // -- Modify the safepoint scheme to avoid potentially unbounded spinning.
-          //    This is tricky as the path used by a thread exiting the JVM (say on
-          //    on JNI call-out) simply stores into its state field.  The burden
-          //    is placed on the VM thread, which must poll (spin).
-          // -- Find something useful to do while spinning.  If the safepoint is GC-related
-          //    we might aggressively scan the stacks of threads that are already safe.
-          // -- YieldTo() any still-running mutators that are ready but OFFPROC.
-          // -- Check system saturation.  If the system is not fully saturated then
-          //    simply spin and avoid sleep/yield.
-          // -- As still-running mutators rendezvous they could unpark the sleeping
-          //    VMthread.  This works well for still-running mutators that become
-          //    safe.  The VMthread must still poll for mutators that call-out.
-          // -- Drive the policy on time-since-begin instead of iterations.
-          // -- Consider making the spin duration a function of the # of CPUs:
-          //    Spin = (((ncpus-1) * M) + K) + F(still_running)
-          //    Alternately, instead of counting iterations of the outer loop
-          //    we could count the # of threads visited in the inner loop, above.
-          // -- On windows consider using the return value from SwitchThreadTo()
-          //    to drive subsequent spin/SwitchThreadTo()/Sleep(N) decisions.
+  // We are synchronizing
+  OrderAccess::storestore(); // Ordered with _safepoint_counter
+  _state = _synchronizing;
 
-          if (int(iterations) == -1) { // overflow - something is wrong.
-            // We can only overflow here when we are using global
-            // polling pages. We keep this guarantee in its original
-            // form so that searches of the bug database for this
-            // failure mode find the right bugs.
-            guarantee (PageArmed == 0, "invariant");
-          }
-
-          // Instead of (ncpus > 1) consider either (still_running < (ncpus + EPSILON)) or
-          // ((still_running + _waiting_to_block - TryingToBlock)) < ncpus)
-          ++steps ;
-          if (ncpus > 1 && steps < safepoint_spin_before_yield) {
-            SpinPause() ;     // MP-Polite spin
-          } else
-            if (steps < _defer_thr_suspend_loop_count) {
-              os::naked_yield() ;
-            } else {
-              os::naked_short_sleep(1);
-            }
-
-          iterations ++ ;
-        }
-        assert(iterations < (uint)max_jint, "We have been iterating in the safepoint loop too long");
-      }
-    } // ThreadsListHandle destroyed here.
-    assert(still_running == 0, "sanity check");
-
-    if (log_is_enabled(Debug, safepoint, stats)) {
-      update_statistics_on_spin_end();
-    }
-    if (sync_event.should_commit()) {
-      post_safepoint_synchronize_event(&sync_event, initial_running, _waiting_to_block, iterations);
+  if (SafepointMechanism::uses_thread_local_poll()) {
+    // Arming the per thread poll while having _state != _not_synchronized means safepointing
+    log_trace(safepoint)("Setting thread local yield flag for threads");
+    OrderAccess::storestore(); // storestore, global state -> local state
+    for (JavaThreadIteratorWithHandle jtiwh; JavaThread *cur = jtiwh.next(); ) {
+      // Make sure the threads start polling, it is time to yield.
+      SafepointMechanism::arm_local_poll(cur);
     }
   }
+  OrderAccess::fence(); // storestore|storeload, global state -> local state
 
-  // wait until all threads are stopped
-  {
-    EventSafepointWaitBlocked wait_blocked_event;
-    int initial_waiting_to_block = _waiting_to_block;
+  if (SafepointMechanism::uses_global_page_poll()) {
+    // Make interpreter safepoint aware
+    Interpreter::notice_safepoints();
+
+    // Make polling safepoint aware
+    guarantee(!PageArmed, "invariant");
+    PageArmed = true;
+    os::make_polling_page_unreadable();
+  }
+}
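
A sketch of the publication order arm_safepoint() relies on, translated to std::atomic: arm the barrier for the upcoming odd id first, release-publish the odd counter, then flip the state. The WaitBarrier is modeled as a bare atomic tag here; the globals are illustrative stand-ins, not the real fields.

#include <atomic>
#include <cstdint>

enum SyncState { not_synchronized, synchronizing, fully_synchronized };

static std::atomic<uint64_t> safepoint_counter{0};  // even: no safepoint in progress
static std::atomic<uint64_t> armed_barrier_tag{0};  // models WaitBarrier::arm(tag)
static std::atomic<int>      sync_state{not_synchronized};

// Roughly the same order as arm_safepoint(): barrier first, then the odd
// counter with release semantics, then the state change.
static void arm_sketch() {
  uint64_t next_id = safepoint_counter.load(std::memory_order_relaxed) + 1;
  armed_barrier_tag.store(next_id, std::memory_order_relaxed);    // "arm" for the next id
  safepoint_counter.store(next_id, std::memory_order_release);    // counter goes odd
  sync_state.store(synchronizing, std::memory_order_release);     // now synchronizing
}

int main() {
  arm_sketch();
  // An odd counter means a safepoint is in progress.
  return (safepoint_counter.load() & 1) == 1 ? 0 : 1;
}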
+
+// Roll all threads forward to a safepoint and suspend them all
+void SafepointSynchronize::begin() {
+  EventSafepointBegin begin_event;
+  assert(Thread::current()->is_VM_thread(), "Only VM thread may execute a safepoint");
+
+  strncpy(stopped_description, VMThread::vm_safepoint_description(), sizeof(stopped_description) - 1);
+  stopped_description[sizeof(stopped_description) - 1] = '\0';
+
+  if (log_is_enabled(Debug, safepoint, stats)) {
+    _safepoint_begin_time = os::javaTimeNanos();
+    _ts_of_current_safepoint = tty->time_stamp().seconds();
+    _nof_threads_hit_polling_page = 0;
+  }
+
+  Universe::heap()->safepoint_synchronize_begin();
+
+  // By getting the Threads_lock, we assure that no threads are about to start or
+  // exit. It is released again in SafepointSynchronize::end().
+  Threads_lock->lock();
+
+  assert( _state == _not_synchronized, "trying to safepoint synchronize with wrong state");
 
-    while (_waiting_to_block > 0) {
-      log_debug(safepoint)("Waiting for %d thread(s) to block", _waiting_to_block);
-      if (!SafepointTimeout || timeout_error_printed) {
-        Safepoint_lock->wait(true);  // true, means with no safepoint checks
-      } else {
-        // Compute remaining time
-        jlong remaining_time = safepoint_limit_time - os::javaTimeNanos();
+  int nof_threads = Threads::number_of_threads();
+
+  log_debug(safepoint)("Safepoint synchronization initiated using %s wait barrier. (%d threads)", _wait_barrier->description(), nof_threads);
+
+  RuntimeService::record_safepoint_begin();
+
+  // Reset the count of active JNI critical threads
+  _current_jni_active_count = 0;
+
+  // Set number of threads to wait for
+  _waiting_to_block = nof_threads;
 
-        // If there is no remaining time, then there is an error
-        if (remaining_time < 0 || Safepoint_lock->wait(true, remaining_time / MICROUNITS)) {
-          print_safepoint_timeout(_blocking_timeout);
-        }
-      }
-    }
-    assert(_waiting_to_block == 0, "sanity check");
+  jlong safepoint_limit_time = 0;
+  if (SafepointTimeout) {
+    // Set the limit time, so that it can be compared to see if this has taken
+    // too long to complete.
+    safepoint_limit_time = os::javaTimeNanos() + (jlong)SafepointTimeoutDelay * MICROUNITS;
+  }
+  timeout_error_printed = false;
+
+  EventSafepointStateSynchronization sync_event;
+  int initial_running = 0;
+
+  // Arms the safepoint; _current_jni_active_count and _waiting_to_block must be set before this point.
+  arm_safepoint();
+
+  // Will spin until all threads are safe.
+  int iterations = synchronize_threads(safepoint_limit_time, nof_threads, &initial_running);
+  assert(_waiting_to_block == 0, "No thread should be running");
+
+  post_safepoint_synchronize_event(sync_event, _safepoint_counter, initial_running,
+                                   _waiting_to_block, iterations);
+
+  // The wait-blocked event starts (and is timed) from this point.
+  EventSafepointWaitBlocked wait_blocked_event;
 
 #ifndef PRODUCT
-    if (SafepointTimeout) {
-      jlong current_time = os::javaTimeNanos();
-      if (safepoint_limit_time < current_time) {
-        log_warning(safepoint)("# SafepointSynchronize: Finished after "
-                      INT64_FORMAT_W(6) " ms",
-                      (int64_t)((current_time - safepoint_limit_time) / MICROUNITS +
-                                (jlong)SafepointTimeoutDelay));
-      }
+  if (SafepointTimeout) {
+    jlong current_time = os::javaTimeNanos();
+    if (safepoint_limit_time < current_time) {
+      log_warning(safepoint)("# SafepointSynchronize: Finished after "
+                    INT64_FORMAT_W(6) " ms",
+                    (int64_t)((current_time - safepoint_limit_time) / MICROUNITS +
+                              (jlong)SafepointTimeoutDelay));
     }
+  }
 #endif
 
-    assert((_safepoint_counter & 0x1) == 0, "must be even");
-    assert(Threads_lock->owned_by_self(), "must hold Threads_lock");
-    _safepoint_counter ++;
+  assert(Threads_lock->owned_by_self(), "must hold Threads_lock");
 
-    // Record state
-    _state = _synchronized;
+  // Record state
+  _state = _synchronized;
 
-    OrderAccess::fence();
-    if (wait_blocked_event.should_commit()) {
-      post_safepoint_wait_blocked_event(&wait_blocked_event, initial_waiting_to_block);
-    }
-  }
+  OrderAccess::fence();
+
+  post_safepoint_wait_blocked_event(wait_blocked_event, _safepoint_counter, 0);
 
 #ifdef ASSERT
   // Make sure all the threads were visited.
   for (JavaThreadIteratorWithHandle jtiwh; JavaThread *cur = jtiwh.next(); ) {
-    assert(cur->was_visited_for_critical_count(), "missed a thread");
+    assert(cur->was_visited_for_critical_count(_safepoint_counter), "missed a thread");
   }
 #endif // ASSERT
 
   // Update the count of active JNI critical regions
   GCLocker::set_jni_lock_count(_current_jni_active_count);
 
-  log_info(safepoint)("Entering safepoint region: %s", VMThread::vm_safepoint_description());
+  log_info(safepoint)("Entering safepoint region: %s", stopped_description);
 
   RuntimeService::record_safepoint_synchronized();
   if (log_is_enabled(Debug, safepoint, stats)) {
     update_statistics_on_sync_end(os::javaTimeNanos());
   }
 
-  // Call stuff that needs to be run when a safepoint is just about to be completed
-  {
-    EventSafepointCleanup cleanup_event;
-    do_cleanup_tasks();
-    if (cleanup_event.should_commit()) {
-      post_safepoint_cleanup_event(&cleanup_event);
-    }
-  }
+  // We do the safepoint cleanup first since a GC related safepoint
+  // needs cleanup to be completed before running the GC op.
+  EventSafepointCleanup cleanup_event;
+  do_cleanup_tasks();
+  post_safepoint_cleanup_event(cleanup_event, _safepoint_counter);
 
   if (log_is_enabled(Debug, safepoint, stats)) {
     // Record how much time was spent on the above cleanup tasks
     update_statistics_on_cleanup_end(os::javaTimeNanos());
   }
 
-  if (begin_event.should_commit()) {
-    post_safepoint_begin_event(&begin_event, nof_threads, _current_jni_active_count);
-  }
+  post_safepoint_begin_event(begin_event, _safepoint_counter, nof_threads, _current_jni_active_count);
 }
 
-// Wake up all threads, so they are ready to resume execution after the safepoint
-// operation has been carried out
-void SafepointSynchronize::end() {
-  assert(Threads_lock->owned_by_self(), "must hold Threads_lock");
-  assert((_safepoint_counter & 0x1) == 1, "must be odd");
-  EventSafepointEnd event;
-  _safepoint_counter ++;
-  // memory fence isn't required here since an odd _safepoint_counter
-  // value can do no harm and a fence is issued below anyway.
-
-  DEBUG_ONLY(Thread* myThread = Thread::current();)
-  assert(myThread->is_VM_thread(), "Only VM thread can execute a safepoint");
-
-  if (log_is_enabled(Debug, safepoint, stats)) {
-    end_statistics(os::javaTimeNanos());
-  }
-
+void SafepointSynchronize::disarm_safepoint() {
+  uint64_t safepoint_id = _safepoint_counter;
   {
     JavaThreadIteratorWithHandle jtiwh;
 #ifdef ASSERT
@@ -508,66 +486,74 @@
     }
 #endif // ASSERT
 
-    if (PageArmed) {
-      assert(SafepointMechanism::uses_global_page_poll(), "sanity");
+    if (SafepointMechanism::uses_global_page_poll()) {
+      guarantee (PageArmed, "invariant");
       // Make polling safepoint aware
       os::make_polling_page_readable();
-      PageArmed = 0 ;
-    }
-
-    if (SafepointMechanism::uses_global_page_poll()) {
+      PageArmed = false;
       // Remove safepoint check from interpreter
       Interpreter::ignore_safepoints();
     }
 
-    {
-      MutexLocker mu(Safepoint_lock);
+    OrderAccess::fence(); // keep read and write of _state from floating up
+    assert(_state == _synchronized, "must be synchronized before ending safepoint synchronization");
+
+    // Change state first to _not_synchronized.
+    // No threads should see _synchronized when running.
+    _state = _not_synchronized;
+
+    // Set the next dormant (even) safepoint id.
+    assert((_safepoint_counter & 0x1) == 1, "must be odd");
+    OrderAccess::release_store(&_safepoint_counter, _safepoint_counter + 1);
 
-      assert(_state == _synchronized, "must be synchronized before ending safepoint synchronization");
+    OrderAccess::fence(); // Keep the local state from floating up.
+
+    jtiwh.rewind();
+    for (; JavaThread *current = jtiwh.next(); ) {
+      // Clear the visited flag to ensure that the critical counts are collected properly.
+      DEBUG_ONLY(current->reset_visited_for_critical_count(safepoint_id);)
+      ThreadSafepointState* cur_state = current->safepoint_state();
+      assert(!cur_state->is_running(), "Thread not suspended at safepoint");
+      cur_state->restart(); // TSS _running
+      assert(cur_state->is_running(), "safepoint state has not been reset");
+      SafepointMechanism::disarm_local_poll(current);
+    }
+  } // ~JavaThreadIteratorWithHandle
 
-      if (SafepointMechanism::uses_thread_local_poll()) {
-        _state = _not_synchronized;
-        OrderAccess::storestore(); // global state -> local state
-        jtiwh.rewind();
-        for (; JavaThread *current = jtiwh.next(); ) {
-          ThreadSafepointState* cur_state = current->safepoint_state();
-          cur_state->restart(); // TSS _running
-          SafepointMechanism::disarm_local_poll(current);
-        }
-        log_info(safepoint)("Leaving safepoint region");
-      } else {
-        // Set to not synchronized, so the threads will not go into the signal_thread_blocked method
-        // when they get restarted.
-        _state = _not_synchronized;
-        OrderAccess::fence();
+  log_info(safepoint)("Leaving safepoint region");
+
+  RuntimeService::record_safepoint_end();
 
-        log_info(safepoint)("Leaving safepoint region");
+  // Release threads lock, so threads can be created/destroyed again.
+  Threads_lock->unlock();
+
+  // Wake threads after local state is correctly set.
+  _wait_barrier->disarm();
+}
 
-        // Start suspended threads
-        jtiwh.rewind();
-        for (; JavaThread *current = jtiwh.next(); ) {
-          ThreadSafepointState* cur_state = current->safepoint_state();
-          assert(cur_state->type() != ThreadSafepointState::_running, "Thread not suspended at safepoint");
-          cur_state->restart();
-          assert(cur_state->is_running(), "safepoint state has not been reset");
-        }
-      }
+// Wake up all threads, so they are ready to resume execution after the safepoint
+// operation has been carried out
+void SafepointSynchronize::end() {
+  assert(Threads_lock->owned_by_self(), "must hold Threads_lock");
+  EventSafepointEnd event;
+  uint64_t safepoint_id = _safepoint_counter;
+  assert(Thread::current()->is_VM_thread(), "Only VM thread can execute a safepoint");
 
-      RuntimeService::record_safepoint_end();
+  if (log_is_enabled(Debug, safepoint, stats)) {
+    end_statistics(os::javaTimeNanos());
+  }
 
-      // Release threads lock, so threads can be created/destroyed again.
-      // It will also release all threads blocked in signal_thread_blocked.
-      Threads_lock->unlock();
-    }
-  } // ThreadsListHandle destroyed here.
+  disarm_safepoint();
+
+  RuntimeService::record_safepoint_epilog(stopped_description);
 
   Universe::heap()->safepoint_synchronize_end();
+
   // record this time so VMThread can keep track of how much time has elapsed
   // since last safepoint.
   _end_of_last_safepoint = os::javaTimeMillis();
-  if (event.should_commit()) {
-    post_safepoint_end_event(&event);
-  }
+
+  post_safepoint_end_event(event, safepoint_id);
 }
 
 bool SafepointSynchronize::is_cleanup_needed() {
@@ -613,6 +599,7 @@
     _counters(counters) {}
 
   void work(uint worker_id) {
+    uint64_t safepoint_id = SafepointSynchronize::safepoint_counter();
     // All threads deflate monitors and mark nmethods (if necessary).
     Threads::possibly_parallel_threads_do(true, &_cleanup_threads_cl);
 
@@ -621,9 +608,8 @@
       EventSafepointCleanupTask event;
       TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
       ObjectSynchronizer::deflate_idle_monitors(_counters);
-      if (event.should_commit()) {
-        post_safepoint_cleanup_task_event(&event, name);
-      }
+
+      post_safepoint_cleanup_task_event(event, safepoint_id, name);
     }
 
     if (_subtasks.try_claim_task(SafepointSynchronize::SAFEPOINT_CLEANUP_UPDATE_INLINE_CACHES)) {
@@ -631,9 +617,8 @@
       EventSafepointCleanupTask event;
       TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
       InlineCacheBuffer::update_inline_caches();
-      if (event.should_commit()) {
-        post_safepoint_cleanup_task_event(&event, name);
-      }
+
+      post_safepoint_cleanup_task_event(event, safepoint_id, name);
     }
 
     if (_subtasks.try_claim_task(SafepointSynchronize::SAFEPOINT_CLEANUP_COMPILATION_POLICY)) {
@@ -641,9 +626,8 @@
       EventSafepointCleanupTask event;
       TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
       CompilationPolicy::policy()->do_safepoint_work();
-      if (event.should_commit()) {
-        post_safepoint_cleanup_task_event(&event, name);
-      }
+
+      post_safepoint_cleanup_task_event(event, safepoint_id, name);
     }
 
     if (_subtasks.try_claim_task(SafepointSynchronize::SAFEPOINT_CLEANUP_SYMBOL_TABLE_REHASH)) {
@@ -652,9 +636,8 @@
         EventSafepointCleanupTask event;
         TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
         SymbolTable::rehash_table();
-        if (event.should_commit()) {
-          post_safepoint_cleanup_task_event(&event, name);
-        }
+
+        post_safepoint_cleanup_task_event(event, safepoint_id, name);
       }
     }
 
@@ -664,9 +647,8 @@
         EventSafepointCleanupTask event;
         TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
         StringTable::rehash_table();
-        if (event.should_commit()) {
-          post_safepoint_cleanup_task_event(&event, name);
-        }
+
+        post_safepoint_cleanup_task_event(event, safepoint_id, name);
       }
     }
 
@@ -677,9 +659,8 @@
       EventSafepointCleanupTask event;
       TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
       ClassLoaderDataGraph::purge_if_needed();
-      if (event.should_commit()) {
-        post_safepoint_cleanup_task_event(&event, name);
-      }
+
+      post_safepoint_cleanup_task_event(event, safepoint_id, name);
     }
 
     if (_subtasks.try_claim_task(SafepointSynchronize::SAFEPOINT_CLEANUP_SYSTEM_DICTIONARY_RESIZE)) {
@@ -687,9 +668,8 @@
       EventSafepointCleanupTask event;
       TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
       ClassLoaderDataGraph::resize_if_needed();
-      if (event.should_commit()) {
-        post_safepoint_cleanup_task_event(&event, name);
-      }
+
+      post_safepoint_cleanup_task_event(event, safepoint_id, name);
     }
 
     _subtasks.all_tasks_completed(_num_workers);
@@ -736,15 +716,48 @@
   assert(InlineCacheBuffer::is_empty(), "should have cleaned up ICBuffer");
 }
 
+// Methods for determining if a JavaThread is safepoint safe.
 
-bool SafepointSynchronize::safepoint_safe(JavaThread *thread, JavaThreadState state) {
+// Returns false if the thread's state could not be determined (treat it as unsafe).
+// Returns true if a stable state was read; that state may itself still be an unsafe one.
+// If called from a non-safepoint context, safepoint_count MUST be InactiveSafepointCounter.
+bool SafepointSynchronize::try_stable_load_state(JavaThreadState *state, JavaThread *thread, uint64_t safepoint_count) {
+  assert((safepoint_count != InactiveSafepointCounter &&
+          Thread::current() == (Thread*)VMThread::vm_thread() &&
+          SafepointSynchronize::_state != _not_synchronized)
+         || safepoint_count == InactiveSafepointCounter, "Invalid check");
+
+  // To handle the thread_blocked state on the backedge of the WaitBarrier from a
+  // previous safepoint, and the reading of the reset value (0/InactiveSafepointCounter),
+  // we re-read the thread state after reading the thread's safepoint id. The JavaThread
+  // changes its thread state from thread_blocked before resetting the safepoint id to 0.
+  // This guarantees that the second read is of an updated thread state: it is either a
+  // different state, making this an unsafe (undetermined) read, or thread_blocked again.
+  // When we see blocked twice with a 0 safepoint id, either:
+  // - It is normally blocked, e.g. on Mutex, TBIVM.
+  // - It was in SS:block(), looped around to SS:block() and is blocked on the WaitBarrier.
+  // - It was in SS:block() but now on a Mutex.
+  // All of these cases are safe.
+
+  *state = thread->thread_state();
+  OrderAccess::loadload();
+  uint64_t sid = thread->safepoint_state()->get_safepoint_id();  // Load acquire
+  if (sid != InactiveSafepointCounter && sid != safepoint_count) {
+    // In an old safepoint, state not relevant.
+    return false;
+  }
+  return *state == thread->thread_state();
+}
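
The heart of try_stable_load_state() is a double read: load the thread state, then the published safepoint id, then accept the state only if it is unchanged and the id does not belong to an older safepoint. A standalone sketch of that pattern with std::atomic follows; the two globals are simple stand-ins for the JavaThread state and the ThreadSafepointState id.

#include <atomic>
#include <cstdint>

constexpr uint64_t kInactiveId = 0;                       // mirrors InactiveSafepointCounter

static std::atomic<int>      observed_thread_state{1};    // e.g. 1 = "blocked"
static std::atomic<uint64_t> published_safepoint_id{kInactiveId};

// Returns true and fills *state only if a stable, current snapshot was read:
// the state is unchanged across the id load, and the id is either inactive or
// belongs to the safepoint currently being processed.
static bool try_stable_load_state_sketch(int* state, uint64_t current_safepoint_id) {
  *state = observed_thread_state.load(std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_acquire);                   // roughly OrderAccess::loadload()
  uint64_t id = published_safepoint_id.load(std::memory_order_acquire);  // like load_acquire
  if (id != kInactiveId && id != current_safepoint_id) {
    return false;                                  // snapshot belongs to an older safepoint
  }
  return *state == observed_thread_state.load(std::memory_order_relaxed);
}

int main() {
  int state = 0;
  return try_stable_load_state_sketch(&state, /* current id */ 3) ? 0 : 1;
}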
+
+static bool safepoint_safe_with(JavaThread *thread, JavaThreadState state) {
   switch(state) {
   case _thread_in_native:
     // native threads are safe if they have no java stack or have walkable stack
     return !thread->has_last_Java_frame() || thread->frame_anchor()->walkable();
 
-   // blocked threads should have already have walkable stack
   case _thread_blocked:
+    // Blocked on the wait_barrier or ordinarily blocked.
+    // Blocked threads should already have a walkable stack.
     assert(!thread->has_last_Java_frame() || thread->frame_anchor()->walkable(), "blocked and not walkable");
     return true;
 
@@ -753,12 +766,28 @@
   }
 }
 
+bool SafepointSynchronize::handshake_safe(JavaThread *thread) {
+  // The polls must be armed otherwise the safe state can change to unsafe at any time.
+  assert(SafepointMechanism::should_block(thread), "Must be armed");
+  // This function must be called with the Threads_lock held so an externally
+  // suspended thread cannot be resumed thus it is safe.
+  assert(Threads_lock->owned_by_self() && Thread::current()->is_VM_thread(),
+         "Must hold Threads_lock and be VMThread");
+  if (thread->is_ext_suspended() || thread->is_terminated()) {
+    return true;
+  }
+  JavaThreadState stable_state;
+  if (try_stable_load_state(&stable_state, thread, InactiveSafepointCounter)) {
+    return safepoint_safe_with(thread, stable_state);
+  }
+  return false;
+}
 
 // See if the thread is running inside a lazy critical native and
 // update the thread critical count if so.  Also set a suspend flag to
 // cause the native wrapper to return into the JVM to do the unlock
 // once the native finishes.
-void SafepointSynchronize::check_for_lazy_critical_native(JavaThread *thread, JavaThreadState state) {
+static void check_for_lazy_critical_native(JavaThread *thread, JavaThreadState state) {
   if (state == _thread_in_native &&
       thread->has_last_Java_frame() &&
       thread->frame_anchor()->walkable()) {
@@ -788,12 +817,10 @@
   }
 }
 
-
+// -------------------------------------------------------------------------------------------------------
+// Implementation of Safepoint blocking point
 
-// -------------------------------------------------------------------------------------------------------
-// Implementation of Safepoint callback point
-
-void SafepointSynchronize::block(JavaThread *thread, bool block_in_safepoint_check) {
+void SafepointSynchronize::block(JavaThread *thread) {
   assert(thread != NULL, "thread must be set");
   assert(thread->is_Java_thread(), "not a Java thread");
 
@@ -813,101 +840,45 @@
   JavaThreadState state = thread->thread_state();
   thread->frame_anchor()->make_walkable(thread);
 
+  uint64_t safepoint_id = SafepointSynchronize::safepoint_counter();
   // Check that we have a valid thread_state at this point
   switch(state) {
     case _thread_in_vm_trans:
     case _thread_in_Java:        // From compiled code
-
-      // We are highly likely to block on the Safepoint_lock. In order to avoid blocking in this case,
-      // we pretend we are still in the VM.
-      thread->set_thread_state(_thread_in_vm);
-
-      if (is_synchronizing()) {
-         Atomic::inc (&TryingToBlock) ;
-      }
-
-      // We will always be holding the Safepoint_lock when we are examine the state
-      // of a thread. Hence, the instructions between the Safepoint_lock->lock() and
-      // Safepoint_lock->unlock() are happening atomic with regards to the safepoint code
-      Safepoint_lock->lock_without_safepoint_check();
-      if (is_synchronizing()) {
-        // Decrement the number of threads to wait for and signal vm thread
-        assert(_waiting_to_block > 0, "sanity check");
-        _waiting_to_block--;
-        thread->safepoint_state()->set_has_called_back(true);
-
-        DEBUG_ONLY(thread->set_visited_for_critical_count(true));
-        if (thread->in_critical()) {
-          // Notice that this thread is in a critical section
-          increment_jni_active_count();
-        }
-
-        // Consider (_waiting_to_block < 2) to pipeline the wakeup of the VM thread
-        if (_waiting_to_block == 0) {
-          Safepoint_lock->notify_all();
-        }
-      }
-
-      if (block_in_safepoint_check) {
-        // We transition the thread to state _thread_blocked here, but
-        // we can't do our usual check for external suspension and then
-        // self-suspend after the lock_without_safepoint_check() call
-        // below because we are often called during transitions while
-        // we hold different locks. That would leave us suspended while
-        // holding a resource which results in deadlocks.
-        thread->set_thread_state(_thread_blocked);
-        Safepoint_lock->unlock();
-
-        // We now try to acquire the threads lock. Since this lock is hold by the VM thread during
-        // the entire safepoint, the threads will all line up here during the safepoint.
-        Threads_lock->lock_without_safepoint_check();
-        // restore original state. This is important if the thread comes from compiled code, so it
-        // will continue to execute with the _thread_in_Java state.
-        thread->set_thread_state(state);
-        Threads_lock->unlock();
-      } else {
-        // We choose not to block in this call since we would be
-        // caught when transitioning back anyways if the safepoint
-        // is still going on.
-        thread->set_thread_state(state);
-        Safepoint_lock->unlock();
-      }
-      break;
-
     case _thread_in_native_trans:
     case _thread_blocked_trans:
     case _thread_new_trans:
-      if (thread->safepoint_state()->type() == ThreadSafepointState::_call_back &&
-          block_in_safepoint_check) {
-        thread->print_thread_state();
-        fatal("Deadlock in safepoint code.  "
-              "Should have called back to the VM before blocking.");
-      }
+
+      // We have no idea where the VMThread is; it might even be at the next safepoint.
+      // So we can miss this poll, but we will stop at the next one.
 
-      // We transition the thread to state _thread_blocked here, but
-      // we can't do our usual check for external suspension and then
-      // self-suspend after the lock_without_safepoint_check() call
-      // below because we are often called during transitions while
-      // we hold different locks. That would leave us suspended while
-      // holding a resource which results in deadlocks.
+      // This store is data-dependent on the safepoint_id load above and must not pass it.
+      thread->safepoint_state()->set_safepoint_id(safepoint_id); // Release store
+
+      // This part could be skipped if we notice that we missed the poll or are in a future safepoint.
+      OrderAccess::storestore();
       thread->set_thread_state(_thread_blocked);
 
-      // It is not safe to suspend a thread if we discover it is in _thread_in_native_trans. Hence,
-      // the safepoint code might still be waiting for it to block. We need to change the state here,
-      // so it can see that it is at a safepoint.
+      OrderAccess::fence(); // Load in wait barrier should not float up
+      _wait_barrier->wait(static_cast<int>(safepoint_id));
+      assert(_state != _synchronized, "Can't be");
 
-      // Block until the safepoint operation is completed.
-      Threads_lock->lock_without_safepoint_check();
-
-      // Restore state
+      // If the barrier is disarmed, stop this store from floating above the loads in the barrier.
+      OrderAccess::loadstore();
       thread->set_thread_state(state);
 
-      Threads_lock->unlock();
+      // Then we reset the safepoint id to inactive.
+      thread->safepoint_state()->reset_safepoint_id(); // Release store
+
+      OrderAccess::fence();
+
       break;
 
     default:
      fatal("Illegal threadstate encountered: %d", state);
   }
+  guarantee(thread->safepoint_state()->get_safepoint_id() == InactiveSafepointCounter,
+            "The safepoint id should be set only in block path");
 
   // Check for pending. async. exceptions or suspends - except if the
   // thread was blocked inside the VM. has_special_runtime_exit_condition()
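
The blocking path above boils down to: publish the safepoint id you observed (release), go to _thread_blocked, wait on the barrier for exactly that id, restore your state, then clear the id. A self-contained two-thread sketch of that handoff follows, where a mutex/condvar tag stands in for WaitBarrier and plain atomics stand in for the thread state and the published id; all names are illustrative, not HotSpot's.

#include <atomic>
#include <condition_variable>
#include <cstdint>
#include <mutex>
#include <thread>

// A toy wait barrier: waiters block while 'armed_tag' still matches their tag.
class ToyWaitBarrier {
 public:
  void arm(uint64_t tag) {
    std::lock_guard<std::mutex> lg(_lock);
    _armed_tag = tag;
  }
  void disarm() {
    {
      std::lock_guard<std::mutex> lg(_lock);
      _armed_tag = 0;
    }
    _cv.notify_all();
  }
  void wait(uint64_t tag) {
    std::unique_lock<std::mutex> ul(_lock);
    _cv.wait(ul, [&] { return _armed_tag != tag; });
  }
 private:
  std::mutex _lock;
  std::condition_variable _cv;
  uint64_t _armed_tag = 0;
};

static ToyWaitBarrier barrier;
static std::atomic<uint64_t> safepoint_counter{0};
static std::atomic<uint64_t> published_id{0};     // per-thread in the real code
static std::atomic<int>      thread_state{0};     // 0 = running, 1 = blocked

// Mutator side, same order as the block() path: publish the observed id, go
// blocked, wait for that id, restore the state, and only then reset the id.
static void mutator_block() {
  uint64_t id = safepoint_counter.load(std::memory_order_acquire);
  published_id.store(id, std::memory_order_release);
  thread_state.store(1, std::memory_order_seq_cst);   // "_thread_blocked"
  barrier.wait(id);
  thread_state.store(0, std::memory_order_seq_cst);   // restore original state first
  published_id.store(0, std::memory_order_release);   // then reset to inactive
}

int main() {
  // VM-thread side: arm for the next (odd) id, let the mutator block, disarm.
  uint64_t next_id = safepoint_counter.load() + 1;
  barrier.arm(next_id);
  safepoint_counter.store(next_id, std::memory_order_release);

  std::thread mutator(mutator_block);

  // "Do the safepoint work", then go back to an even counter and wake the waiter.
  safepoint_counter.fetch_add(1, std::memory_order_release);
  barrier.disarm();
  mutator.join();
  return 0;
}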
@@ -979,7 +950,7 @@
 
         if (cur_thread->thread_state() != _thread_blocked &&
           ((reason == _spinning_timeout && cur_state->is_running()) ||
-             (reason == _blocking_timeout && !cur_state->has_called_back()))) {
+             (reason == _blocking_timeout))) {
           ls.print("# ");
           cur_thread->print_on(&ls);
           ls.cr();
@@ -1001,11 +972,10 @@
 // -------------------------------------------------------------------------------------------------------
 // Implementation of ThreadSafepointState
 
-ThreadSafepointState::ThreadSafepointState(JavaThread *thread) {
-  _thread = thread;
-  _type   = _running;
-  _has_called_back = false;
-  _at_poll_safepoint = false;
+ThreadSafepointState::ThreadSafepointState(JavaThread *thread)
+  : _at_poll_safepoint(false), _thread(thread), _safepoint_safe(false),
+    _safepoint_id(SafepointSynchronize::InactiveSafepointCounter),
+    _orig_thread_state(_thread_uninitialized), _next(NULL) {
 }
 
 void ThreadSafepointState::create(JavaThread *thread) {
@@ -1020,13 +990,30 @@
   }
 }
 
-void ThreadSafepointState::examine_state_of_thread() {
+uint64_t ThreadSafepointState::get_safepoint_id() const {
+  return OrderAccess::load_acquire(&_safepoint_id);
+}
+
+void ThreadSafepointState::reset_safepoint_id() {
+  OrderAccess::release_store(&_safepoint_id, SafepointSynchronize::InactiveSafepointCounter);
+}
+
+void ThreadSafepointState::set_safepoint_id(uint64_t safepoint_id) {
+  OrderAccess::release_store(&_safepoint_id, safepoint_id);
+}
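
The three accessors above are a textbook acquire/release pair around a single 64-bit field; the same contract expressed with std::atomic looks like the standalone sketch below (an illustration of the memory ordering only, not HotSpot's OrderAccess API).

#include <atomic>
#include <cstdint>

constexpr uint64_t kInactiveSafepointId = 0;

// Stand-in for the _safepoint_id field with the same acquire/release contract.
class SafepointIdSketch {
 public:
  uint64_t get() const {                      // like OrderAccess::load_acquire
    return _id.load(std::memory_order_acquire);
  }
  void set(uint64_t id) {                     // like OrderAccess::release_store
    _id.store(id, std::memory_order_release);
  }
  void reset() { set(kInactiveSafepointId); }
 private:
  std::atomic<uint64_t> _id{kInactiveSafepointId};
};

int main() {
  SafepointIdSketch id;
  id.set(3);                       // stores before this release are published
  uint64_t observed = id.get();    // an acquire load that sees 3 also sees them
  id.reset();
  return observed == 3 ? 0 : 1;
}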
+
+void ThreadSafepointState::examine_state_of_thread(uint64_t safepoint_count) {
   assert(is_running(), "better be running or just have hit safepoint poll");
 
-  JavaThreadState state = _thread->thread_state();
+  JavaThreadState stable_state;
+  if (!SafepointSynchronize::try_stable_load_state(&stable_state, _thread, safepoint_count)) {
+    // We could not get a stable state of the JavaThread.
+    // Consider it running and just return.
+    return;
+  }
 
   // Save the state at the start of safepoint processing.
-  _orig_thread_state = state;
+  _orig_thread_state = stable_state;
 
   // Check for a thread that is suspended. Note that thread resume tries
   // to grab the Threads_lock which we own here, so a thread cannot be
@@ -1050,21 +1037,13 @@
   //
   bool is_suspended = _thread->is_ext_suspended();
   if (is_suspended) {
-    roll_forward(_at_safepoint);
+    account_safe_thread();
     return;
   }
 
-  // Some JavaThread states have an initial safepoint state of
-  // running, but are actually at a safepoint. We will happily
-  // agree and update the safepoint state here.
-  if (SafepointSynchronize::safepoint_safe(_thread, state)) {
-    SafepointSynchronize::check_for_lazy_critical_native(_thread, state);
-    roll_forward(_at_safepoint);
-    return;
-  }
-
-  if (state == _thread_in_vm) {
-    roll_forward(_call_back);
+  if (safepoint_safe_with(_thread, stable_state)) {
+    check_for_lazy_critical_native(_thread, stable_state);
+    account_safe_thread();
     return;
   }
 
@@ -1077,63 +1056,28 @@
   return;
 }
 
-// Returns true is thread could not be rolled forward at present position.
-void ThreadSafepointState::roll_forward(suspend_type type) {
-  _type = type;
-
-  switch(_type) {
-    case _at_safepoint:
-      SafepointSynchronize::signal_thread_at_safepoint();
-      DEBUG_ONLY(_thread->set_visited_for_critical_count(true));
-      if (_thread->in_critical()) {
-        // Notice that this thread is in a critical section
-        SafepointSynchronize::increment_jni_active_count();
-      }
-      break;
-
-    case _call_back:
-      set_has_called_back(false);
-      break;
-
-    case _running:
-    default:
-      ShouldNotReachHere();
+void ThreadSafepointState::account_safe_thread() {
+  SafepointSynchronize::decrement_waiting_to_block();
+  if (_thread->in_critical()) {
+    // Notice that this thread is in a critical section
+    SafepointSynchronize::increment_jni_active_count();
   }
+  DEBUG_ONLY(_thread->set_visited_for_critical_count(SafepointSynchronize::safepoint_counter());)
+  assert(!_safepoint_safe, "Must be unsafe before safe");
+  _safepoint_safe = true;
 }
 
 void ThreadSafepointState::restart() {
-  switch(type()) {
-    case _at_safepoint:
-    case _call_back:
-      break;
-
-    case _running:
-    default:
-       tty->print_cr("restart thread " INTPTR_FORMAT " with state %d",
-                     p2i(_thread), _type);
-       _thread->print();
-      ShouldNotReachHere();
-  }
-  _type = _running;
-  set_has_called_back(false);
+  assert(_safepoint_safe, "Must be safe before unsafe");
+  _safepoint_safe = false;
 }
 
-
 void ThreadSafepointState::print_on(outputStream *st) const {
-  const char *s = NULL;
-
-  switch(_type) {
-    case _running                : s = "_running";              break;
-    case _at_safepoint           : s = "_at_safepoint";         break;
-    case _call_back              : s = "_call_back";            break;
-    default:
-      ShouldNotReachHere();
-  }
+  const char *s = _safepoint_safe ? "_at_safepoint" : "_running";
 
   st->print_cr("Thread: " INTPTR_FORMAT
-              "  [0x%2x] State: %s _has_called_back %d _at_poll_safepoint %d",
-               p2i(_thread), _thread->osthread()->thread_id(), s, _has_called_back,
-               _at_poll_safepoint);
+              "  [0x%2x] State: %s _at_poll_safepoint %d",
+               p2i(_thread), _thread->osthread()->thread_id(), s, _at_poll_safepoint);
 
   _thread->print_thread_state_on(st);
 }
@@ -1143,11 +1087,10 @@
 // Block the thread at poll or poll return for safepoint/handshake.
 void ThreadSafepointState::handle_polling_page_exception() {
 
-  // Check state.  block() will set thread state to thread_in_vm which will
-  // cause the safepoint state _type to become _call_back.
-  suspend_type t = type();
-  assert(!SafepointMechanism::uses_global_page_poll() || t == ThreadSafepointState::_running,
-         "polling page exception on thread not running state: %u", uint(t));
+  // If we're using a global poll, then the thread should not be
+  // marked as safepoint safe yet.
+  assert(!SafepointMechanism::uses_global_page_poll() || !_safepoint_safe,
+         "polling page exception on thread safepoint safe");
 
   // Step 1: Find the nmethod from the return address
   address real_return_addr = thread()->saved_exception_pc();