diff -r 622c26f0673f -r 5d20b085d893 src/hotspot/share/runtime/safepoint.cpp --- a/src/hotspot/share/runtime/safepoint.cpp Fri Feb 15 07:53:03 2019 -0500 +++ b/src/hotspot/share/runtime/safepoint.cpp Fri Feb 15 14:15:10 2019 +0100 @@ -70,70 +70,63 @@ #include "c1/c1_globals.hpp" #endif -template -static void set_current_safepoint_id(E* event, int adjustment = 0) { - assert(event != NULL, "invariant"); - event->set_safepointId(SafepointSynchronize::safepoint_counter() + adjustment); -} - -static void post_safepoint_begin_event(EventSafepointBegin* event, +static void post_safepoint_begin_event(EventSafepointBegin& event, + uint64_t safepoint_id, int thread_count, int critical_thread_count) { - assert(event != NULL, "invariant"); - assert(event->should_commit(), "invariant"); - set_current_safepoint_id(event); - event->set_totalThreadCount(thread_count); - event->set_jniCriticalThreadCount(critical_thread_count); - event->commit(); + if (event.should_commit()) { + event.set_safepointId(safepoint_id); + event.set_totalThreadCount(thread_count); + event.set_jniCriticalThreadCount(critical_thread_count); + event.commit(); + } } -static void post_safepoint_cleanup_event(EventSafepointCleanup* event) { - assert(event != NULL, "invariant"); - assert(event->should_commit(), "invariant"); - set_current_safepoint_id(event); - event->commit(); -} - -static void post_safepoint_synchronize_event(EventSafepointStateSynchronization* event, - int initial_number_of_threads, - int threads_waiting_to_block, - unsigned int iterations) { - assert(event != NULL, "invariant"); - if (event->should_commit()) { - // Group this event together with the ones committed after the counter is increased - set_current_safepoint_id(event, 1); - event->set_initialThreadCount(initial_number_of_threads); - event->set_runningThreadCount(threads_waiting_to_block); - event->set_iterations(iterations); - event->commit(); +static void post_safepoint_cleanup_event(EventSafepointCleanup& event, uint64_t safepoint_id) { + if (event.should_commit()) { + event.set_safepointId(safepoint_id); + event.commit(); } } -static void post_safepoint_wait_blocked_event(EventSafepointWaitBlocked* event, - int initial_threads_waiting_to_block) { - assert(event != NULL, "invariant"); - assert(event->should_commit(), "invariant"); - set_current_safepoint_id(event); - event->set_runningThreadCount(initial_threads_waiting_to_block); - event->commit(); -} - -static void post_safepoint_cleanup_task_event(EventSafepointCleanupTask* event, - const char* name) { - assert(event != NULL, "invariant"); - if (event->should_commit()) { - set_current_safepoint_id(event); - event->set_name(name); - event->commit(); +static void post_safepoint_synchronize_event(EventSafepointStateSynchronization& event, + uint64_t safepoint_id, + int initial_number_of_threads, + int threads_waiting_to_block, + uint64_t iterations) { + if (event.should_commit()) { + event.set_safepointId(safepoint_id); + event.set_initialThreadCount(initial_number_of_threads); + event.set_runningThreadCount(threads_waiting_to_block); + event.set_iterations(iterations); + event.commit(); } } -static void post_safepoint_end_event(EventSafepointEnd* event) { - assert(event != NULL, "invariant"); - if (event->should_commit()) { - // Group this event together with the ones committed before the counter increased - set_current_safepoint_id(event, -1); - event->commit(); +static void post_safepoint_wait_blocked_event(EventSafepointWaitBlocked& event, + uint64_t safepoint_id, + int initial_threads_waiting_to_block) { + if (event.should_commit()) { + event.set_safepointId(safepoint_id); + event.set_runningThreadCount(initial_threads_waiting_to_block); + event.commit(); + } +} + +static void post_safepoint_cleanup_task_event(EventSafepointCleanupTask& event, + uint64_t safepoint_id, + const char* name) { + if (event.should_commit()) { + event.set_safepointId(safepoint_id); + event.set_name(name); + event.commit(); + } +} + +static void post_safepoint_end_event(EventSafepointEnd& event, uint64_t safepoint_id) { + if (event.should_commit()) { + event.set_safepointId(safepoint_id); + event.commit(); } } @@ -141,64 +134,170 @@ // Implementation of Safepoint begin/end SafepointSynchronize::SynchronizeState volatile SafepointSynchronize::_state = SafepointSynchronize::_not_synchronized; -volatile int SafepointSynchronize::_waiting_to_block = 0; +int SafepointSynchronize::_waiting_to_block = 0; volatile uint64_t SafepointSynchronize::_safepoint_counter = 0; +const uint64_t SafepointSynchronize::InactiveSafepointCounter = 0; int SafepointSynchronize::_current_jni_active_count = 0; -long SafepointSynchronize::_end_of_last_safepoint = 0; -int SafepointSynchronize::_defer_thr_suspend_loop_count = 4000; -static const int safepoint_spin_before_yield = 2000; -static volatile int PageArmed = 0 ; // safepoint polling page is RO|RW vs PROT_NONE -static volatile int TryingToBlock = 0 ; // proximate value -- for advisory use only +long SafepointSynchronize::_end_of_last_safepoint = 0; + +WaitBarrier* SafepointSynchronize::_wait_barrier; + +// We need a place to save the desc since it is released before we need it. +static char stopped_description[64] = ""; +static bool _vm_is_waiting = false; + +static volatile bool PageArmed = false; // safepoint polling page is RO|RW vs PROT_NONE static bool timeout_error_printed = false; - -// Statistic related statics +// Statistic related julong SafepointSynchronize::_coalesced_vmop_count = 0; static jlong _safepoint_begin_time = 0; static float _ts_of_current_safepoint = 0.0f; static volatile int _nof_threads_hit_polling_page = 0; -// Roll all threads forward to a safepoint and suspend them all -void SafepointSynchronize::begin() { - EventSafepointBegin begin_event; - Thread* myThread = Thread::current(); - assert(myThread->is_VM_thread(), "Only VM thread may execute a safepoint"); +void SafepointSynchronize::init(Thread* vmthread) { + // WaitBarrier should never be destroyed since we will have + // threads waiting on it while exiting. + _wait_barrier = new WaitBarrier(vmthread); +} + +void SafepointSynchronize::increment_jni_active_count() { + assert(Thread::current()->is_VM_thread(), "Only VM thread may increment"); + ++_current_jni_active_count; +} + +void SafepointSynchronize::decrement_waiting_to_block() { + assert(_waiting_to_block > 0, "sanity check"); + assert(Thread::current()->is_VM_thread(), "Only VM thread may decrement"); + --_waiting_to_block; +} + +static bool thread_not_running(ThreadSafepointState *cur_state) { + if (!cur_state->is_running()) { + return true; + } + cur_state->examine_state_of_thread(SafepointSynchronize::safepoint_counter()); + if (!cur_state->is_running()) { + return true; + } + LogTarget(Trace, safepoint) lt; + if (lt.is_enabled()) { + ResourceMark rm; + LogStream ls(lt); + cur_state->print_on(&ls); + } + return false; +} +#ifdef ASSERT +static void assert_list_is_valid(const ThreadSafepointState* tss_head, int still_running) { + int a = 0; + const ThreadSafepointState *tmp_tss = tss_head; + while (tmp_tss != NULL) { + ++a; + assert(tmp_tss->is_running(), "Illegal initial state"); + tmp_tss = tmp_tss->get_next(); + } + assert(a == still_running, "Must be the same"); +} +#endif // ASSERT + +static void back_off(int iteration) { + // iteration will be 1 the first time we enter this spin back-off. + // naked_short_nanosleep takes tenths of micros which means that + // number of nanoseconds is irrelevant if it's below that. We do + // 20 1 ns sleeps with a total cost of ~1 ms, then we do 1 ms sleeps. + jlong sleep_ns = 1; + if (iteration > 20) { + sleep_ns = NANOUNITS / MILLIUNITS; // 1 ms + } + os::naked_short_nanosleep(sleep_ns); +} + +int SafepointSynchronize::synchronize_threads(jlong safepoint_limit_time, int nof_threads, int* initial_running) +{ + JavaThreadIteratorWithHandle jtiwh; + +#ifdef ASSERT + for (; JavaThread *cur = jtiwh.next(); ) { + assert(cur->safepoint_state()->is_running(), "Illegal initial state"); + } + jtiwh.rewind(); +#endif // ASSERT + + // Iterate through all threads until it has been determined how to stop them all at a safepoint. + int still_running = nof_threads; + ThreadSafepointState *tss_head = NULL; + ThreadSafepointState **p_prev = &tss_head; + for (; JavaThread *cur = jtiwh.next(); ) { + ThreadSafepointState *cur_tss = cur->safepoint_state(); + assert(cur_tss->get_next() == NULL, "Must be NULL"); + if (thread_not_running(cur_tss)) { + --still_running; + } else { + *p_prev = cur_tss; + p_prev = cur_tss->next_ptr(); + } + } + *p_prev = NULL; + + DEBUG_ONLY(assert_list_is_valid(tss_head, still_running);) + + *initial_running = still_running; if (log_is_enabled(Debug, safepoint, stats)) { - _safepoint_begin_time = os::javaTimeNanos(); - _ts_of_current_safepoint = tty->time_stamp().seconds(); - _nof_threads_hit_polling_page = 0; + begin_statistics(nof_threads, still_running); } - Universe::heap()->safepoint_synchronize_begin(); - - // By getting the Threads_lock, we assure that no threads are about to start or - // exit. It is released again in SafepointSynchronize::end(). - Threads_lock->lock(); + int iterations = 1; // The first iteration is above. - assert( _state == _not_synchronized, "trying to safepoint synchronize with wrong state"); - - int nof_threads = Threads::number_of_threads(); - - log_debug(safepoint)("Safepoint synchronization initiated. (%d threads)", nof_threads); - - RuntimeService::record_safepoint_begin(); + while (still_running > 0) { + // Check if this has taken too long: + if (SafepointTimeout && safepoint_limit_time < os::javaTimeNanos()) { + print_safepoint_timeout(_spinning_timeout); + } + if (int(iterations) == -1) { // overflow - something is wrong. + // We can only overflow here when we are using global + // polling pages. We keep this guarantee in its original + // form so that searches of the bug database for this + // failure mode find the right bugs. + guarantee (!PageArmed, "invariant"); + } - MutexLocker mu(Safepoint_lock); - - // Reset the count of active JNI critical threads - _current_jni_active_count = 0; + p_prev = &tss_head; + ThreadSafepointState *cur_tss = tss_head; + while (cur_tss != NULL) { + assert(cur_tss->is_running(), "Illegal initial state"); + if (thread_not_running(cur_tss)) { + --still_running; + *p_prev = NULL; + ThreadSafepointState *tmp = cur_tss; + cur_tss = cur_tss->get_next(); + tmp->set_next(NULL); + } else { + *p_prev = cur_tss; + p_prev = cur_tss->next_ptr(); + cur_tss = cur_tss->get_next(); + } + } - // Set number of threads to wait for, before we initiate the callbacks - _waiting_to_block = nof_threads; - TryingToBlock = 0 ; - int still_running = nof_threads; + DEBUG_ONLY(assert_list_is_valid(tss_head, still_running);) + + if (still_running > 0) { + back_off(iterations); + } + + iterations++; + } - // Save the starting time, so that it can be compared to see if this has taken - // too long to complete. - jlong safepoint_limit_time = 0; - timeout_error_printed = false; + assert(tss_head == NULL, "Must be empty"); + if (log_is_enabled(Debug, safepoint, stats)) { + update_statistics_on_spin_end(); + } + return iterations; +} + +void SafepointSynchronize::arm_safepoint() { // Begin the process of bringing the system to a safepoint. // Java threads can be in several different states and are // stopped by different mechanisms: @@ -216,7 +315,7 @@ // memory writes are serialized with respect to each other, // the VM thread issues a memory barrier instruction. // 3. Running compiled Code - // Compiled code reads a global (Safepoint Polling) page that + // Compiled code reads the local polling page that // is set to fault if we are trying to get to a safepoint. // 4. Blocked // A thread which is blocked will not be allowed to return from the @@ -226,275 +325,154 @@ // between states, the safepointing code will wait for the thread to // block itself when it attempts transitions to a new state. // - { - EventSafepointStateSynchronization sync_event; - int initial_running = 0; - - _state = _synchronizing; - - if (SafepointMechanism::uses_thread_local_poll()) { - // Arming the per thread poll while having _state != _not_synchronized means safepointing - log_trace(safepoint)("Setting thread local yield flag for threads"); - OrderAccess::storestore(); // storestore, global state -> local state - for (JavaThreadIteratorWithHandle jtiwh; JavaThread *cur = jtiwh.next(); ) { - // Make sure the threads start polling, it is time to yield. - SafepointMechanism::arm_local_poll(cur); - } - } - OrderAccess::fence(); // storestore|storeload, global state -> local state - - if (SafepointMechanism::uses_global_page_poll()) { - // Make interpreter safepoint aware - Interpreter::notice_safepoints(); - - // Make polling safepoint aware - guarantee (PageArmed == 0, "invariant") ; - PageArmed = 1 ; - os::make_polling_page_unreadable(); - } - - // Consider using active_processor_count() ... but that call is expensive. - int ncpus = os::processor_count() ; - unsigned int iterations = 0; - - { - JavaThreadIteratorWithHandle jtiwh; -#ifdef ASSERT - for (; JavaThread *cur = jtiwh.next(); ) { - assert(cur->safepoint_state()->is_running(), "Illegal initial state"); - // Clear the visited flag to ensure that the critical counts are collected properly. - cur->set_visited_for_critical_count(false); - } -#endif // ASSERT - if (SafepointTimeout) - safepoint_limit_time = os::javaTimeNanos() + (jlong)SafepointTimeoutDelay * MICROUNITS; + // We must never miss a thread with correct safepoint id, so we must make sure we arm + // the wait barrier for the next safepoint id/counter. + // Arming must be done after resetting _current_jni_active_count, _waiting_to_block. + _wait_barrier->arm(static_cast(_safepoint_counter + 1)); - // Iterate through all threads until it have been determined how to stop them all at a safepoint - int steps = 0 ; - while(still_running > 0) { - jtiwh.rewind(); - for (; JavaThread *cur = jtiwh.next(); ) { - assert(!cur->is_ConcurrentGC_thread(), "A concurrent GC thread is unexpectly being suspended"); - ThreadSafepointState *cur_state = cur->safepoint_state(); - if (cur_state->is_running()) { - cur_state->examine_state_of_thread(); - if (!cur_state->is_running()) { - still_running--; - // consider adjusting steps downward: - // steps = 0 - // steps -= NNN - // steps >>= 1 - // steps = MIN(steps, 2000-100) - // if (iterations != 0) steps -= NNN - } - LogTarget(Trace, safepoint) lt; - if (lt.is_enabled()) { - ResourceMark rm; - LogStream ls(lt); - cur_state->print_on(&ls); - } - } - } - - if (iterations == 0) { - initial_running = still_running; - if (log_is_enabled(Debug, safepoint, stats)) { - begin_statistics(nof_threads, still_running); - } - } - - if (still_running > 0) { - // Check for if it takes to long - if (SafepointTimeout && safepoint_limit_time < os::javaTimeNanos()) { - print_safepoint_timeout(_spinning_timeout); - } + assert((_safepoint_counter & 0x1) == 0, "must be even"); + // The store to _safepoint_counter must happen after any stores in arming. + OrderAccess::release_store(&_safepoint_counter, _safepoint_counter + 1); - // Spin to avoid context switching. - // There's a tension between allowing the mutators to run (and rendezvous) - // vs spinning. As the VM thread spins, wasting cycles, it consumes CPU that - // a mutator might otherwise use profitably to reach a safepoint. Excessive - // spinning by the VM thread on a saturated system can increase rendezvous latency. - // Blocking or yielding incur their own penalties in the form of context switching - // and the resultant loss of $ residency. - // - // Further complicating matters is that yield() does not work as naively expected - // on many platforms -- yield() does not guarantee that any other ready threads - // will run. As such we revert to naked_short_sleep() after some number of iterations. - // nakes_short_sleep() is implemented as a short unconditional sleep. - // Typical operating systems round a "short" sleep period up to 10 msecs, so sleeping - // can actually increase the time it takes the VM thread to detect that a system-wide - // stop-the-world safepoint has been reached. In a pathological scenario such as that - // described in CR6415670 the VMthread may sleep just before the mutator(s) become safe. - // In that case the mutators will be stalled waiting for the safepoint to complete and the - // the VMthread will be sleeping, waiting for the mutators to rendezvous. The VMthread - // will eventually wake up and detect that all mutators are safe, at which point - // we'll again make progress. - // - // Beware too that that the VMThread typically runs at elevated priority. - // Its default priority is higher than the default mutator priority. - // Obviously, this complicates spinning. - // - // Note too that on Windows XP SwitchThreadTo() has quite different behavior than Sleep(0). - // Sleep(0) will _not yield to lower priority threads, while SwitchThreadTo() will. - // - // See the comments in synchronizer.cpp for additional remarks on spinning. - // - // In the future we might: - // -- Modify the safepoint scheme to avoid potentially unbounded spinning. - // This is tricky as the path used by a thread exiting the JVM (say on - // on JNI call-out) simply stores into its state field. The burden - // is placed on the VM thread, which must poll (spin). - // -- Find something useful to do while spinning. If the safepoint is GC-related - // we might aggressively scan the stacks of threads that are already safe. - // -- YieldTo() any still-running mutators that are ready but OFFPROC. - // -- Check system saturation. If the system is not fully saturated then - // simply spin and avoid sleep/yield. - // -- As still-running mutators rendezvous they could unpark the sleeping - // VMthread. This works well for still-running mutators that become - // safe. The VMthread must still poll for mutators that call-out. - // -- Drive the policy on time-since-begin instead of iterations. - // -- Consider making the spin duration a function of the # of CPUs: - // Spin = (((ncpus-1) * M) + K) + F(still_running) - // Alternately, instead of counting iterations of the outer loop - // we could count the # of threads visited in the inner loop, above. - // -- On windows consider using the return value from SwitchThreadTo() - // to drive subsequent spin/SwitchThreadTo()/Sleep(N) decisions. + // We are synchronizing + OrderAccess::storestore(); // Ordered with _safepoint_counter + _state = _synchronizing; - if (int(iterations) == -1) { // overflow - something is wrong. - // We can only overflow here when we are using global - // polling pages. We keep this guarantee in its original - // form so that searches of the bug database for this - // failure mode find the right bugs. - guarantee (PageArmed == 0, "invariant"); - } - - // Instead of (ncpus > 1) consider either (still_running < (ncpus + EPSILON)) or - // ((still_running + _waiting_to_block - TryingToBlock)) < ncpus) - ++steps ; - if (ncpus > 1 && steps < safepoint_spin_before_yield) { - SpinPause() ; // MP-Polite spin - } else - if (steps < _defer_thr_suspend_loop_count) { - os::naked_yield() ; - } else { - os::naked_short_sleep(1); - } - - iterations ++ ; - } - assert(iterations < (uint)max_jint, "We have been iterating in the safepoint loop too long"); - } - } // ThreadsListHandle destroyed here. - assert(still_running == 0, "sanity check"); - - if (log_is_enabled(Debug, safepoint, stats)) { - update_statistics_on_spin_end(); - } - if (sync_event.should_commit()) { - post_safepoint_synchronize_event(&sync_event, initial_running, _waiting_to_block, iterations); + if (SafepointMechanism::uses_thread_local_poll()) { + // Arming the per thread poll while having _state != _not_synchronized means safepointing + log_trace(safepoint)("Setting thread local yield flag for threads"); + OrderAccess::storestore(); // storestore, global state -> local state + for (JavaThreadIteratorWithHandle jtiwh; JavaThread *cur = jtiwh.next(); ) { + // Make sure the threads start polling, it is time to yield. + SafepointMechanism::arm_local_poll(cur); } } + OrderAccess::fence(); // storestore|storeload, global state -> local state - // wait until all threads are stopped - { - EventSafepointWaitBlocked wait_blocked_event; - int initial_waiting_to_block = _waiting_to_block; + if (SafepointMechanism::uses_global_page_poll()) { + // Make interpreter safepoint aware + Interpreter::notice_safepoints(); + + // Make polling safepoint aware + guarantee (!PageArmed, "invariant") ; + PageArmed = true; + os::make_polling_page_unreadable(); + } +} + +// Roll all threads forward to a safepoint and suspend them all +void SafepointSynchronize::begin() { + EventSafepointBegin begin_event; + assert(Thread::current()->is_VM_thread(), "Only VM thread may execute a safepoint"); + + strncpy(stopped_description, VMThread::vm_safepoint_description(), sizeof(stopped_description) - 1); + stopped_description[sizeof(stopped_description) - 1] = '\0'; + + if (log_is_enabled(Debug, safepoint, stats)) { + _safepoint_begin_time = os::javaTimeNanos(); + _ts_of_current_safepoint = tty->time_stamp().seconds(); + _nof_threads_hit_polling_page = 0; + } + + Universe::heap()->safepoint_synchronize_begin(); + + // By getting the Threads_lock, we assure that no threads are about to start or + // exit. It is released again in SafepointSynchronize::end(). + Threads_lock->lock(); + + assert( _state == _not_synchronized, "trying to safepoint synchronize with wrong state"); - while (_waiting_to_block > 0) { - log_debug(safepoint)("Waiting for %d thread(s) to block", _waiting_to_block); - if (!SafepointTimeout || timeout_error_printed) { - Safepoint_lock->wait(true); // true, means with no safepoint checks - } else { - // Compute remaining time - jlong remaining_time = safepoint_limit_time - os::javaTimeNanos(); + int nof_threads = Threads::number_of_threads(); + + log_debug(safepoint)("Safepoint synchronization initiated using %s wait barrier. (%d threads)", _wait_barrier->description(), nof_threads); + + RuntimeService::record_safepoint_begin(); + + // Reset the count of active JNI critical threads + _current_jni_active_count = 0; + + // Set number of threads to wait for + _waiting_to_block = nof_threads; - // If there is no remaining time, then there is an error - if (remaining_time < 0 || Safepoint_lock->wait(true, remaining_time / MICROUNITS)) { - print_safepoint_timeout(_blocking_timeout); - } - } - } - assert(_waiting_to_block == 0, "sanity check"); + jlong safepoint_limit_time = 0; + if (SafepointTimeout) { + // Set the limit time, so that it can be compared to see if this has taken + // too long to complete. + safepoint_limit_time = os::javaTimeNanos() + (jlong)SafepointTimeoutDelay * MICROUNITS; + } + timeout_error_printed = false; + + EventSafepointStateSynchronization sync_event; + int initial_running = 0; + + // Arms the safepoint, _current_jni_active_count and _waiting_to_block must be set before. + arm_safepoint(); + + // Will spin until all threads are safe. + int iterations = synchronize_threads(safepoint_limit_time, nof_threads, &initial_running); + assert(_waiting_to_block == 0, "No thread should be running"); + + post_safepoint_synchronize_event(sync_event, _safepoint_counter, initial_running, + _waiting_to_block, iterations); + + // Keep event from now. + EventSafepointWaitBlocked wait_blocked_event; #ifndef PRODUCT - if (SafepointTimeout) { - jlong current_time = os::javaTimeNanos(); - if (safepoint_limit_time < current_time) { - log_warning(safepoint)("# SafepointSynchronize: Finished after " - INT64_FORMAT_W(6) " ms", - (int64_t)((current_time - safepoint_limit_time) / MICROUNITS + - (jlong)SafepointTimeoutDelay)); - } + if (SafepointTimeout) { + jlong current_time = os::javaTimeNanos(); + if (safepoint_limit_time < current_time) { + log_warning(safepoint)("# SafepointSynchronize: Finished after " + INT64_FORMAT_W(6) " ms", + (int64_t)((current_time - safepoint_limit_time) / MICROUNITS + + (jlong)SafepointTimeoutDelay)); } + } #endif - assert((_safepoint_counter & 0x1) == 0, "must be even"); - assert(Threads_lock->owned_by_self(), "must hold Threads_lock"); - _safepoint_counter ++; + assert(Threads_lock->owned_by_self(), "must hold Threads_lock"); - // Record state - _state = _synchronized; + // Record state + _state = _synchronized; - OrderAccess::fence(); - if (wait_blocked_event.should_commit()) { - post_safepoint_wait_blocked_event(&wait_blocked_event, initial_waiting_to_block); - } - } + OrderAccess::fence(); + + post_safepoint_wait_blocked_event(wait_blocked_event, _safepoint_counter, 0); #ifdef ASSERT // Make sure all the threads were visited. for (JavaThreadIteratorWithHandle jtiwh; JavaThread *cur = jtiwh.next(); ) { - assert(cur->was_visited_for_critical_count(), "missed a thread"); + assert(cur->was_visited_for_critical_count(_safepoint_counter), "missed a thread"); } #endif // ASSERT // Update the count of active JNI critical regions GCLocker::set_jni_lock_count(_current_jni_active_count); - log_info(safepoint)("Entering safepoint region: %s", VMThread::vm_safepoint_description()); + log_info(safepoint)("Entering safepoint region: %s", stopped_description); RuntimeService::record_safepoint_synchronized(); if (log_is_enabled(Debug, safepoint, stats)) { update_statistics_on_sync_end(os::javaTimeNanos()); } - // Call stuff that needs to be run when a safepoint is just about to be completed - { - EventSafepointCleanup cleanup_event; - do_cleanup_tasks(); - if (cleanup_event.should_commit()) { - post_safepoint_cleanup_event(&cleanup_event); - } - } + // We do the safepoint cleanup first since a GC related safepoint + // needs cleanup to be completed before running the GC op. + EventSafepointCleanup cleanup_event; + do_cleanup_tasks(); + post_safepoint_cleanup_event(cleanup_event, _safepoint_counter); if (log_is_enabled(Debug, safepoint, stats)) { // Record how much time spend on the above cleanup tasks update_statistics_on_cleanup_end(os::javaTimeNanos()); } - if (begin_event.should_commit()) { - post_safepoint_begin_event(&begin_event, nof_threads, _current_jni_active_count); - } + post_safepoint_begin_event(begin_event, _safepoint_counter, nof_threads, _current_jni_active_count); } -// Wake up all threads, so they are ready to resume execution after the safepoint -// operation has been carried out -void SafepointSynchronize::end() { - assert(Threads_lock->owned_by_self(), "must hold Threads_lock"); - assert((_safepoint_counter & 0x1) == 1, "must be odd"); - EventSafepointEnd event; - _safepoint_counter ++; - // memory fence isn't required here since an odd _safepoint_counter - // value can do no harm and a fence is issued below anyway. - - DEBUG_ONLY(Thread* myThread = Thread::current();) - assert(myThread->is_VM_thread(), "Only VM thread can execute a safepoint"); - - if (log_is_enabled(Debug, safepoint, stats)) { - end_statistics(os::javaTimeNanos()); - } - +void SafepointSynchronize::disarm_safepoint() { + uint64_t safepoint_id = _safepoint_counter; { JavaThreadIteratorWithHandle jtiwh; #ifdef ASSERT @@ -508,66 +486,74 @@ } #endif // ASSERT - if (PageArmed) { - assert(SafepointMechanism::uses_global_page_poll(), "sanity"); + if (SafepointMechanism::uses_global_page_poll()) { + guarantee (PageArmed, "invariant"); // Make polling safepoint aware os::make_polling_page_readable(); - PageArmed = 0 ; - } - - if (SafepointMechanism::uses_global_page_poll()) { + PageArmed = false; // Remove safepoint check from interpreter Interpreter::ignore_safepoints(); } - { - MutexLocker mu(Safepoint_lock); + OrderAccess::fence(); // keep read and write of _state from floating up + assert(_state == _synchronized, "must be synchronized before ending safepoint synchronization"); + + // Change state first to _not_synchronized. + // No threads should see _synchronized when running. + _state = _not_synchronized; + + // Set the next dormant (even) safepoint id. + assert((_safepoint_counter & 0x1) == 1, "must be odd"); + OrderAccess::release_store(&_safepoint_counter, _safepoint_counter + 1); - assert(_state == _synchronized, "must be synchronized before ending safepoint synchronization"); + OrderAccess::fence(); // Keep the local state from floating up. + + jtiwh.rewind(); + for (; JavaThread *current = jtiwh.next(); ) { + // Clear the visited flag to ensure that the critical counts are collected properly. + DEBUG_ONLY(current->reset_visited_for_critical_count(safepoint_id);) + ThreadSafepointState* cur_state = current->safepoint_state(); + assert(!cur_state->is_running(), "Thread not suspended at safepoint"); + cur_state->restart(); // TSS _running + assert(cur_state->is_running(), "safepoint state has not been reset"); + SafepointMechanism::disarm_local_poll(current); + } + } // ~JavaThreadIteratorWithHandle - if (SafepointMechanism::uses_thread_local_poll()) { - _state = _not_synchronized; - OrderAccess::storestore(); // global state -> local state - jtiwh.rewind(); - for (; JavaThread *current = jtiwh.next(); ) { - ThreadSafepointState* cur_state = current->safepoint_state(); - cur_state->restart(); // TSS _running - SafepointMechanism::disarm_local_poll(current); - } - log_info(safepoint)("Leaving safepoint region"); - } else { - // Set to not synchronized, so the threads will not go into the signal_thread_blocked method - // when they get restarted. - _state = _not_synchronized; - OrderAccess::fence(); + log_info(safepoint)("Leaving safepoint region"); + + RuntimeService::record_safepoint_end(); - log_info(safepoint)("Leaving safepoint region"); + // Release threads lock, so threads can be created/destroyed again. + Threads_lock->unlock(); + + // Wake threads after local state is correctly set. + _wait_barrier->disarm(); +} - // Start suspended threads - jtiwh.rewind(); - for (; JavaThread *current = jtiwh.next(); ) { - ThreadSafepointState* cur_state = current->safepoint_state(); - assert(cur_state->type() != ThreadSafepointState::_running, "Thread not suspended at safepoint"); - cur_state->restart(); - assert(cur_state->is_running(), "safepoint state has not been reset"); - } - } +// Wake up all threads, so they are ready to resume execution after the safepoint +// operation has been carried out +void SafepointSynchronize::end() { + assert(Threads_lock->owned_by_self(), "must hold Threads_lock"); + EventSafepointEnd event; + uint64_t safepoint_id = _safepoint_counter; + assert(Thread::current()->is_VM_thread(), "Only VM thread can execute a safepoint"); - RuntimeService::record_safepoint_end(); + if (log_is_enabled(Debug, safepoint, stats)) { + end_statistics(os::javaTimeNanos()); + } - // Release threads lock, so threads can be created/destroyed again. - // It will also release all threads blocked in signal_thread_blocked. - Threads_lock->unlock(); - } - } // ThreadsListHandle destroyed here. + disarm_safepoint(); + + RuntimeService::record_safepoint_epilog(stopped_description); Universe::heap()->safepoint_synchronize_end(); + // record this time so VMThread can keep track how much time has elapsed // since last safepoint. _end_of_last_safepoint = os::javaTimeMillis(); - if (event.should_commit()) { - post_safepoint_end_event(&event); - } + + post_safepoint_end_event(event, safepoint_id); } bool SafepointSynchronize::is_cleanup_needed() { @@ -613,6 +599,7 @@ _counters(counters) {} void work(uint worker_id) { + uint64_t safepoint_id = SafepointSynchronize::safepoint_counter(); // All threads deflate monitors and mark nmethods (if necessary). Threads::possibly_parallel_threads_do(true, &_cleanup_threads_cl); @@ -621,9 +608,8 @@ EventSafepointCleanupTask event; TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup)); ObjectSynchronizer::deflate_idle_monitors(_counters); - if (event.should_commit()) { - post_safepoint_cleanup_task_event(&event, name); - } + + post_safepoint_cleanup_task_event(event, safepoint_id, name); } if (_subtasks.try_claim_task(SafepointSynchronize::SAFEPOINT_CLEANUP_UPDATE_INLINE_CACHES)) { @@ -631,9 +617,8 @@ EventSafepointCleanupTask event; TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup)); InlineCacheBuffer::update_inline_caches(); - if (event.should_commit()) { - post_safepoint_cleanup_task_event(&event, name); - } + + post_safepoint_cleanup_task_event(event, safepoint_id, name); } if (_subtasks.try_claim_task(SafepointSynchronize::SAFEPOINT_CLEANUP_COMPILATION_POLICY)) { @@ -641,9 +626,8 @@ EventSafepointCleanupTask event; TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup)); CompilationPolicy::policy()->do_safepoint_work(); - if (event.should_commit()) { - post_safepoint_cleanup_task_event(&event, name); - } + + post_safepoint_cleanup_task_event(event, safepoint_id, name); } if (_subtasks.try_claim_task(SafepointSynchronize::SAFEPOINT_CLEANUP_SYMBOL_TABLE_REHASH)) { @@ -652,9 +636,8 @@ EventSafepointCleanupTask event; TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup)); SymbolTable::rehash_table(); - if (event.should_commit()) { - post_safepoint_cleanup_task_event(&event, name); - } + + post_safepoint_cleanup_task_event(event, safepoint_id, name); } } @@ -664,9 +647,8 @@ EventSafepointCleanupTask event; TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup)); StringTable::rehash_table(); - if (event.should_commit()) { - post_safepoint_cleanup_task_event(&event, name); - } + + post_safepoint_cleanup_task_event(event, safepoint_id, name); } } @@ -677,9 +659,8 @@ EventSafepointCleanupTask event; TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup)); ClassLoaderDataGraph::purge_if_needed(); - if (event.should_commit()) { - post_safepoint_cleanup_task_event(&event, name); - } + + post_safepoint_cleanup_task_event(event, safepoint_id, name); } if (_subtasks.try_claim_task(SafepointSynchronize::SAFEPOINT_CLEANUP_SYSTEM_DICTIONARY_RESIZE)) { @@ -687,9 +668,8 @@ EventSafepointCleanupTask event; TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup)); ClassLoaderDataGraph::resize_if_needed(); - if (event.should_commit()) { - post_safepoint_cleanup_task_event(&event, name); - } + + post_safepoint_cleanup_task_event(event, safepoint_id, name); } _subtasks.all_tasks_completed(_num_workers); @@ -736,15 +716,48 @@ assert(InlineCacheBuffer::is_empty(), "should have cleaned up ICBuffer"); } +// Methods for determining if a JavaThread is safepoint safe. -bool SafepointSynchronize::safepoint_safe(JavaThread *thread, JavaThreadState state) { +// False means unsafe with undetermined state. +// True means a determined state, but it may be an unsafe state. +// If called from a non-safepoint context safepoint_count MUST be InactiveSafepointCounter. +bool SafepointSynchronize::try_stable_load_state(JavaThreadState *state, JavaThread *thread, uint64_t safepoint_count) { + assert((safepoint_count != InactiveSafepointCounter && + Thread::current() == (Thread*)VMThread::vm_thread() && + SafepointSynchronize::_state != _not_synchronized) + || safepoint_count == InactiveSafepointCounter, "Invalid check"); + + // To handle the thread_blocked state on the backedge of the WaitBarrier from + // previous safepoint and reading the reset value (0/InactiveSafepointCounter) we + // re-read state after we read thread safepoint id. The JavaThread changes its + // thread state from thread_blocked before resetting safepoint id to 0. + // This guarantees the second read will be from an updated thread state. It can + // either be different state making this an unsafe state or it can see blocked + // again. When we see blocked twice with a 0 safepoint id, either: + // - It is normally blocked, e.g. on Mutex, TBIVM. + // - It was in SS:block(), looped around to SS:block() and is blocked on the WaitBarrier. + // - It was in SS:block() but now on a Mutex. + // All of these cases are safe. + + *state = thread->thread_state(); + OrderAccess::loadload(); + uint64_t sid = thread->safepoint_state()->get_safepoint_id(); // Load acquire + if (sid != InactiveSafepointCounter && sid != safepoint_count) { + // In an old safepoint, state not relevant. + return false; + } + return *state == thread->thread_state(); +} + +static bool safepoint_safe_with(JavaThread *thread, JavaThreadState state) { switch(state) { case _thread_in_native: // native threads are safe if they have no java stack or have walkable stack return !thread->has_last_Java_frame() || thread->frame_anchor()->walkable(); - // blocked threads should have already have walkable stack case _thread_blocked: + // On wait_barrier or blocked. + // Blocked threads should already have walkable stack. assert(!thread->has_last_Java_frame() || thread->frame_anchor()->walkable(), "blocked and not walkable"); return true; @@ -753,12 +766,28 @@ } } +bool SafepointSynchronize::handshake_safe(JavaThread *thread) { + // The polls must be armed otherwise the safe state can change to unsafe at any time. + assert(SafepointMechanism::should_block(thread), "Must be armed"); + // This function must be called with the Threads_lock held so an externally + // suspended thread cannot be resumed thus it is safe. + assert(Threads_lock->owned_by_self() && Thread::current()->is_VM_thread(), + "Must hold Threads_lock and be VMThread"); + if (thread->is_ext_suspended() || thread->is_terminated()) { + return true; + } + JavaThreadState stable_state; + if (try_stable_load_state(&stable_state, thread, InactiveSafepointCounter)) { + return safepoint_safe_with(thread, stable_state); + } + return false; +} // See if the thread is running inside a lazy critical native and // update the thread critical count if so. Also set a suspend flag to // cause the native wrapper to return into the JVM to do the unlock // once the native finishes. -void SafepointSynchronize::check_for_lazy_critical_native(JavaThread *thread, JavaThreadState state) { +static void check_for_lazy_critical_native(JavaThread *thread, JavaThreadState state) { if (state == _thread_in_native && thread->has_last_Java_frame() && thread->frame_anchor()->walkable()) { @@ -788,12 +817,10 @@ } } - +// ------------------------------------------------------------------------------------------------------- +// Implementation of Safepoint blocking point -// ------------------------------------------------------------------------------------------------------- -// Implementation of Safepoint callback point - -void SafepointSynchronize::block(JavaThread *thread, bool block_in_safepoint_check) { +void SafepointSynchronize::block(JavaThread *thread) { assert(thread != NULL, "thread must be set"); assert(thread->is_Java_thread(), "not a Java thread"); @@ -813,101 +840,45 @@ JavaThreadState state = thread->thread_state(); thread->frame_anchor()->make_walkable(thread); + uint64_t safepoint_id = SafepointSynchronize::safepoint_counter(); // Check that we have a valid thread_state at this point switch(state) { case _thread_in_vm_trans: case _thread_in_Java: // From compiled code - - // We are highly likely to block on the Safepoint_lock. In order to avoid blocking in this case, - // we pretend we are still in the VM. - thread->set_thread_state(_thread_in_vm); - - if (is_synchronizing()) { - Atomic::inc (&TryingToBlock) ; - } - - // We will always be holding the Safepoint_lock when we are examine the state - // of a thread. Hence, the instructions between the Safepoint_lock->lock() and - // Safepoint_lock->unlock() are happening atomic with regards to the safepoint code - Safepoint_lock->lock_without_safepoint_check(); - if (is_synchronizing()) { - // Decrement the number of threads to wait for and signal vm thread - assert(_waiting_to_block > 0, "sanity check"); - _waiting_to_block--; - thread->safepoint_state()->set_has_called_back(true); - - DEBUG_ONLY(thread->set_visited_for_critical_count(true)); - if (thread->in_critical()) { - // Notice that this thread is in a critical section - increment_jni_active_count(); - } - - // Consider (_waiting_to_block < 2) to pipeline the wakeup of the VM thread - if (_waiting_to_block == 0) { - Safepoint_lock->notify_all(); - } - } - - if (block_in_safepoint_check) { - // We transition the thread to state _thread_blocked here, but - // we can't do our usual check for external suspension and then - // self-suspend after the lock_without_safepoint_check() call - // below because we are often called during transitions while - // we hold different locks. That would leave us suspended while - // holding a resource which results in deadlocks. - thread->set_thread_state(_thread_blocked); - Safepoint_lock->unlock(); - - // We now try to acquire the threads lock. Since this lock is hold by the VM thread during - // the entire safepoint, the threads will all line up here during the safepoint. - Threads_lock->lock_without_safepoint_check(); - // restore original state. This is important if the thread comes from compiled code, so it - // will continue to execute with the _thread_in_Java state. - thread->set_thread_state(state); - Threads_lock->unlock(); - } else { - // We choose not to block in this call since we would be - // caught when transitioning back anyways if the safepoint - // is still going on. - thread->set_thread_state(state); - Safepoint_lock->unlock(); - } - break; - case _thread_in_native_trans: case _thread_blocked_trans: case _thread_new_trans: - if (thread->safepoint_state()->type() == ThreadSafepointState::_call_back && - block_in_safepoint_check) { - thread->print_thread_state(); - fatal("Deadlock in safepoint code. " - "Should have called back to the VM before blocking."); - } + + // We have no idea where the VMThread is, it might even be at next safepoint. + // So we can miss this poll, but stop at next. - // We transition the thread to state _thread_blocked here, but - // we can't do our usual check for external suspension and then - // self-suspend after the lock_without_safepoint_check() call - // below because we are often called during transitions while - // we hold different locks. That would leave us suspended while - // holding a resource which results in deadlocks. + // Load dependent store, it must not pass loading of safepoint_id. + thread->safepoint_state()->set_safepoint_id(safepoint_id); // Release store + + // This part we can skip if we notice we miss or are in a future safepoint. + OrderAccess::storestore(); thread->set_thread_state(_thread_blocked); - // It is not safe to suspend a thread if we discover it is in _thread_in_native_trans. Hence, - // the safepoint code might still be waiting for it to block. We need to change the state here, - // so it can see that it is at a safepoint. + OrderAccess::fence(); // Load in wait barrier should not float up + _wait_barrier->wait(static_cast(safepoint_id)); + assert(_state != _synchronized, "Can't be"); - // Block until the safepoint operation is completed. - Threads_lock->lock_without_safepoint_check(); - - // Restore state + // If barrier is disarmed stop store from floating above loads in barrier. + OrderAccess::loadstore(); thread->set_thread_state(state); - Threads_lock->unlock(); + // Then we reset the safepoint id to inactive. + thread->safepoint_state()->reset_safepoint_id(); // Release store + + OrderAccess::fence(); + break; default: fatal("Illegal threadstate encountered: %d", state); } + guarantee(thread->safepoint_state()->get_safepoint_id() == InactiveSafepointCounter, + "The safepoint id should be set only in block path"); // Check for pending. async. exceptions or suspends - except if the // thread was blocked inside the VM. has_special_runtime_exit_condition() @@ -979,7 +950,7 @@ if (cur_thread->thread_state() != _thread_blocked && ((reason == _spinning_timeout && cur_state->is_running()) || - (reason == _blocking_timeout && !cur_state->has_called_back()))) { + (reason == _blocking_timeout))) { ls.print("# "); cur_thread->print_on(&ls); ls.cr(); @@ -1001,11 +972,10 @@ // ------------------------------------------------------------------------------------------------------- // Implementation of ThreadSafepointState -ThreadSafepointState::ThreadSafepointState(JavaThread *thread) { - _thread = thread; - _type = _running; - _has_called_back = false; - _at_poll_safepoint = false; +ThreadSafepointState::ThreadSafepointState(JavaThread *thread) + : _at_poll_safepoint(false), _thread(thread), _safepoint_safe(false), + _safepoint_id(SafepointSynchronize::InactiveSafepointCounter), + _orig_thread_state(_thread_uninitialized), _next(NULL) { } void ThreadSafepointState::create(JavaThread *thread) { @@ -1020,13 +990,30 @@ } } -void ThreadSafepointState::examine_state_of_thread() { +uint64_t ThreadSafepointState::get_safepoint_id() const { + return OrderAccess::load_acquire(&_safepoint_id); +} + +void ThreadSafepointState::reset_safepoint_id() { + OrderAccess::release_store(&_safepoint_id, SafepointSynchronize::InactiveSafepointCounter); +} + +void ThreadSafepointState::set_safepoint_id(uint64_t safepoint_id) { + OrderAccess::release_store(&_safepoint_id, safepoint_id); +} + +void ThreadSafepointState::examine_state_of_thread(uint64_t safepoint_count) { assert(is_running(), "better be running or just have hit safepoint poll"); - JavaThreadState state = _thread->thread_state(); + JavaThreadState stable_state; + if (!SafepointSynchronize::try_stable_load_state(&stable_state, _thread, safepoint_count)) { + // We could not get stable state of the JavaThread. + // Consider it running and just return. + return; + } // Save the state at the start of safepoint processing. - _orig_thread_state = state; + _orig_thread_state = stable_state; // Check for a thread that is suspended. Note that thread resume tries // to grab the Threads_lock which we own here, so a thread cannot be @@ -1050,21 +1037,13 @@ // bool is_suspended = _thread->is_ext_suspended(); if (is_suspended) { - roll_forward(_at_safepoint); + account_safe_thread(); return; } - // Some JavaThread states have an initial safepoint state of - // running, but are actually at a safepoint. We will happily - // agree and update the safepoint state here. - if (SafepointSynchronize::safepoint_safe(_thread, state)) { - SafepointSynchronize::check_for_lazy_critical_native(_thread, state); - roll_forward(_at_safepoint); - return; - } - - if (state == _thread_in_vm) { - roll_forward(_call_back); + if (safepoint_safe_with(_thread, stable_state)) { + check_for_lazy_critical_native(_thread, stable_state); + account_safe_thread(); return; } @@ -1077,63 +1056,28 @@ return; } -// Returns true is thread could not be rolled forward at present position. -void ThreadSafepointState::roll_forward(suspend_type type) { - _type = type; - - switch(_type) { - case _at_safepoint: - SafepointSynchronize::signal_thread_at_safepoint(); - DEBUG_ONLY(_thread->set_visited_for_critical_count(true)); - if (_thread->in_critical()) { - // Notice that this thread is in a critical section - SafepointSynchronize::increment_jni_active_count(); - } - break; - - case _call_back: - set_has_called_back(false); - break; - - case _running: - default: - ShouldNotReachHere(); +void ThreadSafepointState::account_safe_thread() { + SafepointSynchronize::decrement_waiting_to_block(); + if (_thread->in_critical()) { + // Notice that this thread is in a critical section + SafepointSynchronize::increment_jni_active_count(); } + DEBUG_ONLY(_thread->set_visited_for_critical_count(SafepointSynchronize::safepoint_counter());) + assert(!_safepoint_safe, "Must be unsafe before safe"); + _safepoint_safe = true; } void ThreadSafepointState::restart() { - switch(type()) { - case _at_safepoint: - case _call_back: - break; - - case _running: - default: - tty->print_cr("restart thread " INTPTR_FORMAT " with state %d", - p2i(_thread), _type); - _thread->print(); - ShouldNotReachHere(); - } - _type = _running; - set_has_called_back(false); + assert(_safepoint_safe, "Must be safe before unsafe"); + _safepoint_safe = false; } - void ThreadSafepointState::print_on(outputStream *st) const { - const char *s = NULL; - - switch(_type) { - case _running : s = "_running"; break; - case _at_safepoint : s = "_at_safepoint"; break; - case _call_back : s = "_call_back"; break; - default: - ShouldNotReachHere(); - } + const char *s = _safepoint_safe ? "_at_safepoint" : "_running"; st->print_cr("Thread: " INTPTR_FORMAT - " [0x%2x] State: %s _has_called_back %d _at_poll_safepoint %d", - p2i(_thread), _thread->osthread()->thread_id(), s, _has_called_back, - _at_poll_safepoint); + " [0x%2x] State: %s _at_poll_safepoint %d", + p2i(_thread), _thread->osthread()->thread_id(), s, _at_poll_safepoint); _thread->print_thread_state_on(st); } @@ -1143,11 +1087,10 @@ // Block the thread at poll or poll return for safepoint/handshake. void ThreadSafepointState::handle_polling_page_exception() { - // Check state. block() will set thread state to thread_in_vm which will - // cause the safepoint state _type to become _call_back. - suspend_type t = type(); - assert(!SafepointMechanism::uses_global_page_poll() || t == ThreadSafepointState::_running, - "polling page exception on thread not running state: %u", uint(t)); + // If we're using a global poll, then the thread should not be + // marked as safepoint safe yet. + assert(!SafepointMechanism::uses_global_page_poll() || !_safepoint_safe, + "polling page exception on thread safepoint safe"); // Step 1: Find the nmethod from the return address address real_return_addr = thread()->saved_exception_pc();