# HG changeset patch # User pchilanomate # Date 1549397533 18000 # Node ID 043ae846819f4cd86973fcb255b2994a2ab9da20 # Parent 2c6c0fabe6a2482110cccd96696260f1729165f4 8210832: Remove sneaky locking in class Monitor Summary: Removed sneaky locking and simplified vm monitors implementation Reviewed-by: rehn, dcubed, pliden, dholmes, coleenp Contributed-by: david.holmes@oracle.com, patricio.chilano.mateo@oracle.com diff -r 2c6c0fabe6a2 -r 043ae846819f src/hotspot/os/posix/os_posix.cpp --- a/src/hotspot/os/posix/os_posix.cpp Tue Feb 05 13:21:59 2019 -0500 +++ b/src/hotspot/os/posix/os_posix.cpp Tue Feb 05 15:12:13 2019 -0500 @@ -2215,5 +2215,74 @@ } } +// Platform Monitor implementation + +os::PlatformMonitor::PlatformMonitor() { + int status = pthread_cond_init(&_cond, _condAttr); + assert_status(status == 0, status, "cond_init"); + status = pthread_mutex_init(&_mutex, _mutexAttr); + assert_status(status == 0, status, "mutex_init"); +} + +os::PlatformMonitor::~PlatformMonitor() { + int status = pthread_cond_destroy(&_cond); + assert_status(status == 0, status, "cond_destroy"); + status = pthread_mutex_destroy(&_mutex); + assert_status(status == 0, status, "mutex_destroy"); +} + +void os::PlatformMonitor::lock() { + int status = pthread_mutex_lock(&_mutex); + assert_status(status == 0, status, "mutex_lock"); +} + +void os::PlatformMonitor::unlock() { + int status = pthread_mutex_unlock(&_mutex); + assert_status(status == 0, status, "mutex_unlock"); +} + +bool os::PlatformMonitor::try_lock() { + int status = pthread_mutex_trylock(&_mutex); + assert_status(status == 0 || status == EBUSY, status, "mutex_trylock"); + return status == 0; +} + +// Must already be locked +int os::PlatformMonitor::wait(jlong millis) { + assert(millis >= 0, "negative timeout"); + if (millis > 0) { + struct timespec abst; + // We have to watch for overflow when converting millis to nanos, + // but if millis is that large then we will end up limiting to + // MAX_SECS anyway, so just do that here. 
+ if (millis / MILLIUNITS > MAX_SECS) { + millis = jlong(MAX_SECS) * MILLIUNITS; + } + to_abstime(&abst, millis * (NANOUNITS / MILLIUNITS), false, false); + + int ret = OS_TIMEOUT; + int status = pthread_cond_timedwait(&_cond, &_mutex, &abst); + assert_status(status == 0 || status == ETIMEDOUT, + status, "cond_timedwait"); + if (status == 0) { + ret = OS_OK; + } + return ret; + } else { + int status = pthread_cond_wait(&_cond, &_mutex); + assert_status(status == 0, status, "cond_wait"); + return OS_OK; + } +} + +void os::PlatformMonitor::notify() { + int status = pthread_cond_signal(&_cond); + assert_status(status == 0, status, "cond_signal"); +} + +void os::PlatformMonitor::notify_all() { + int status = pthread_cond_broadcast(&_cond); + assert_status(status == 0, status, "cond_broadcast"); +} #endif // !SOLARIS diff -r 2c6c0fabe6a2 -r 043ae846819f src/hotspot/os/posix/os_posix.hpp --- a/src/hotspot/os/posix/os_posix.hpp Tue Feb 05 13:21:59 2019 -0500 +++ b/src/hotspot/os/posix/os_posix.hpp Tue Feb 05 15:12:13 2019 -0500 @@ -224,6 +224,23 @@ PlatformParker(); }; +// Platform specific implementation that underpins VM Monitor/Mutex class +class PlatformMonitor : public CHeapObj<mtInternal> { + private: + pthread_mutex_t _mutex; // Native mutex for locking + pthread_cond_t _cond; // Native condition variable for blocking + + public: + PlatformMonitor(); + ~PlatformMonitor(); + void lock(); + void unlock(); + bool try_lock(); + int wait(jlong millis); + void notify(); + void notify_all(); +}; + #endif // !SOLARIS #endif // OS_POSIX_OS_POSIX_HPP diff -r 2c6c0fabe6a2 -r 043ae846819f src/hotspot/os/solaris/os_solaris.cpp --- a/src/hotspot/os/solaris/os_solaris.cpp Tue Feb 05 13:21:59 2019 -0500 +++ b/src/hotspot/os/solaris/os_solaris.cpp Tue Feb 05 15:12:13 2019 -0500 @@ -5192,6 +5192,72 @@ } } +// Platform Monitor implementation + +os::PlatformMonitor::PlatformMonitor() { + int status = os::Solaris::cond_init(&_cond); + assert_status(status == 0, status, "cond_init"); + status = 
os::Solaris::mutex_init(&_mutex); + assert_status(status == 0, status, "mutex_init"); +} + +os::PlatformMonitor::~PlatformMonitor() { + int status = os::Solaris::cond_destroy(&_cond); + assert_status(status == 0, status, "cond_destroy"); + status = os::Solaris::mutex_destroy(&_mutex); + assert_status(status == 0, status, "mutex_destroy"); +} + +void os::PlatformMonitor::lock() { + int status = os::Solaris::mutex_lock(&_mutex); + assert_status(status == 0, status, "mutex_lock"); +} + +void os::PlatformMonitor::unlock() { + int status = os::Solaris::mutex_unlock(&_mutex); + assert_status(status == 0, status, "mutex_unlock"); +} + +bool os::PlatformMonitor::try_lock() { + int status = os::Solaris::mutex_trylock(&_mutex); + assert_status(status == 0 || status == EBUSY, status, "mutex_trylock"); + return status == 0; +} + +// Must already be locked +int os::PlatformMonitor::wait(jlong millis) { + assert(millis >= 0, "negative timeout"); + if (millis > 0) { + timestruc_t abst; + int ret = OS_TIMEOUT; + compute_abstime(&abst, millis); + int status = os::Solaris::cond_timedwait(&_cond, &_mutex, &abst); + assert_status(status == 0 || status == EINTR || + status == ETIME || status == ETIMEDOUT, + status, "cond_timedwait"); + // EINTR acts as spurious wakeup - which is permitted anyway + if (status == 0 || status == EINTR) { + ret = OS_OK; + } + return ret; + } else { + int status = os::Solaris::cond_wait(&_cond, &_mutex); + assert_status(status == 0 || status == EINTR, + status, "cond_wait"); + return OS_OK; + } +} + +void os::PlatformMonitor::notify() { + int status = os::Solaris::cond_signal(&_cond); + assert_status(status == 0, status, "cond_signal"); +} + +void os::PlatformMonitor::notify_all() { + int status = os::Solaris::cond_broadcast(&_cond); + assert_status(status == 0, status, "cond_broadcast"); +} + extern char** environ; // Run the specified command in a separate process. 
Return its exit value, diff -r 2c6c0fabe6a2 -r 043ae846819f src/hotspot/os/solaris/os_solaris.hpp --- a/src/hotspot/os/solaris/os_solaris.hpp Tue Feb 05 13:21:59 2019 -0500 +++ b/src/hotspot/os/solaris/os_solaris.hpp Tue Feb 05 15:12:13 2019 -0500 @@ -335,4 +335,21 @@ } }; +// Platform specific implementation that underpins VM Monitor/Mutex class +class PlatformMonitor : public CHeapObj<mtInternal> { + private: + mutex_t _mutex; // Native mutex for locking + cond_t _cond; // Native condition variable for blocking + + public: + PlatformMonitor(); + ~PlatformMonitor(); + void lock(); + void unlock(); + bool try_lock(); + int wait(jlong millis); + void notify(); + void notify_all(); +}; + #endif // OS_SOLARIS_OS_SOLARIS_HPP diff -r 2c6c0fabe6a2 -r 043ae846819f src/hotspot/os/windows/os_windows.cpp --- a/src/hotspot/os/windows/os_windows.cpp Tue Feb 05 13:21:59 2019 -0500 +++ b/src/hotspot/os/windows/os_windows.cpp Tue Feb 05 15:12:13 2019 -0500 @@ -5277,6 +5277,55 @@ SetEvent(_ParkEvent); } +// Platform Monitor implementation + +os::PlatformMonitor::PlatformMonitor() { + InitializeConditionVariable(&_cond); + InitializeCriticalSection(&_mutex); +} + +os::PlatformMonitor::~PlatformMonitor() { + DeleteCriticalSection(&_mutex); +} + +void os::PlatformMonitor::lock() { + EnterCriticalSection(&_mutex); +} + +void os::PlatformMonitor::unlock() { + LeaveCriticalSection(&_mutex); +} + +bool os::PlatformMonitor::try_lock() { + return TryEnterCriticalSection(&_mutex); +} + +// Must already be locked. A millis of 0 waits without a timeout; a timeout too large for the 32-bit DWORD argument is clamped to INFINITE - 1 instead of being truncated. +int os::PlatformMonitor::wait(jlong millis) { + assert(millis >= 0, "negative timeout"); + int ret = OS_TIMEOUT; + int status = SleepConditionVariableCS(&_cond, &_mutex, + millis == 0 ? 
INFINITE : (millis >= (jlong)INFINITE ? INFINITE - 1 : (DWORD)millis)); + if (status != 0) { + ret = OS_OK; + } + #ifndef PRODUCT + else { + DWORD err = GetLastError(); + assert(err == ERROR_TIMEOUT, "SleepConditionVariableCS: %ld:", err); + } + #endif + return ret; +} + +void os::PlatformMonitor::notify() { + WakeConditionVariable(&_cond); +} + +void os::PlatformMonitor::notify_all() { + WakeAllConditionVariable(&_cond); +} + // Run the specified command in a separate process. Return its exit value, // or -1 on failure (e.g. can't create a new process). int os::fork_and_exec(char* cmd, bool use_vfork_if_available) { diff -r 2c6c0fabe6a2 -r 043ae846819f src/hotspot/os/windows/os_windows.hpp --- a/src/hotspot/os/windows/os_windows.hpp Tue Feb 05 13:21:59 2019 -0500 +++ b/src/hotspot/os/windows/os_windows.hpp Tue Feb 05 15:12:13 2019 -0500 @@ -187,4 +187,21 @@ } ; +// Platform specific implementation that underpins VM Monitor/Mutex class +class PlatformMonitor : public CHeapObj<mtInternal> { + private: + CRITICAL_SECTION _mutex; // Native mutex for locking + CONDITION_VARIABLE _cond; // Native condition variable for blocking + + public: + PlatformMonitor(); + ~PlatformMonitor(); + void lock(); + void unlock(); + bool try_lock(); + int wait(jlong millis); + void notify(); + void notify_all(); +}; + #endif // OS_WINDOWS_OS_WINDOWS_HPP diff -r 2c6c0fabe6a2 -r 043ae846819f src/hotspot/share/logging/logTag.hpp --- a/src/hotspot/share/logging/logTag.hpp Tue Feb 05 13:21:59 2019 -0500 +++ b/src/hotspot/share/logging/logTag.hpp Tue Feb 05 15:12:13 2019 -0500 @@ -169,6 +169,7 @@ LOG_TAG(mirror) \ LOG_TAG(verification) \ LOG_TAG(verify) \ + LOG_TAG(vmmonitor) \ LOG_TAG(vmoperation) \ LOG_TAG(vmthread) \ LOG_TAG(vtables) \ diff -r 2c6c0fabe6a2 -r 043ae846819f src/hotspot/share/runtime/interfaceSupport.inline.hpp --- a/src/hotspot/share/runtime/interfaceSupport.inline.hpp Tue Feb 05 13:21:59 2019 -0500 +++ b/src/hotspot/share/runtime/interfaceSupport.inline.hpp Tue Feb 05 15:12:13 2019 
-0500 @@ -286,6 +286,69 @@ } }; +// Unlike 
ThreadBlockInVM, this class is designed to avoid certain deadlock scenarios while making +// transitions inside class Monitor in cases where we need to block for a safepoint or handshake. It +// receives an extra argument compared to ThreadBlockInVM, the address of a pointer to the monitor we +// are trying to acquire. This will be used to access and release the monitor if needed to avoid +// said deadlocks. +// It works like ThreadBlockInVM but differs from it in two ways: +// - When transitioning in (constructor), it checks for safepoints without blocking, i.e., calls +// back if needed to allow a pending safepoint to continue but does not block in it. +// - When transitioning back (destructor), if there is a pending safepoint or handshake it releases +// the monitor that is only partially acquired. +class ThreadBlockInVMWithDeadlockCheck : public ThreadStateTransition { + private: + Monitor** _in_flight_monitor_adr; + + void release_monitor() { + assert(_in_flight_monitor_adr != NULL, "_in_flight_monitor_adr should have been set on constructor"); + Monitor* in_flight_monitor = *_in_flight_monitor_adr; + if (in_flight_monitor != NULL) { + in_flight_monitor->release_for_safepoint(); + *_in_flight_monitor_adr = NULL; + } + } + public: + ThreadBlockInVMWithDeadlockCheck(JavaThread* thread, Monitor** in_flight_monitor_adr) + : ThreadStateTransition(thread), _in_flight_monitor_adr(in_flight_monitor_adr) { + // Once we are blocked vm expects stack to be walkable + thread->frame_anchor()->make_walkable(thread); + + thread->set_thread_state((JavaThreadState)(_thread_in_vm + 1)); + InterfaceSupport::serialize_thread_state_with_handler(thread); + + SafepointMechanism::callback_if_safepoint(thread); + + thread->set_thread_state(_thread_blocked); + + CHECK_UNHANDLED_OOPS_ONLY(_thread->clear_unhandled_oops();) + } + ~ThreadBlockInVMWithDeadlockCheck() { + // Change to transition state + _thread->set_thread_state((JavaThreadState)(_thread_blocked + 1)); + + 
InterfaceSupport::serialize_thread_state_with_handler(_thread); + + if (SafepointMechanism::should_block(_thread)) { + release_monitor(); + SafepointMechanism::callback_if_safepoint(_thread); + // The VMThread might have read that we were in a _thread_blocked state + // and proceeded to process a handshake for us. If that's the case then + // we need to block. + // By doing this we are also making the current thread process its own + // handshake if there is one pending and the VMThread didn't try to process + // it yet. This is more of a side-effect and not really necessary; the + // handshake could be processed later on. + if (_thread->has_handshake()) { + _thread->handshake_process_by_self(); + } + } + + _thread->set_thread_state(_thread_in_vm); + CHECK_UNHANDLED_OOPS_ONLY(_thread->clear_unhandled_oops();) + } +}; + // This special transition class is only used to prevent asynchronous exceptions // from being installed on vm exit in situations where we can't tolerate them. diff -r 2c6c0fabe6a2 -r 043ae846819f src/hotspot/share/runtime/mutex.cpp --- a/src/hotspot/share/runtime/mutex.cpp Tue Feb 05 13:21:59 2019 -0500 +++ b/src/hotspot/share/runtime/mutex.cpp Tue Feb 05 15:12:13 2019 -0500 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -23,915 +23,81 @@ */ #include "precompiled.hpp" -#include "runtime/atomic.hpp" +#include "logging/log.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/mutex.hpp" -#include "runtime/orderAccess.hpp" #include "runtime/osThread.hpp" #include "runtime/safepointMechanism.inline.hpp" #include "runtime/thread.inline.hpp" #include "utilities/events.hpp" #include "utilities/macros.hpp" -// o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o -// -// Native Monitor-Mutex locking - theory of operations -// -// * Native Monitors are completely unrelated to Java-level monitors, -// although the "back-end" slow-path implementations share a common lineage. -// See objectMonitor:: in synchronizer.cpp. -// Native Monitors do *not* support nesting or recursion but otherwise -// they're basically Hoare-flavor monitors. -// -// * A thread acquires ownership of a Monitor/Mutex by CASing the LockByte -// in the _LockWord from zero to non-zero. Note that the _Owner field -// is advisory and is used only to verify that the thread calling unlock() -// is indeed the last thread to have acquired the lock. -// -// * Contending threads "push" themselves onto the front of the contention -// queue -- called the cxq -- with CAS and then spin/park. -// The _LockWord contains the LockByte as well as the pointer to the head -// of the cxq. Colocating the LockByte with the cxq precludes certain races. -// -// * Using a separately addressable LockByte allows for CAS:MEMBAR or CAS:0 -// idioms. We currently use MEMBAR in the uncontended unlock() path, as -// MEMBAR often has less latency than CAS. If warranted, we could switch to -// a CAS:0 mode, using timers to close the resultant race, as is done -// with Java Monitors in synchronizer.cpp. 
-// -// See the following for a discussion of the relative cost of atomics (CAS) -// MEMBAR, and ways to eliminate such instructions from the common-case paths: -// -- http://blogs.sun.com/dave/entry/biased_locking_in_hotspot -// -- http://blogs.sun.com/dave/resource/MustangSync.pdf -// -- http://blogs.sun.com/dave/resource/synchronization-public2.pdf -// -- synchronizer.cpp -// -// * Overall goals - desiderata -// 1. Minimize context switching -// 2. Minimize lock migration -// 3. Minimize CPI -- affinity and locality -// 4. Minimize the execution of high-latency instructions such as CAS or MEMBAR -// 5. Minimize outer lock hold times -// 6. Behave gracefully on a loaded system -// -// * Thread flow and list residency: -// -// Contention queue --> EntryList --> OnDeck --> Owner --> !Owner -// [..resident on monitor list..] -// [...........contending..................] -// -// -- The contention queue (cxq) contains recently-arrived threads (RATs). -// Threads on the cxq eventually drain into the EntryList. -// -- Invariant: a thread appears on at most one list -- cxq, EntryList -// or WaitSet -- at any one time. -// -- For a given monitor there can be at most one "OnDeck" thread at any -// given time but if needbe this particular invariant could be relaxed. -// -// * The WaitSet and EntryList linked lists are composed of ParkEvents. -// I use ParkEvent instead of threads as ParkEvents are immortal and -// type-stable, meaning we can safely unpark() a possibly stale -// list element in the unlock()-path. (That's benign). -// -// * Succession policy - providing for progress: -// -// As necessary, the unlock()ing thread identifies, unlinks, and unparks -// an "heir presumptive" tentative successor thread from the EntryList. -// This becomes the so-called "OnDeck" thread, of which there can be only -// one at any given time for a given monitor. The wakee will recontend -// for ownership of monitor. -// -// Succession is provided for by a policy of competitive handoff. 
-// The exiting thread does _not_ grant or pass ownership to the -// successor thread. (This is also referred to as "handoff" succession"). -// Instead the exiting thread releases ownership and possibly wakes -// a successor, so the successor can (re)compete for ownership of the lock. -// -// Competitive handoff provides excellent overall throughput at the expense -// of short-term fairness. If fairness is a concern then one remedy might -// be to add an AcquireCounter field to the monitor. After a thread acquires -// the lock it will decrement the AcquireCounter field. When the count -// reaches 0 the thread would reset the AcquireCounter variable, abdicate -// the lock directly to some thread on the EntryList, and then move itself to the -// tail of the EntryList. -// -// But in practice most threads engage or otherwise participate in resource -// bounded producer-consumer relationships, so lock domination is not usually -// a practical concern. Recall too, that in general it's easier to construct -// a fair lock from a fast lock, but not vice-versa. -// -// * The cxq can have multiple concurrent "pushers" but only one concurrent -// detaching thread. This mechanism is immune from the ABA corruption. -// More precisely, the CAS-based "push" onto cxq is ABA-oblivious. -// We use OnDeck as a pseudo-lock to enforce the at-most-one detaching -// thread constraint. -// -// * Taken together, the cxq and the EntryList constitute or form a -// single logical queue of threads stalled trying to acquire the lock. -// We use two distinct lists to reduce heat on the list ends. -// Threads in lock() enqueue onto cxq while threads in unlock() will -// dequeue from the EntryList. (c.f. Michael Scott's "2Q" algorithm). -// A key desideratum is to minimize queue & monitor metadata manipulation -// that occurs while holding the "outer" monitor lock -- that is, we want to -// minimize monitor lock holds times. 
-// -// The EntryList is ordered by the prevailing queue discipline and -// can be organized in any convenient fashion, such as a doubly-linked list or -// a circular doubly-linked list. If we need a priority queue then something akin -// to Solaris' sleepq would work nicely. Viz., -// -- http://agg.eng/ws/on10_nightly/source/usr/src/uts/common/os/sleepq.c. -// -- http://cvs.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/uts/common/os/sleepq.c -// Queue discipline is enforced at ::unlock() time, when the unlocking thread -// drains the cxq into the EntryList, and orders or reorders the threads on the -// EntryList accordingly. -// -// Barring "lock barging", this mechanism provides fair cyclic ordering, -// somewhat similar to an elevator-scan. -// -// * OnDeck -// -- For a given monitor there can be at most one OnDeck thread at any given -// instant. The OnDeck thread is contending for the lock, but has been -// unlinked from the EntryList and cxq by some previous unlock() operations. -// Once a thread has been designated the OnDeck thread it will remain so -// until it manages to acquire the lock -- being OnDeck is a stable property. -// -- Threads on the EntryList or cxq are _not allowed to attempt lock acquisition. -// -- OnDeck also serves as an "inner lock" as follows. Threads in unlock() will, after -// having cleared the LockByte and dropped the outer lock, attempt to "trylock" -// OnDeck by CASing the field from null to non-null. If successful, that thread -// is then responsible for progress and succession and can use CAS to detach and -// drain the cxq into the EntryList. By convention, only this thread, the holder of -// the OnDeck inner lock, can manipulate the EntryList or detach and drain the -// RATs on the cxq into the EntryList. This avoids ABA corruption on the cxq as -// we allow multiple concurrent "push" operations but restrict detach concurrency -// to at most one thread. 
Having selected and detached a successor, the thread then -// changes the OnDeck to refer to that successor, and then unparks the successor. -// That successor will eventually acquire the lock and clear OnDeck. Beware -// that the OnDeck usage as a lock is asymmetric. A thread in unlock() transiently -// "acquires" OnDeck, performs queue manipulations, passes OnDeck to some successor, -// and then the successor eventually "drops" OnDeck. Note that there's never -// any sense of contention on the inner lock, however. Threads never contend -// or wait for the inner lock. -// -- OnDeck provides for futile wakeup throttling a described in section 3.3 of -// See http://www.usenix.org/events/jvm01/full_papers/dice/dice.pdf -// In a sense, OnDeck subsumes the ObjectMonitor _Succ and ObjectWaiter -// TState fields found in Java-level objectMonitors. (See synchronizer.cpp). -// -// * Waiting threads reside on the WaitSet list -- wait() puts -// the caller onto the WaitSet. Notify() or notifyAll() simply -// transfers threads from the WaitSet to either the EntryList or cxq. -// Subsequent unlock() operations will eventually unpark the notifyee. -// Unparking a notifee in notify() proper is inefficient - if we were to do so -// it's likely the notifyee would simply impale itself on the lock held -// by the notifier. -// -// * The mechanism is obstruction-free in that if the holder of the transient -// OnDeck lock in unlock() is preempted or otherwise stalls, other threads -// can still acquire and release the outer lock and continue to make progress. -// At worst, waking of already blocked contending threads may be delayed, -// but nothing worse. (We only use "trylock" operations on the inner OnDeck -// lock). -// -// * Note that thread-local storage must be initialized before a thread -// uses Native monitors or mutexes. The native monitor-mutex subsystem -// depends on Thread::current(). 
-// -// * The monitor synchronization subsystem avoids the use of native -// synchronization primitives except for the narrow platform-specific -// park-unpark abstraction. See the comments in os_solaris.cpp regarding -// the semantics of park-unpark. Put another way, this monitor implementation -// depends only on atomic operations and park-unpark. The monitor subsystem -// manages all RUNNING->BLOCKED and BLOCKED->READY transitions while the -// underlying OS manages the READY<->RUN transitions. -// -// * The memory consistency model provide by lock()-unlock() is at least as -// strong or stronger than the Java Memory model defined by JSR-133. -// That is, we guarantee at least entry consistency, if not stronger. -// See http://g.oswego.edu/dl/jmm/cookbook.html. -// -// * Thread:: currently contains a set of purpose-specific ParkEvents: -// _MutexEvent, _ParkEvent, etc. A better approach might be to do away with -// the purpose-specific ParkEvents and instead implement a general per-thread -// stack of available ParkEvents which we could provision on-demand. The -// stack acts as a local cache to avoid excessive calls to ParkEvent::Allocate() -// and ::Release(). A thread would simply pop an element from the local stack before it -// enqueued or park()ed. When the contention was over the thread would -// push the no-longer-needed ParkEvent back onto its stack. -// -// * A slightly reduced form of ILock() and IUnlock() have been partially -// model-checked (Murphi) for safety and progress at T=1,2,3 and 4. -// It'd be interesting to see if TLA/TLC could be useful as well. -// -// * Mutex-Monitor is a low-level "leaf" subsystem. That is, the monitor -// code should never call other code in the JVM that might itself need to -// acquire monitors or mutexes. That's true *except* in the case of the -// ThreadBlockInVM state transition wrappers. 
The ThreadBlockInVM DTOR handles -// mutator reentry (ingress) by checking for a pending safepoint in which case it will -// call SafepointSynchronize::block(), which in turn may call Safepoint_lock->lock(), etc. -// In that particular case a call to lock() for a given Monitor can end up recursively -// calling lock() on another monitor. While distasteful, this is largely benign -// as the calls come from jacket that wraps lock(), and not from deep within lock() itself. -// -// It's unfortunate that native mutexes and thread state transitions were convolved. -// They're really separate concerns and should have remained that way. Melding -// them together was facile -- a bit too facile. The current implementation badly -// conflates the two concerns. -// -// * TODO-FIXME: -// -// -- Add DTRACE probes for contended acquire, contended acquired, contended unlock -// We should also add DTRACE probes in the ParkEvent subsystem for -// Park-entry, Park-exit, and Unpark. -// -// -- We have an excess of mutex-like constructs in the JVM, namely: -// 1. objectMonitors for Java-level synchronization (synchronizer.cpp) -// 2. low-level muxAcquire and muxRelease -// 3. low-level spinAcquire and spinRelease -// 4. native Mutex:: and Monitor:: -// 5. jvm_raw_lock() and _unlock() -// 6. JVMTI raw monitors -- distinct from (5) despite having a confusingly -// similar name. -// -// o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o -#define UNS(x) (uintptr_t(x)) -#define TRACE(m) \ - { \ - static volatile int ctr = 0; \ - int x = ++ctr; \ - if ((x & (x - 1)) == 0) { \ - ::printf("%d:%s\n", x, #m); \ - ::fflush(stdout); \ - } \ - } - -const intptr_t _LBIT = 1; - -// Endian-ness ... index of least-significant byte in SplitWord.Bytes[] -#ifdef VM_LITTLE_ENDIAN - #define _LSBINDEX 0 -#else - #define _LSBINDEX (sizeof(intptr_t)-1) -#endif - -// Simplistic low-quality Marsaglia SHIFT-XOR RNG. -// Bijective except for the trailing mask operation. 
-// Useful for spin loops as the compiler can't optimize it away. - -static inline jint MarsagliaXORV(jint x) { - if (x == 0) x = 1|os::random(); - x ^= x << 6; - x ^= ((unsigned)x) >> 21; - x ^= x << 7; - return x & 0x7FFFFFFF; -} - -static int Stall(int its) { - static volatile jint rv = 1; - volatile int OnFrame = 0; - jint v = rv ^ UNS(OnFrame); - while (--its >= 0) { - v = MarsagliaXORV(v); - } - // Make this impossible for the compiler to optimize away, - // but (mostly) avoid W coherency sharing on MP systems. - if (v == 0x12345) rv = v; - return v; -} - -int Monitor::TryLock() { - intptr_t v = _LockWord.FullWord; - for (;;) { - if ((v & _LBIT) != 0) return 0; - const intptr_t u = Atomic::cmpxchg(v|_LBIT, &_LockWord.FullWord, v); - if (v == u) return 1; - v = u; - } -} - -int Monitor::TryFast() { - // Optimistic fast-path form ... - // Fast-path attempt for the common uncontended case. - // Avoid RTS->RTO $ coherence upgrade on typical SMP systems. - intptr_t v = Atomic::cmpxchg(_LBIT, &_LockWord.FullWord, (intptr_t)0); // agro ... - if (v == 0) return 1; - - for (;;) { - if ((v & _LBIT) != 0) return 0; - const intptr_t u = Atomic::cmpxchg(v|_LBIT, &_LockWord.FullWord, v); - if (v == u) return 1; - v = u; - } -} - -int Monitor::ILocked() { - const intptr_t w = _LockWord.FullWord & 0xFF; - assert(w == 0 || w == _LBIT, "invariant"); - return w == _LBIT; -} - -// Polite TATAS spinlock with exponential backoff - bounded spin. -// Ideally we'd use processor cycles, time or vtime to control -// the loop, but we currently use iterations. -// All the constants within were derived empirically but work over -// over the spectrum of J2SE reference platforms. -// On Niagara-class systems the back-off is unnecessary but -// is relatively harmless. (At worst it'll slightly retard -// acquisition times). The back-off is critical for older SMP systems -// where constant fetching of the LockWord would otherwise impair -// scalability. 
-// -// Clamp spinning at approximately 1/2 of a context-switch round-trip. -// See synchronizer.cpp for details and rationale. - -int Monitor::TrySpin(Thread * const Self) { - if (TryLock()) return 1; - if (!os::is_MP()) return 0; - - int Probes = 0; - int Delay = 0; - int SpinMax = 20; - for (;;) { - intptr_t v = _LockWord.FullWord; - if ((v & _LBIT) == 0) { - if (Atomic::cmpxchg (v|_LBIT, &_LockWord.FullWord, v) == v) { - return 1; - } - continue; - } - - SpinPause(); - - // Periodically increase Delay -- variable Delay form - // conceptually: delay *= 1 + 1/Exponent - ++Probes; - if (Probes > SpinMax) return 0; - - if ((Probes & 0x7) == 0) { - Delay = ((Delay << 1)|1) & 0x7FF; - // CONSIDER: Delay += 1 + (Delay/4); Delay &= 0x7FF ; - } - - // Stall for "Delay" time units - iterations in the current implementation. - // Avoid generating coherency traffic while stalled. - // Possible ways to delay: - // PAUSE, SLEEP, MEMBAR #sync, MEMBAR #halt, - // wr %g0,%asi, gethrtime, rdstick, rdtick, rdtsc, etc. ... - // Note that on Niagara-class systems we want to minimize STs in the - // spin loop. N1 and brethren write-around the L1$ over the xbar into the L2$. - // Furthermore, they don't have a W$ like traditional SPARC processors. - // We currently use a Marsaglia Shift-Xor RNG loop. 
- if (Self != NULL) { - jint rv = Self->rng[0]; - for (int k = Delay; --k >= 0;) { - rv = MarsagliaXORV(rv); - if (SafepointMechanism::should_block(Self)) return 0; - } - Self->rng[0] = rv; - } else { - Stall(Delay); - } - } -} - -static int ParkCommon(ParkEvent * ev, jlong timo) { - // Diagnostic support - periodically unwedge blocked threads - int err = OS_OK; - if (0 == timo) { - ev->park(); - } else { - err = ev->park(timo); - } - return err; -} - -inline int Monitor::AcquireOrPush(ParkEvent * ESelf) { - intptr_t v = _LockWord.FullWord; - for (;;) { - if ((v & _LBIT) == 0) { - const intptr_t u = Atomic::cmpxchg(v|_LBIT, &_LockWord.FullWord, v); - if (u == v) return 1; // indicate acquired - v = u; - } else { - // Anticipate success ... - ESelf->ListNext = (ParkEvent *)(v & ~_LBIT); - const intptr_t u = Atomic::cmpxchg(intptr_t(ESelf)|_LBIT, &_LockWord.FullWord, v); - if (u == v) return 0; // indicate pushed onto cxq - v = u; - } - // Interference - LockWord change - just retry - } -} - -// ILock and IWait are the lowest level primitive internal blocking -// synchronization functions. The callers of IWait and ILock must have -// performed any needed state transitions beforehand. -// IWait and ILock may directly call park() without any concern for thread state. -// Note that ILock and IWait do *not* access _owner. -// _owner is a higher-level logical concept. - -void Monitor::ILock(Thread * Self) { - assert(_OnDeck != Self->_MutexEvent, "invariant"); - - if (TryFast()) { - Exeunt: - assert(ILocked(), "invariant"); - return; - } - - ParkEvent * const ESelf = Self->_MutexEvent; - assert(_OnDeck != ESelf, "invariant"); - - // As an optimization, spinners could conditionally try to set _OnDeck to _LBIT - // Synchronizer.cpp uses a similar optimization. - if (TrySpin(Self)) goto Exeunt; - - // Slow-path - the lock is contended. - // Either Enqueue Self on cxq or acquire the outer lock. 
- // LockWord encoding = (cxq,LOCKBYTE) - ESelf->reset(); - OrderAccess::fence(); - - if (AcquireOrPush(ESelf)) goto Exeunt; - - // At any given time there is at most one ondeck thread. - // ondeck implies not resident on cxq and not resident on EntryList - // Only the OnDeck thread can try to acquire -- contend for -- the lock. - // CONSIDER: use Self->OnDeck instead of m->OnDeck. - // Deschedule Self so that others may run. - while (OrderAccess::load_acquire(&_OnDeck) != ESelf) { - ParkCommon(ESelf, 0); - } - - // Self is now in the OnDeck position and will remain so until it - // manages to acquire the lock. - for (;;) { - assert(_OnDeck == ESelf, "invariant"); - if (TrySpin(Self)) break; - // It's probably wise to spin only if we *actually* blocked - // CONSIDER: check the lockbyte, if it remains set then - // preemptively drain the cxq into the EntryList. - // The best place and time to perform queue operations -- lock metadata -- - // is _before having acquired the outer lock, while waiting for the lock to drop. - ParkCommon(ESelf, 0); - } - - assert(_OnDeck == ESelf, "invariant"); - _OnDeck = NULL; - - // Note that we current drop the inner lock (clear OnDeck) in the slow-path - // epilogue immediately after having acquired the outer lock. - // But instead we could consider the following optimizations: - // A. Shift or defer dropping the inner lock until the subsequent IUnlock() operation. - // This might avoid potential reacquisition of the inner lock in IUlock(). - // B. While still holding the inner lock, attempt to opportunistically select - // and unlink the next OnDeck thread from the EntryList. - // If successful, set OnDeck to refer to that thread, otherwise clear OnDeck. - // It's critical that the select-and-unlink operation run in constant-time as - // it executes when holding the outer lock and may artificially increase the - // effective length of the critical section. 
- // Note that (A) and (B) are tantamount to succession by direct handoff for - // the inner lock. - goto Exeunt; -} - -void Monitor::IUnlock(bool RelaxAssert) { - assert(ILocked(), "invariant"); - // Conceptually we need a MEMBAR #storestore|#loadstore barrier or fence immediately - // before the store that releases the lock. Crucially, all the stores and loads in the - // critical section must be globally visible before the store of 0 into the lock-word - // that releases the lock becomes globally visible. That is, memory accesses in the - // critical section should not be allowed to bypass or overtake the following ST that - // releases the lock. As such, to prevent accesses within the critical section - // from "leaking" out, we need a release fence between the critical section and the - // store that releases the lock. In practice that release barrier is elided on - // platforms with strong memory models such as TSO. - // - // Note that the OrderAccess::storeload() fence that appears after unlock store - // provides for progress conditions and succession and is _not related to exclusion - // safety or lock release consistency. - OrderAccess::release_store(&_LockWord.Bytes[_LSBINDEX], jbyte(0)); // drop outer lock - - OrderAccess::storeload(); - ParkEvent * const w = _OnDeck; // raw load as we will just return if non-NULL - assert(RelaxAssert || w != Thread::current()->_MutexEvent, "invariant"); - if (w != NULL) { - // Either we have a valid ondeck thread or ondeck is transiently "locked" - // by some exiting thread as it arranges for succession. The LSBit of - // OnDeck allows us to discriminate two cases. If the latter, the - // responsibility for progress and succession lies with that other thread. - // For good performance, we also depend on the fact that redundant unpark() - // operations are cheap. That is, repeated Unpark()ing of the OnDeck thread - // is inexpensive. This approach provides implicit futile wakeup throttling. 
- // Note that the referent "w" might be stale with respect to the lock. - // In that case the following unpark() is harmless and the worst that'll happen - // is a spurious return from a park() operation. Critically, if "w" _is stale, - // then progress is known to have occurred as that means the thread associated - // with "w" acquired the lock. In that case this thread need take no further - // action to guarantee progress. - if ((UNS(w) & _LBIT) == 0) w->unpark(); - return; - } - - intptr_t cxq = _LockWord.FullWord; - if (((cxq & ~_LBIT)|UNS(_EntryList)) == 0) { - return; // normal fast-path exit - cxq and EntryList both empty - } - if (cxq & _LBIT) { - // Optional optimization ... - // Some other thread acquired the lock in the window since this - // thread released it. Succession is now that thread's responsibility. - return; - } - - Succession: - // Slow-path exit - this thread must ensure succession and progress. - // OnDeck serves as lock to protect cxq and EntryList. - // Only the holder of OnDeck can manipulate EntryList or detach the RATs from cxq. - // Avoid ABA - allow multiple concurrent producers (enqueue via push-CAS) - // but only one concurrent consumer (detacher of RATs). - // Unlike a normal lock, however, the exiting thread "locks" OnDeck, - // picks a successor and marks that thread as OnDeck. That successor - // thread will then clear OnDeck once it eventually acquires the outer lock. - if (!Atomic::replace_if_null((ParkEvent*)_LBIT, &_OnDeck)) { - return; - } - - ParkEvent * List = _EntryList; - if (List != NULL) { - // Transfer the head of the EntryList to the OnDeck position. - // Once OnDeck, a thread stays OnDeck until it acquires the lock. - // For a given lock there is at most OnDeck thread at any one instant. 
- WakeOne: - assert(List == _EntryList, "invariant"); - ParkEvent * const w = List; - assert(RelaxAssert || w != Thread::current()->_MutexEvent, "invariant"); - _EntryList = w->ListNext; - // as a diagnostic measure consider setting w->_ListNext = BAD - assert(intptr_t(_OnDeck) == _LBIT, "invariant"); - - // Pass OnDeck role to w, ensuring that _EntryList has been set first. - // w will clear _OnDeck once it acquires the outer lock. - // Note that once we set _OnDeck that thread can acquire the mutex, proceed - // with its critical section and then enter this code to unlock the mutex. So - // you can have multiple threads active in IUnlock at the same time. - OrderAccess::release_store(&_OnDeck, w); - - // Another optional optimization ... - // For heavily contended locks it's not uncommon that some other - // thread acquired the lock while this thread was arranging succession. - // Try to defer the unpark() operation - Delegate the responsibility - // for unpark()ing the OnDeck thread to the current or subsequent owners - // That is, the new owner is responsible for unparking the OnDeck thread. - OrderAccess::storeload(); - cxq = _LockWord.FullWord; - if (cxq & _LBIT) return; - - w->unpark(); - return; - } - - cxq = _LockWord.FullWord; - if ((cxq & ~_LBIT) != 0) { - // The EntryList is empty but the cxq is populated. - // drain RATs from cxq into EntryList - // Detach RATs segment with CAS and then merge into EntryList - for (;;) { - // optional optimization - if locked, the owner is responsible for succession - if (cxq & _LBIT) goto Punt; - const intptr_t vfy = Atomic::cmpxchg(cxq & _LBIT, &_LockWord.FullWord, cxq); - if (vfy == cxq) break; - cxq = vfy; - // Interference - LockWord changed - Just retry - // We can see concurrent interference from contending threads - // pushing themselves onto the cxq or from lock-unlock operations. 
- // From the perspective of this thread, EntryList is stable and - // the cxq is prepend-only -- the head is volatile but the interior - // of the cxq is stable. In theory if we encounter interference from threads - // pushing onto cxq we could simply break off the original cxq suffix and - // move that segment to the EntryList, avoiding a 2nd or multiple CAS attempts - // on the high-traffic LockWord variable. For instance lets say the cxq is "ABCD" - // when we first fetch cxq above. Between the fetch -- where we observed "A" - // -- and CAS -- where we attempt to CAS null over A -- "PQR" arrive, - // yielding cxq = "PQRABCD". In this case we could simply set A.ListNext - // null, leaving cxq = "PQRA" and transfer the "BCD" segment to the EntryList. - // Note too, that it's safe for this thread to traverse the cxq - // without taking any special concurrency precautions. - } - - // We don't currently reorder the cxq segment as we move it onto - // the EntryList, but it might make sense to reverse the order - // or perhaps sort by thread priority. See the comments in - // synchronizer.cpp objectMonitor::exit(). - assert(_EntryList == NULL, "invariant"); - _EntryList = List = (ParkEvent *)(cxq & ~_LBIT); - assert(List != NULL, "invariant"); - goto WakeOne; - } - - // cxq|EntryList is empty. - // w == NULL implies that cxq|EntryList == NULL in the past. - // Possible race - rare inopportune interleaving. - // A thread could have added itself to cxq since this thread previously checked. - // Detect and recover by refetching cxq. - Punt: - assert(intptr_t(_OnDeck) == _LBIT, "invariant"); - _OnDeck = NULL; // Release inner lock. - OrderAccess::storeload(); // Dekker duality - pivot point - - // Resample LockWord/cxq to recover from possible race. - // For instance, while this thread T1 held OnDeck, some other thread T2 might - // acquire the outer lock. Another thread T3 might try to acquire the outer - // lock, but encounter contention and enqueue itself on cxq. 
T2 then drops the - // outer lock, but skips succession as this thread T1 still holds OnDeck. - // T1 is and remains responsible for ensuring succession of T3. - // - // Note that we don't need to recheck EntryList, just cxq. - // If threads moved onto EntryList since we dropped OnDeck - // that implies some other thread forced succession. - cxq = _LockWord.FullWord; - if ((cxq & ~_LBIT) != 0 && (cxq & _LBIT) == 0) { - goto Succession; // potential race -- re-run succession - } - return; -} - -bool Monitor::notify() { - assert(_owner == Thread::current(), "invariant"); - assert(ILocked(), "invariant"); - if (_WaitSet == NULL) return true; - - // Transfer one thread from the WaitSet to the EntryList or cxq. - // Currently we just unlink the head of the WaitSet and prepend to the cxq. - // And of course we could just unlink it and unpark it, too, but - // in that case it'd likely impale itself on the reentry. - Thread::muxAcquire(_WaitLock, "notify:WaitLock"); - ParkEvent * nfy = _WaitSet; - if (nfy != NULL) { // DCL idiom - _WaitSet = nfy->ListNext; - assert(nfy->Notified == 0, "invariant"); - // push nfy onto the cxq - for (;;) { - const intptr_t v = _LockWord.FullWord; - assert((v & 0xFF) == _LBIT, "invariant"); - nfy->ListNext = (ParkEvent *)(v & ~_LBIT); - if (Atomic::cmpxchg(intptr_t(nfy)|_LBIT, &_LockWord.FullWord, v) == v) break; - // interference - _LockWord changed -- just retry - } - // Note that setting Notified before pushing nfy onto the cxq is - // also legal and safe, but the safety properties are much more - // subtle, so for the sake of code stewardship ... - OrderAccess::fence(); - nfy->Notified = 1; - } - Thread::muxRelease(_WaitLock); - assert(ILocked(), "invariant"); - return true; -} - -// Currently notifyAll() transfers the waiters one-at-a-time from the waitset -// to the cxq. This could be done more efficiently with a single bulk en-mass transfer, -// but in practice notifyAll() for large #s of threads is rare and not time-critical. 
-// Beware too, that we invert the order of the waiters. Lets say that the -// waitset is "ABCD" and the cxq is "XYZ". After a notifyAll() the waitset -// will be empty and the cxq will be "DCBAXYZ". This is benign, of course. - -bool Monitor::notify_all() { - assert(_owner == Thread::current(), "invariant"); - assert(ILocked(), "invariant"); - while (_WaitSet != NULL) notify(); - return true; -} - -int Monitor::IWait(Thread * Self, jlong timo) { - assert(ILocked(), "invariant"); - - // Phases: - // 1. Enqueue Self on WaitSet - currently prepend - // 2. unlock - drop the outer lock - // 3. wait for either notification or timeout - // 4. lock - reentry - reacquire the outer lock - - ParkEvent * const ESelf = Self->_MutexEvent; - ESelf->Notified = 0; - ESelf->reset(); - OrderAccess::fence(); - - // Add Self to WaitSet - // Ideally only the holder of the outer lock would manipulate the WaitSet - - // That is, the outer lock would implicitly protect the WaitSet. - // But if a thread in wait() encounters a timeout it will need to dequeue itself - // from the WaitSet _before it becomes the owner of the lock. We need to dequeue - // as the ParkEvent -- which serves as a proxy for the thread -- can't reside - // on both the WaitSet and the EntryList|cxq at the same time.. That is, a thread - // on the WaitSet can't be allowed to compete for the lock until it has managed to - // unlink its ParkEvent from WaitSet. Thus the need for WaitLock. - // Contention on the WaitLock is minimal. - // - // Another viable approach would be add another ParkEvent, "WaitEvent" to the - // thread class. The WaitSet would be composed of WaitEvents. Only the - // owner of the outer lock would manipulate the WaitSet. A thread in wait() - // could then compete for the outer lock, and then, if necessary, unlink itself - // from the WaitSet only after having acquired the outer lock. More precisely, - // there would be no WaitLock. 
A thread in in wait() would enqueue its WaitEvent - // on the WaitSet; release the outer lock; wait for either notification or timeout; - // reacquire the inner lock; and then, if needed, unlink itself from the WaitSet. - // - // Alternatively, a 2nd set of list link fields in the ParkEvent might suffice. - // One set would be for the WaitSet and one for the EntryList. - // We could also deconstruct the ParkEvent into a "pure" event and add a - // new immortal/TSM "ListElement" class that referred to ParkEvents. - // In that case we could have one ListElement on the WaitSet and another - // on the EntryList, with both referring to the same pure Event. - - Thread::muxAcquire(_WaitLock, "wait:WaitLock:Add"); - ESelf->ListNext = _WaitSet; - _WaitSet = ESelf; - Thread::muxRelease(_WaitLock); - - // Release the outer lock - // We call IUnlock (RelaxAssert=true) as a thread T1 might - // enqueue itself on the WaitSet, call IUnlock(), drop the lock, - // and then stall before it can attempt to wake a successor. - // Some other thread T2 acquires the lock, and calls notify(), moving - // T1 from the WaitSet to the cxq. T2 then drops the lock. T1 resumes, - // and then finds *itself* on the cxq. During the course of a normal - // IUnlock() call a thread should _never find itself on the EntryList - // or cxq, but in the case of wait() it's possible. - // See synchronizer.cpp objectMonitor::wait(). - IUnlock(true); - - // Wait for either notification or timeout - // Beware that in some circumstances we might propagate - // spurious wakeups back to the caller. - - for (;;) { - if (ESelf->Notified) break; - int err = ParkCommon(ESelf, timo); - if (err == OS_TIMEOUT) break; - } - - // Prepare for reentry - if necessary, remove ESelf from WaitSet - // ESelf can be: - // 1. Still on the WaitSet. This can happen if we exited the loop by timeout. - // 2. On the cxq or EntryList - // 3. Not resident on cxq, EntryList or WaitSet, but in the OnDeck position. 
- - OrderAccess::fence(); - int WasOnWaitSet = 0; - if (ESelf->Notified == 0) { - Thread::muxAcquire(_WaitLock, "wait:WaitLock:remove"); - if (ESelf->Notified == 0) { // DCL idiom - assert(_OnDeck != ESelf, "invariant"); // can't be both OnDeck and on WaitSet - // ESelf is resident on the WaitSet -- unlink it. - // A doubly-linked list would be better here so we can unlink in constant-time. - // We have to unlink before we potentially recontend as ESelf might otherwise - // end up on the cxq|EntryList -- it can't be on two lists at once. - ParkEvent * p = _WaitSet; - ParkEvent * q = NULL; // classic q chases p - while (p != NULL && p != ESelf) { - q = p; - p = p->ListNext; - } - assert(p == ESelf, "invariant"); - if (p == _WaitSet) { // found at head - assert(q == NULL, "invariant"); - _WaitSet = p->ListNext; - } else { // found in interior - assert(q->ListNext == p, "invariant"); - q->ListNext = p->ListNext; - } - WasOnWaitSet = 1; // We were *not* notified but instead encountered timeout - } - Thread::muxRelease(_WaitLock); - } - - // Reentry phase - reacquire the lock - if (WasOnWaitSet) { - // ESelf was previously on the WaitSet but we just unlinked it above - // because of a timeout. ESelf is not resident on any list and is not OnDeck - assert(_OnDeck != ESelf, "invariant"); - ILock(Self); - } else { - // A prior notify() operation moved ESelf from the WaitSet to the cxq. - // ESelf is now on the cxq, EntryList or at the OnDeck position. 
- // The following fragment is extracted from Monitor::ILock() - for (;;) { - if (OrderAccess::load_acquire(&_OnDeck) == ESelf && TrySpin(Self)) break; - ParkCommon(ESelf, 0); - } - assert(_OnDeck == ESelf, "invariant"); - _OnDeck = NULL; - } - - assert(ILocked(), "invariant"); - return WasOnWaitSet != 0; // return true IFF timeout -} - - -// ON THE VMTHREAD SNEAKING PAST HELD LOCKS: -// In particular, there are certain types of global lock that may be held -// by a Java thread while it is blocked at a safepoint but before it has -// written the _owner field. These locks may be sneakily acquired by the -// VM thread during a safepoint to avoid deadlocks. Alternatively, one should -// identify all such locks, and ensure that Java threads never block at -// safepoints while holding them (_no_safepoint_check_flag). While it -// seems as though this could increase the time to reach a safepoint -// (or at least increase the mean, if not the variance), the latter -// approach might make for a cleaner, more maintainable JVM design. -// -// Sneaking is vile and reprehensible and should be excised at the 1st -// opportunity. It's possible that the need for sneaking could be obviated -// as follows. Currently, a thread might (a) while TBIVM, call pthread_mutex_lock -// or ILock() thus acquiring the "physical" lock underlying Monitor/Mutex. -// (b) stall at the TBIVM exit point as a safepoint is in effect. Critically, -// it'll stall at the TBIVM reentry state transition after having acquired the -// underlying lock, but before having set _owner and having entered the actual -// critical section. The lock-sneaking facility leverages that fact and allowed the -// VM thread to logically acquire locks that had already be physically locked by mutators -// but where mutators were known blocked by the reentry thread state transition. 
-// -// If we were to modify the Monitor-Mutex so that TBIVM state transitions tightly -// wrapped calls to park(), then we could likely do away with sneaking. We'd -// decouple lock acquisition and parking. The critical invariant to eliminating -// sneaking is to ensure that we never "physically" acquire the lock while TBIVM. -// An easy way to accomplish this is to wrap the park calls in a narrow TBIVM jacket. -// One difficulty with this approach is that the TBIVM wrapper could recurse and -// call lock() deep from within a lock() call, while the MutexEvent was already enqueued. -// Using a stack (N=2 at minimum) of ParkEvents would take care of that problem. -// -// But of course the proper ultimate approach is to avoid schemes that require explicit -// sneaking or dependence on any any clever invariants or subtle implementation properties -// of Mutex-Monitor and instead directly address the underlying design flaw. - -void Monitor::lock(Thread * Self) { +void Monitor::lock(Thread * self) { // Ensure that the Monitor requires/allows safepoint checks. assert(_safepoint_check_required != Monitor::_safepoint_check_never, "This lock should never have a safepoint check: %s", name()); #ifdef CHECK_UNHANDLED_OOPS - // Clear unhandled oops so we get a crash right away. Only clear for non-vm - // or GC threads. - if (Self->is_Java_thread()) { - Self->clear_unhandled_oops(); + // Clear unhandled oops in JavaThreads so we get a crash right away. 
+ if (self->is_Java_thread()) { + self->clear_unhandled_oops(); } #endif // CHECK_UNHANDLED_OOPS - DEBUG_ONLY(check_prelock_state(Self, StrictSafepointChecks);) - assert(_owner != Self, "invariant"); - assert(_OnDeck != Self->_MutexEvent, "invariant"); + DEBUG_ONLY(check_prelock_state(self, StrictSafepointChecks)); + assert(_owner != self, "invariant"); + + Monitor* in_flight_monitor = NULL; + DEBUG_ONLY(int retry_cnt = 0;) + while (!_lock.try_lock()) { + // The lock is contended + + #ifdef ASSERT + check_block_state(self); + if (retry_cnt++ > 3) { + log_trace(vmmonitor)("JavaThread " INTPTR_FORMAT " on %d attempt trying to acquire vmmonitor %s", p2i(self), retry_cnt, _name); + } + #endif // ASSERT - if (TryFast()) { - Exeunt: - assert(ILocked(), "invariant"); - assert(owner() == NULL, "invariant"); - set_owner(Self); - return; + if (self->is_Java_thread()) { + assert(rank() > Mutex::special, "Potential deadlock with special or lesser rank mutex"); + { ThreadBlockInVMWithDeadlockCheck tbivmdc((JavaThread *) self, &in_flight_monitor); + in_flight_monitor = this; // save for ~ThreadBlockInVMWithDeadlockCheck + _lock.lock(); + } + if (in_flight_monitor != NULL) { + // Not unlocked by ~ThreadBlockInVMWithDeadlockCheck + break; + } + } else { + _lock.lock(); + break; + } } - // The lock is contended ... - - bool can_sneak = Self->is_VM_thread() && SafepointSynchronize::is_at_safepoint(); - if (can_sneak && _owner == NULL) { - // a java thread has locked the lock but has not entered the - // critical region -- let's just pretend we've locked the lock - // and go on. we note this with _snuck so we can also - // pretend to unlock when the time comes. - _snuck = true; - goto Exeunt; - } - - // Try a brief spin to avoid passing thru thread state transition ... 
- if (TrySpin(Self)) goto Exeunt; - - DEBUG_ONLY(check_block_state(Self);) - if (Self->is_Java_thread()) { - // Horrible dictu - we suffer through a state transition - assert(rank() > Mutex::special, "Potential deadlock with special or lesser rank mutex"); - ThreadBlockInVM tbivm((JavaThread *) Self); - ILock(Self); - } else { - // Mirabile dictu - ILock(Self); - } - goto Exeunt; + assert_owner(NULL); + set_owner(self); } void Monitor::lock() { this->lock(Thread::current()); } -// Lock without safepoint check - a degenerate variant of lock(). -// Should ONLY be used by safepoint code and other code -// that is guaranteed not to block while running inside the VM. If this is called with -// thread state set to be in VM, the safepoint synchronization code will deadlock! +// Lock without safepoint check - a degenerate variant of lock() for use by +// JavaThreads when it is known to be safe to not check for a safepoint when +// acquiring this lock. If the thread blocks acquiring the lock it is not +// safepoint-safe and so will prevent a safepoint from being reached. If used +// in the wrong way this can lead to a deadlock with the safepoint code. -void Monitor::lock_without_safepoint_check(Thread * Self) { - // Ensure that the Monitor does not require or allow safepoint checks. +void Monitor::lock_without_safepoint_check(Thread * self) { + // Ensure that the Monitor does not require safepoint checks. assert(_safepoint_check_required != Monitor::_safepoint_check_always, "This lock should always have a safepoint check: %s", name()); - assert(_owner != Self, "invariant"); - ILock(Self); - assert(_owner == NULL, "invariant"); - set_owner(Self); + assert(_owner != self, "invariant"); + _lock.lock(); + assert_owner(NULL); + set_owner(self); } void Monitor::lock_without_safepoint_check() { @@ -942,117 +108,36 @@ // Returns true if thread succeeds in grabbing the lock, otherwise false. 
bool Monitor::try_lock() { - Thread * const Self = Thread::current(); - DEBUG_ONLY(check_prelock_state(Self, false);) - // assert(!thread->is_inside_signal_handler(), "don't lock inside signal handler"); + Thread * const self = Thread::current(); + DEBUG_ONLY(check_prelock_state(self, false);) - // Special case, where all Java threads are stopped. - // The lock may have been acquired but _owner is not yet set. - // In that case the VM thread can safely grab the lock. - // It strikes me this should appear _after the TryLock() fails, below. - bool can_sneak = Self->is_VM_thread() && SafepointSynchronize::is_at_safepoint(); - if (can_sneak && _owner == NULL) { - set_owner(Self); // Do not need to be atomic, since we are at a safepoint - _snuck = true; - return true; - } - - if (TryLock()) { - // We got the lock - assert(_owner == NULL, "invariant"); - set_owner(Self); + if (_lock.try_lock()) { + assert_owner(NULL); + set_owner(self); return true; } return false; } -void Monitor::unlock() { - assert(_owner == Thread::current(), "invariant"); - assert(_OnDeck != Thread::current()->_MutexEvent, "invariant"); - set_owner(NULL); - if (_snuck) { - assert(SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread(), "sneak"); - _snuck = false; - return; - } - IUnlock(false); +void Monitor::release_for_safepoint() { + assert_owner(NULL); + _lock.unlock(); } -// Yet another degenerate version of Monitor::lock() or lock_without_safepoint_check() -// jvm_raw_lock() and _unlock() can be called by non-Java threads via JVM_RawMonitorEnter. -// -// There's no expectation that JVM_RawMonitors will interoperate properly with the native -// Mutex-Monitor constructs. We happen to implement JVM_RawMonitors in terms of -// native Mutex-Monitors simply as a matter of convenience. A simple abstraction layer -// over a pthread_mutex_t would work equally as well, but require more platform-specific -// code -- a "PlatformMutex". 
Alternatively, a simply layer over muxAcquire-muxRelease -// would work too. -// -// Since the caller might be a foreign thread, we don't necessarily have a Thread.MutexEvent -// instance available. Instead, we transiently allocate a ParkEvent on-demand if -// we encounter contention. That ParkEvent remains associated with the thread -// until it manages to acquire the lock, at which time we return the ParkEvent -// to the global ParkEvent free list. This is correct and suffices for our purposes. -// -// Beware that the original jvm_raw_unlock() had a "_snuck" test but that -// jvm_raw_lock() didn't have the corresponding test. I suspect that's an -// oversight, but I've replicated the original suspect logic in the new code ... - -void Monitor::jvm_raw_lock() { - assert(rank() == native, "invariant"); - - if (TryLock()) { - Exeunt: - assert(ILocked(), "invariant"); - assert(_owner == NULL, "invariant"); - // This can potentially be called by non-java Threads. Thus, the Thread::current_or_null() - // might return NULL. Don't call set_owner since it will break on an NULL owner - // Consider installing a non-null "ANON" distinguished value instead of just NULL. - _owner = Thread::current_or_null(); - return; - } - - if (TrySpin(NULL)) goto Exeunt; - - // slow-path - apparent contention - // Allocate a ParkEvent for transient use. - // The ParkEvent remains associated with this thread until - // the time the thread manages to acquire the lock. - ParkEvent * const ESelf = ParkEvent::Allocate(NULL); - ESelf->reset(); - OrderAccess::storeload(); - - // Either Enqueue Self on cxq or acquire the outer lock. - if (AcquireOrPush (ESelf)) { - ParkEvent::Release(ESelf); // surrender the ParkEvent - goto Exeunt; - } - - // At any given time there is at most one ondeck thread. - // ondeck implies not resident on cxq and not resident on EntryList - // Only the OnDeck thread can try to acquire -- contend for -- the lock. - // CONSIDER: use Self->OnDeck instead of m->OnDeck. 
- for (;;) { - if (OrderAccess::load_acquire(&_OnDeck) == ESelf && TrySpin(NULL)) break; - ParkCommon(ESelf, 0); - } - - assert(_OnDeck == ESelf, "invariant"); - _OnDeck = NULL; - ParkEvent::Release(ESelf); // surrender the ParkEvent - goto Exeunt; +void Monitor::unlock() { + assert_owner(Thread::current()); + set_owner(NULL); + _lock.unlock(); } -void Monitor::jvm_raw_unlock() { - // Nearly the same as Monitor::unlock() ... - // directly set _owner instead of using set_owner(null) - _owner = NULL; - if (_snuck) { // ??? - assert(SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread(), "sneak"); - _snuck = false; - return; - } - IUnlock(false); +void Monitor::notify() { + assert_owner(Thread::current()); + _lock.notify(); +} + +void Monitor::notify_all() { + assert_owner(Thread::current()); + _lock.notify_all(); } bool Monitor::wait(bool no_safepoint_check, long timeout, @@ -1063,22 +148,24 @@ assert(!(_safepoint_check_required == Monitor::_safepoint_check_always && no_safepoint_check == true), "This lock should always have a safepoint check: %s", name()); - Thread * const Self = Thread::current(); - assert(_owner == Self, "invariant"); - assert(ILocked(), "invariant"); + // timeout is in milliseconds - with zero meaning never timeout + assert(timeout >= 0, "negative timeout"); + + Thread * const self = Thread::current(); + assert_owner(self); // as_suspend_equivalent logically implies !no_safepoint_check guarantee(!as_suspend_equivalent || !no_safepoint_check, "invariant"); // !no_safepoint_check logically implies java_thread - guarantee(no_safepoint_check || Self->is_Java_thread(), "invariant"); + guarantee(no_safepoint_check || self->is_Java_thread(), "invariant"); #ifdef ASSERT - Monitor * least = get_least_ranked_lock_besides_this(Self->owned_locks()); + Monitor * least = get_least_ranked_lock_besides_this(self->owned_locks()); assert(least != this, "Specification of get_least_... 
call above"); if (least != NULL && least->rank() <= special) { ::tty->print("Attempting to wait on monitor %s/%d while holding" - " lock %s/%d -- possible deadlock", - name(), rank(), least->name(), least->rank()); + " lock %s/%d -- possible deadlock", + name(), rank(), least->name(), least->rank()); assert(false, "Shouldn't block(wait) while holding a lock of rank special"); } #endif // ASSERT @@ -1088,75 +175,79 @@ // abdicating the lock in wait set_owner(NULL); if (no_safepoint_check) { - wait_status = IWait(Self, timeout); + wait_status = _lock.wait(timeout); + set_owner(self); } else { - assert(Self->is_Java_thread(), "invariant"); - JavaThread *jt = (JavaThread *)Self; + assert(self->is_Java_thread(), "invariant"); + JavaThread *jt = (JavaThread *)self; + Monitor* in_flight_monitor = NULL; - // Enter safepoint region - ornate and Rococo ... - ThreadBlockInVM tbivm(jt); - OSThreadWaitState osts(Self->osthread(), false /* not Object.wait() */); + { + ThreadBlockInVMWithDeadlockCheck tbivmdc(jt, &in_flight_monitor); + OSThreadWaitState osts(self->osthread(), false /* not Object.wait() */); + if (as_suspend_equivalent) { + jt->set_suspend_equivalent(); + // cleared by handle_special_suspend_equivalent_condition() or + // java_suspend_self() + } - if (as_suspend_equivalent) { - jt->set_suspend_equivalent(); - // cleared by handle_special_suspend_equivalent_condition() or - // java_suspend_self() + wait_status = _lock.wait(timeout); + in_flight_monitor = this; // save for ~ThreadBlockInVMWithDeadlockCheck + + // were we externally suspended while we were waiting? + if (as_suspend_equivalent && jt->handle_special_suspend_equivalent_condition()) { + // Our event wait has finished and we own the lock, but + // while we were waiting another thread suspended us. We don't + // want to hold the lock while suspended because that + // would surprise the thread that suspended us. 
+ _lock.unlock(); + jt->java_suspend_self(); + _lock.lock(); + } } - wait_status = IWait(Self, timeout); - - // were we externally suspended while we were waiting? - if (as_suspend_equivalent && jt->handle_special_suspend_equivalent_condition()) { - // Our event wait has finished and we own the lock, but - // while we were waiting another thread suspended us. We don't - // want to hold the lock while suspended because that - // would surprise the thread that suspended us. - assert(ILocked(), "invariant"); - IUnlock(true); - jt->java_suspend_self(); - ILock(Self); - assert(ILocked(), "invariant"); + if (in_flight_monitor != NULL) { + // Not unlocked by ~ThreadBlockInVMWithDeadlockCheck + assert_owner(NULL); + // Conceptually reestablish ownership of the lock. + set_owner(self); + } else { + lock(self); } } - - // Conceptually reestablish ownership of the lock. - // The "real" lock -- the LockByte -- was reacquired by IWait(). - assert(ILocked(), "invariant"); - assert(_owner == NULL, "invariant"); - set_owner(Self); return wait_status != 0; // return true IFF timeout } + +// Temporary JVM_RawMonitor* support. +// Yet another degenerate version of Monitor::lock() or lock_without_safepoint_check() +// jvm_raw_lock() and _unlock() can be called by non-Java threads via JVM_RawMonitorEnter. +// There's no expectation that JVM_RawMonitors will interoperate properly with the native +// Mutex-Monitor constructs. We happen to implement JVM_RawMonitors in terms of +// native Mutex-Monitors simply as a matter of convenience. 
+ +void Monitor::jvm_raw_lock() { + _lock.lock(); + assert_owner(NULL); +} + +void Monitor::jvm_raw_unlock() { + assert_owner(NULL); + _lock.unlock(); +} + Monitor::~Monitor() { -#ifdef ASSERT - uintptr_t owner = UNS(_owner); - uintptr_t lockword = UNS(_LockWord.FullWord); - uintptr_t entrylist = UNS(_EntryList); - uintptr_t waitset = UNS(_WaitSet); - uintptr_t ondeck = UNS(_OnDeck); - // Print _name with precision limit, in case failure is due to memory - // corruption that also trashed _name. - assert((owner|lockword|entrylist|waitset|ondeck) == 0, - "%.*s: _owner(" INTPTR_FORMAT ")|_LockWord(" INTPTR_FORMAT ")|_EntryList(" INTPTR_FORMAT ")|_WaitSet(" - INTPTR_FORMAT ")|_OnDeck(" INTPTR_FORMAT ") != 0", - MONITOR_NAME_LEN, _name, owner, lockword, entrylist, waitset, ondeck); -#endif + assert_owner(NULL); } void Monitor::ClearMonitor(Monitor * m, const char *name) { m->_owner = NULL; - m->_snuck = false; if (name == NULL) { strcpy(m->_name, "UNKNOWN"); } else { strncpy(m->_name, name, MONITOR_NAME_LEN - 1); m->_name[MONITOR_NAME_LEN - 1] = '\0'; } - m->_LockWord.FullWord = 0; - m->_EntryList = NULL; - m->_OnDeck = NULL; - m->_WaitSet = NULL; - m->_WaitLock[0] = 0; } Monitor::Monitor() { @@ -1186,9 +277,7 @@ } bool Monitor::owned_by_self() const { - bool ret = _owner == Thread::current(); - assert(!ret || _LockWord.Bytes[_LSBINDEX] != 0, "invariant"); - return ret; + return _owner == Thread::current(); } void Monitor::print_on_error(outputStream* st) const { @@ -1197,21 +286,32 @@ st->print(" - owner thread: " PTR_FORMAT, p2i(_owner)); } - - - // ---------------------------------------------------------------------------------- // Non-product code #ifndef PRODUCT void Monitor::print_on(outputStream* st) const { - st->print_cr("Mutex: [" PTR_FORMAT "/" PTR_FORMAT "] %s - owner: " PTR_FORMAT, - p2i(this), _LockWord.FullWord, _name, p2i(_owner)); + st->print_cr("Mutex: [" PTR_FORMAT "] %s - owner: " PTR_FORMAT, + p2i(this), _name, p2i(_owner)); } #endif #ifndef 
PRODUCT #ifdef ASSERT + +void Monitor::assert_owner(Thread * expected) { + const char* msg = "invalid owner"; + if (expected == NULL) { + msg = "should be un-owned"; + } + else if (expected == Thread::current()) { + msg = "should be owned by current thread"; + } + assert(_owner == expected, + "%s: owner=" INTPTR_FORMAT ", should be=" INTPTR_FORMAT, + msg, p2i(_owner), p2i(expected)); +} + Monitor * Monitor::get_least_ranked_lock(Monitor * locks) { Monitor *res, *tmp; for (res = tmp = locks; tmp != NULL; tmp = tmp->next()) { @@ -1297,8 +397,8 @@ // Deadlock avoidance rules require us to acquire Mutexes only in // a global total order. For example m1 is the lowest ranked mutex // that the thread holds and m2 is the mutex the thread is trying - // to acquire, then deadlock avoidance rules require that the rank - // of m2 be less than the rank of m1. + // to acquire, then deadlock avoidance rules require that the rank + // of m2 be less than the rank of m1. // The rank Mutex::native is an exception in that it is not subject // to the verification rules. // Here are some further notes relating to mutex acquisition anomalies: diff -r 2c6c0fabe6a2 -r 043ae846819f src/hotspot/share/runtime/mutex.hpp --- a/src/hotspot/share/runtime/mutex.hpp Tue Feb 05 13:21:59 2019 -0500 +++ b/src/hotspot/share/runtime/mutex.hpp Tue Feb 05 15:12:13 2019 -0500 @@ -29,50 +29,10 @@ #include "runtime/os.hpp" #include "utilities/histogram.hpp" -// The SplitWord construct allows us to colocate the contention queue -// (cxq) with the lock-byte. The queue elements are ParkEvents, which are -// always aligned on 256-byte addresses - the least significant byte of -// a ParkEvent is always 0. Colocating the lock-byte with the queue -// allows us to easily avoid what would otherwise be a race in lock() -// if we were to use two completely separate fields for the contention queue -// and the lock indicator. 
Specifically, colocation renders us immune -// from the race where a thread might enqueue itself in the lock() slow-path -// immediately after the lock holder drops the outer lock in the unlock() -// fast-path. -// -// Colocation allows us to use a fast-path unlock() form that uses -// A MEMBAR instead of a CAS. MEMBAR has lower local latency than CAS -// on many platforms. -// -// See: -// + http://blogs.sun.com/dave/entry/biased_locking_in_hotspot -// + http://blogs.sun.com/dave/resource/synchronization-public2.pdf -// -// Note that we're *not* using word-tearing the classic sense. -// The lock() fast-path will CAS the lockword and the unlock() -// fast-path will store into the lock-byte colocated within the lockword. -// We depend on the fact that all our reference platforms have -// coherent and atomic byte accesses. More precisely, byte stores -// interoperate in a safe, sane, and expected manner with respect to -// CAS, ST and LDs to the full-word containing the byte. -// If you're porting HotSpot to a platform where that isn't the case -// then you'll want change the unlock() fast path from: -// STB;MEMBAR #storeload; LDN -// to a full-word CAS of the lockword. - -union SplitWord { // full-word with separately addressable LSB - volatile intptr_t FullWord ; - volatile void * Address ; - volatile jbyte Bytes [sizeof(intptr_t)] ; -} ; - -class ParkEvent ; - -// See orderAccess.hpp. We assume throughout the VM that mutex lock and -// try_lock do fence-lock-acquire, and that unlock does a release-unlock, -// *in that order*. If their implementations change such that these -// assumptions are violated, a whole lot of code will break. +// A Mutex/Monitor is a simple wrapper around a native lock plus condition +// variable that supports lock ownership tracking, lock ranking for deadlock +// detection and coordinates with the safepoint protocol. // The default length of monitor name was originally chosen to be 64 to avoid // false sharing. 
Now, PaddedMonitor is available for this purpose. @@ -118,22 +78,10 @@ native = max_nonleaf + 1 }; - // The WaitSet and EntryList linked lists are composed of ParkEvents. - // I use ParkEvent instead of threads as ParkEvents are immortal and - // type-stable, meaning we can safely unpark() a possibly stale - // list element in the unlock()-path. - protected: // Monitor-Mutex metadata - SplitWord _LockWord ; // Contention queue (cxq) colocated with Lock-byte Thread * volatile _owner; // The owner of the lock - // Consider sequestering _owner on its own $line - // to aid future synchronization mechanisms. - ParkEvent * volatile _EntryList ; // List of threads waiting for entry - ParkEvent * volatile _OnDeck ; // heir-presumptive - volatile intptr_t _WaitLock [1] ; // Protects _WaitSet - ParkEvent * volatile _WaitSet ; // LL of ParkEvents - volatile bool _snuck; // Used for sneaky locking (evil). - char _name[MONITOR_NAME_LEN]; // Name of mutex + os::PlatformMonitor _lock; // Native monitor implementation + char _name[MONITOR_NAME_LEN]; // Name of mutex/monitor // Debugging fields for naming, deadlock detection, etc. (some only used in debug mode) #ifndef PRODUCT @@ -149,8 +97,8 @@ void set_owner_implementation(Thread* owner) PRODUCT_RETURN; void check_prelock_state (Thread* thread, bool safepoint_check) PRODUCT_RETURN; void check_block_state (Thread* thread) PRODUCT_RETURN; + void assert_owner (Thread* expected) NOT_DEBUG_RETURN; - // platform-dependent support code can go here (in os_.cpp) public: enum { _no_safepoint_check_flag = true, @@ -164,6 +112,9 @@ // consistent checking for each lock. // A few existing locks will sometimes have a safepoint check and // sometimes not, but these locks are set up in such a way to avoid deadlocks. + // Note: monitors that may be shared between JavaThreads and the VMThread + // should never encounter a safepoint check whilst they are held, else a + // deadlock with the VMThread can occur. 
enum SafepointCheckRequired { _safepoint_check_never, // Monitors with this value will cause errors // when acquired with a safepoint check. @@ -176,22 +127,6 @@ NOT_PRODUCT(SafepointCheckRequired _safepoint_check_required;) - enum WaitResults { - CONDVAR_EVENT, // Wait returned because of condition variable notification - INTERRUPT_EVENT, // Wait returned because waiting thread was interrupted - NUMBER_WAIT_RESULTS - }; - - private: - int TrySpin (Thread * Self) ; - int TryLock () ; - int TryFast () ; - int AcquireOrPush (ParkEvent * ev) ; - void IUnlock (bool RelaxAssert) ; - void ILock (Thread * Self) ; - int IWait (Thread * Self, jlong timo); - int ILocked () ; - protected: static void ClearMonitor (Monitor * m, const char* name = NULL) ; Monitor() ; @@ -208,8 +143,8 @@ bool wait(bool no_safepoint_check = !_no_safepoint_check_flag, long timeout = 0, bool as_suspend_equivalent = !_as_suspend_equivalent_flag); - bool notify(); - bool notify_all(); + void notify(); + void notify_all(); void lock(); // prints out warning if VM thread blocks @@ -219,6 +154,8 @@ bool try_lock(); // Like lock(), but unblocking. It returns false instead + void release_for_safepoint(); + // Lock without safepoint check. Should ONLY be used by safepoint code and other code // that is guaranteed not to block while running inside the VM. void lock_without_safepoint_check(); @@ -290,9 +227,6 @@ // there may have been some benefit to having distinct mutexes and monitors, but that time // has past. // -// The Mutex/Monitor design parallels that of Java-monitors, being based on -// thread-specific park-unpark platform-specific primitives. 
- class Mutex : public Monitor { // degenerate Monitor public: @@ -300,8 +234,8 @@ SafepointCheckRequired safepoint_check_required = _safepoint_check_always); // default destructor private: - bool notify () { ShouldNotReachHere(); return false; } - bool notify_all() { ShouldNotReachHere(); return false; } + void notify () { ShouldNotReachHere(); } + void notify_all() { ShouldNotReachHere(); } bool wait (bool no_safepoint_check, long timeout, bool as_suspend_equivalent) { ShouldNotReachHere() ; return false ; diff -r 2c6c0fabe6a2 -r 043ae846819f src/hotspot/share/runtime/mutexLocker.hpp --- a/src/hotspot/share/runtime/mutexLocker.hpp Tue Feb 05 13:21:59 2019 -0500 +++ b/src/hotspot/share/runtime/mutexLocker.hpp Tue Feb 05 15:12:13 2019 -0500 @@ -272,18 +272,16 @@ return false; } - bool notify_all() { + void notify_all() { if (_monitor != NULL) { - return _monitor->notify_all(); + _monitor->notify_all(); } - return true; } - bool notify() { + void notify() { if (_monitor != NULL) { - return _monitor->notify(); + _monitor->notify(); } - return true; } }; diff -r 2c6c0fabe6a2 -r 043ae846819f src/hotspot/share/runtime/safepoint.cpp --- a/src/hotspot/share/runtime/safepoint.cpp Tue Feb 05 13:21:59 2019 -0500 +++ b/src/hotspot/share/runtime/safepoint.cpp Tue Feb 05 15:12:13 2019 -0500 @@ -793,7 +793,7 @@ // ------------------------------------------------------------------------------------------------------- // Implementation of Safepoint callback point -void SafepointSynchronize::block(JavaThread *thread) { +void SafepointSynchronize::block(JavaThread *thread, bool block_in_safepoint_check) { assert(thread != NULL, "thread must be set"); assert(thread->is_Java_thread(), "not a Java thread"); @@ -848,28 +848,37 @@ } } - // We transition the thread to state _thread_blocked here, but - // we can't do our usual check for external suspension and then - // self-suspend after the lock_without_safepoint_check() call - // below because we are often called during transitions 
while - // we hold different locks. That would leave us suspended while - // holding a resource which results in deadlocks. - thread->set_thread_state(_thread_blocked); - Safepoint_lock->unlock(); + if (block_in_safepoint_check) { + // We transition the thread to state _thread_blocked here, but + // we can't do our usual check for external suspension and then + // self-suspend after the lock_without_safepoint_check() call + // below because we are often called during transitions while + // we hold different locks. That would leave us suspended while + // holding a resource which results in deadlocks. + thread->set_thread_state(_thread_blocked); + Safepoint_lock->unlock(); - // We now try to acquire the threads lock. Since this lock is hold by the VM thread during - // the entire safepoint, the threads will all line up here during the safepoint. - Threads_lock->lock_without_safepoint_check(); - // restore original state. This is important if the thread comes from compiled code, so it - // will continue to execute with the _thread_in_Java state. - thread->set_thread_state(state); - Threads_lock->unlock(); + // We now try to acquire the threads lock. Since this lock is held by the VM thread during + // the entire safepoint, the threads will all line up here during the safepoint. + Threads_lock->lock_without_safepoint_check(); + // restore original state. This is important if the thread comes from compiled code, so it + // will continue to execute with the _thread_in_Java state. + thread->set_thread_state(state); + Threads_lock->unlock(); + } else { + // We choose not to block in this call since we would be + // caught when transitioning back anyways if the safepoint + // is still going on.
+ thread->set_thread_state(state); + Safepoint_lock->unlock(); + } break; case _thread_in_native_trans: case _thread_blocked_trans: case _thread_new_trans: - if (thread->safepoint_state()->type() == ThreadSafepointState::_call_back) { + if (thread->safepoint_state()->type() == ThreadSafepointState::_call_back && + block_in_safepoint_check) { thread->print_thread_state(); fatal("Deadlock in safepoint code. " "Should have called back to the VM before blocking."); diff -r 2c6c0fabe6a2 -r 043ae846819f src/hotspot/share/runtime/safepoint.hpp --- a/src/hotspot/share/runtime/safepoint.hpp Tue Feb 05 13:21:59 2019 -0500 +++ b/src/hotspot/share/runtime/safepoint.hpp Tue Feb 05 15:12:13 2019 -0500 @@ -143,7 +143,7 @@ } // Called when a thread voluntarily blocks - static void block(JavaThread *thread); + static void block(JavaThread *thread, bool block_in_safepoint_check = true); friend class SafepointMechanism; diff -r 2c6c0fabe6a2 -r 043ae846819f src/hotspot/share/runtime/safepointMechanism.hpp --- a/src/hotspot/share/runtime/safepointMechanism.hpp Tue Feb 05 13:21:59 2019 -0500 +++ b/src/hotspot/share/runtime/safepointMechanism.hpp Tue Feb 05 15:12:13 2019 -0500 @@ -72,12 +72,15 @@ #endif } - // Call this method to see if this thread should block for a safepoint. + // Call this method to see if this thread should block for a safepoint or process handshake. static inline bool should_block(Thread* thread); - // Blocks a thread until safepoint is completed + // Blocks a thread until safepoint/handshake is completed. static inline void block_if_requested(JavaThread* thread); + // Calls back if there is a pending safepoint but does not block for it. + static inline void callback_if_safepoint(JavaThread* thread); + // Caller is responsible for using a memory barrier if needed. 
static inline void arm_local_poll(JavaThread* thread); static inline void disarm_local_poll(JavaThread* thread); diff -r 2c6c0fabe6a2 -r 043ae846819f src/hotspot/share/runtime/safepointMechanism.inline.hpp --- a/src/hotspot/share/runtime/safepointMechanism.inline.hpp Tue Feb 05 13:21:59 2019 -0500 +++ b/src/hotspot/share/runtime/safepointMechanism.inline.hpp Tue Feb 05 15:12:13 2019 -0500 @@ -62,6 +62,20 @@ block_if_requested_slow(thread); } +void SafepointMechanism::callback_if_safepoint(JavaThread* thread) { + if (!uses_thread_local_poll() || local_poll_armed(thread)) { + // If using thread local polls, we should not check the + // global_poll() and callback via block() if the VMThread + // has not yet armed the local poll. Otherwise, when used in + // combination with should_block(), the latter could miss + // detecting the same safepoint that this method would detect + // if only checking global polls. + if (global_poll()) { + SafepointSynchronize::block(thread, false); + } + } +} + void SafepointMechanism::arm_local_poll(JavaThread* thread) { thread->set_polling_page(poll_armed_value()); } diff -r 2c6c0fabe6a2 -r 043ae846819f src/hotspot/share/runtime/thread.cpp --- a/src/hotspot/share/runtime/thread.cpp Tue Feb 05 13:21:59 2019 -0500 +++ b/src/hotspot/share/runtime/thread.cpp Tue Feb 05 15:12:13 2019 -0500 @@ -294,7 +294,6 @@ // and ::Release() _ParkEvent = ParkEvent::Allocate(this); _SleepEvent = ParkEvent::Allocate(this); - _MutexEvent = ParkEvent::Allocate(this); _MuxEvent = ParkEvent::Allocate(this); #ifdef CHECK_UNHANDLED_OOPS @@ -460,7 +459,6 @@ // We NULL out the fields for good hygiene. 
ParkEvent::Release(_ParkEvent); _ParkEvent = NULL; ParkEvent::Release(_SleepEvent); _SleepEvent = NULL; - ParkEvent::Release(_MutexEvent); _MutexEvent = NULL; ParkEvent::Release(_MuxEvent); _MuxEvent = NULL; delete handle_area(); diff -r 2c6c0fabe6a2 -r 043ae846819f src/hotspot/share/runtime/thread.hpp --- a/src/hotspot/share/runtime/thread.hpp Tue Feb 05 13:21:59 2019 -0500 +++ b/src/hotspot/share/runtime/thread.hpp Tue Feb 05 15:12:13 2019 -0500 @@ -782,7 +782,6 @@ volatile int _TypeTag; ParkEvent * _ParkEvent; // for synchronized() ParkEvent * _SleepEvent; // for Thread.sleep - ParkEvent * _MutexEvent; // for native internal Mutex/Monitor ParkEvent * _MuxEvent; // for low-level muxAcquire-muxRelease int NativeSyncRecursion; // diagnostic @@ -792,8 +791,6 @@ jint _hashStateY; jint _hashStateZ; - volatile jint rng[4]; // RNG for spin loop - // Low-level leaf-lock primitives used to implement synchronization // and native monitor-mutex infrastructure. // Not for general synchronization use.