# HG changeset patch
# User dcubed
# Date 1404410871 25200
# Node ID 381638db28e6c0971f33a1e8953446fa450a81bf
# Parent  3ab9867d7786554986c63765a15c9857df96d1b0
8047104: cleanup misc issues prior to Contended Locking reorder and cache
Summary: Checkpoint misc cleanups for Contended Locking prior to first optimization bucket.
Reviewed-by: dholmes, sspitsyn, dice

diff -r 3ab9867d7786 -r 381638db28e6 hotspot/src/os/bsd/vm/os_bsd.cpp
--- a/hotspot/src/os/bsd/vm/os_bsd.cpp  Thu Jul 03 09:16:56 2014 -0700
+++ b/hotspot/src/os/bsd/vm/os_bsd.cpp  Thu Jul 03 11:07:51 2014 -0700
@@ -4218,22 +4218,12 @@
   return abstime;
 }
 
-
-// Test-and-clear _Event, always leaves _Event set to 0, returns immediately.
-// Conceptually TryPark() should be equivalent to park(0).
-
-int os::PlatformEvent::TryPark() {
-  for (;;) {
-    const int v = _Event;
-    guarantee((v == 0) || (v == 1), "invariant");
-    if (Atomic::cmpxchg(0, &_Event, v) == v) return v;
-  }
-}
-
 void os::PlatformEvent::park() {       // AKA "down()"
   // Invariant: Only the thread associated with the Event/PlatformEvent
   // may call park().
   // TODO: assert that _Assoc != NULL or _Assoc == Self
+  assert(_nParked == 0, "invariant");
+
   int v;
   for (;;) {
     v = _Event;
@@ -4333,8 +4323,7 @@
   //    1 :=> 1
   //   -1 :=> either 0 or 1; must signal target thread
   //          That is, we can safely transition _Event from -1 to either
-  //          0 or 1. Forcing 1 is slightly more efficient for back-to-back
-  //          unpark() calls.
+  //          0 or 1.
   // See also: "Semaphores in Plan 9" by Mullender & Cox
   //
   // Note: Forcing a transition from "-1" to "1" on an unpark() means
@@ -4541,10 +4530,9 @@
 }
 
 void Parker::unpark() {
-  int s, status;
-  status = pthread_mutex_lock(_mutex);
+  int status = pthread_mutex_lock(_mutex);
   assert(status == 0, "invariant");
-  s = _counter;
+  const int s = _counter;
   _counter = 1;
   if (s < 1) {
     if (WorkAroundNPTLTimedWaitHang) {
diff -r 3ab9867d7786 -r 381638db28e6 hotspot/src/os/bsd/vm/os_bsd.hpp
--- a/hotspot/src/os/bsd/vm/os_bsd.hpp  Thu Jul 03 09:16:56 2014 -0700
+++ b/hotspot/src/os/bsd/vm/os_bsd.hpp  Thu Jul 03 11:07:51 2014 -0700
@@ -219,7 +219,6 @@
   int fired() { return _Event; }
   void park();
   void unpark();
-  int TryPark();
   int park(jlong millis);
   void SetAssociation(Thread * a) { _Assoc = a; }
 };
diff -r 3ab9867d7786 -r 381638db28e6 hotspot/src/os/linux/vm/os_linux.cpp
--- a/hotspot/src/os/linux/vm/os_linux.cpp  Thu Jul 03 09:16:56 2014 -0700
+++ b/hotspot/src/os/linux/vm/os_linux.cpp  Thu Jul 03 11:07:51 2014 -0700
@@ -5457,22 +5457,12 @@
   return abstime;
 }
 
-
-// Test-and-clear _Event, always leaves _Event set to 0, returns immediately.
-// Conceptually TryPark() should be equivalent to park(0).
-
-int os::PlatformEvent::TryPark() {
-  for (;;) {
-    const int v = _Event;
-    guarantee((v == 0) || (v == 1), "invariant");
-    if (Atomic::cmpxchg(0, &_Event, v) == v) return v;
-  }
-}
-
 void os::PlatformEvent::park() {       // AKA "down()"
   // Invariant: Only the thread associated with the Event/PlatformEvent
   // may call park().
   // TODO: assert that _Assoc != NULL or _Assoc == Self
+  assert(_nParked == 0, "invariant");
+
   int v;
   for (;;) {
     v = _Event;
@@ -5572,8 +5562,7 @@
   //    1 :=> 1
   //   -1 :=> either 0 or 1; must signal target thread
   //          That is, we can safely transition _Event from -1 to either
-  //          0 or 1. Forcing 1 is slightly more efficient for back-to-back
-  //          unpark() calls.
+  //          0 or 1.
   // See also: "Semaphores in Plan 9" by Mullender & Cox
   //
   // Note: Forcing a transition from "-1" to "1" on an unpark() means
@@ -5801,10 +5790,9 @@
 }
 
 void Parker::unpark() {
-  int s, status;
-  status = pthread_mutex_lock(_mutex);
+  int status = pthread_mutex_lock(_mutex);
   assert(status == 0, "invariant");
-  s = _counter;
+  const int s = _counter;
   _counter = 1;
   if (s < 1) {
     // thread might be parked
diff -r 3ab9867d7786 -r 381638db28e6 hotspot/src/os/linux/vm/os_linux.hpp
--- a/hotspot/src/os/linux/vm/os_linux.hpp  Thu Jul 03 09:16:56 2014 -0700
+++ b/hotspot/src/os/linux/vm/os_linux.hpp  Thu Jul 03 11:07:51 2014 -0700
@@ -315,7 +315,6 @@
   int fired() { return _Event; }
   void park();
   void unpark();
-  int TryPark();
   int park(jlong millis); // relative timed-wait only
   void SetAssociation(Thread * a) { _Assoc = a; }
 };
diff -r 3ab9867d7786 -r 381638db28e6 hotspot/src/os/solaris/vm/os_solaris.cpp
--- a/hotspot/src/os/solaris/vm/os_solaris.cpp  Thu Jul 03 09:16:56 2014 -0700
+++ b/hotspot/src/os/solaris/vm/os_solaris.cpp  Thu Jul 03 11:07:51 2014 -0700
@@ -5440,20 +5440,11 @@
   return abstime;
 }
 
-// Test-and-clear _Event, always leaves _Event set to 0, returns immediately.
-// Conceptually TryPark() should be equivalent to park(0).
-
-int os::PlatformEvent::TryPark() {
-  for (;;) {
-    const int v = _Event;
-    guarantee((v == 0) || (v == 1), "invariant");
-    if (Atomic::cmpxchg(0, &_Event, v) == v) return v;
-  }
-}
-
 void os::PlatformEvent::park() {       // AKA: down()
   // Invariant: Only the thread associated with the Event/PlatformEvent
   // may call park().
+  assert(_nParked == 0, "invariant");
+
   int v;
   for (;;) {
     v = _Event;
@@ -5540,8 +5531,7 @@
   //    1 :=> 1
   //   -1 :=> either 0 or 1; must signal target thread
   //          That is, we can safely transition _Event from -1 to either
-  //          0 or 1. Forcing 1 is slightly more efficient for back-to-back
-  //          unpark() calls.
+  //          0 or 1.
   // See also: "Semaphores in Plan 9" by Mullender & Cox
   //
   // Note: Forcing a transition from "-1" to "1" on an unpark() means
@@ -5745,10 +5735,9 @@
 }
 
 void Parker::unpark() {
-  int s, status;
-  status = os::Solaris::mutex_lock(_mutex);
+  int status = os::Solaris::mutex_lock(_mutex);
   assert(status == 0, "invariant");
-  s = _counter;
+  const int s = _counter;
   _counter = 1;
   status = os::Solaris::mutex_unlock(_mutex);
   assert(status == 0, "invariant");
diff -r 3ab9867d7786 -r 381638db28e6 hotspot/src/os/solaris/vm/os_solaris.hpp
--- a/hotspot/src/os/solaris/vm/os_solaris.hpp  Thu Jul 03 09:16:56 2014 -0700
+++ b/hotspot/src/os/solaris/vm/os_solaris.hpp  Thu Jul 03 11:07:51 2014 -0700
@@ -332,7 +332,6 @@
   int fired() { return _Event; }
   void park();
   int park(jlong millis);
-  int TryPark();
   void unpark();
 };
 
diff -r 3ab9867d7786 -r 381638db28e6 hotspot/src/os/windows/vm/os_windows.cpp
--- a/hotspot/src/os/windows/vm/os_windows.cpp  Thu Jul 03 09:16:56 2014 -0700
+++ b/hotspot/src/os/windows/vm/os_windows.cpp  Thu Jul 03 11:07:51 2014 -0700
@@ -4876,8 +4876,7 @@
   //    1 :=> 1
   //   -1 :=> either 0 or 1; must signal target thread
   //          That is, we can safely transition _Event from -1 to either
-  //          0 or 1. Forcing 1 is slightly more efficient for back-to-back
-  //          unpark() calls.
+  //          0 or 1.
   // See also: "Semaphores in Plan 9" by Mullender & Cox
   //
   // Note: Forcing a transition from "-1" to "1" on an unpark() means
diff -r 3ab9867d7786 -r 381638db28e6 hotspot/src/share/vm/runtime/globals.hpp
--- a/hotspot/src/share/vm/runtime/globals.hpp  Thu Jul 03 09:16:56 2014 -0700
+++ b/hotspot/src/share/vm/runtime/globals.hpp  Thu Jul 03 11:07:51 2014 -0700
@@ -1130,29 +1130,30 @@
           "Use LWP-based instead of libthread-based synchronization "      \
           "(SPARC only)")                                                  \
                                                                            \
-  product(ccstr, SyncKnobs, NULL,                                          \
-          "(Unstable) Various monitor synchronization tunables")           \
-                                                                           \
-  product(intx, EmitSync, 0,                                               \
-          "(Unsafe, Unstable) "                                            \
-          "Control emission of inline sync fast-path code")                \
+  experimental(ccstr, SyncKnobs, NULL,                                     \
+               "(Unstable) Various monitor synchronization tunables")      \
+                                                                           \
+  experimental(intx, EmitSync, 0,                                          \
+               "(Unsafe, Unstable) "                                       \
+               "Control emission of inline sync fast-path code")           \
                                                                            \
   product(intx, MonitorBound, 0, "Bound Monitor population")               \
                                                                            \
   product(bool, MonitorInUseLists, false, "Track Monitors for Deflation")  \
                                                                            \
-  product(intx, SyncFlags, 0, "(Unsafe, Unstable) Experimental Sync flags") \
-                                                                           \
-  product(intx, SyncVerbose, 0, "(Unstable)")                              \
-                                                                           \
-  product(intx, ClearFPUAtPark, 0, "(Unsafe, Unstable)")                   \
-                                                                           \
-  product(intx, hashCode, 5,                                               \
-          "(Unstable) select hashCode generation algorithm")               \
-                                                                           \
-  product(intx, WorkAroundNPTLTimedWaitHang, 1,                            \
-          "(Unstable, Linux-specific) "                                    \
-          "avoid NPTL-FUTEX hang pthread_cond_timedwait")                  \
+  experimental(intx, SyncFlags, 0, "(Unsafe, Unstable) "                   \
+               "Experimental Sync flags")                                  \
+                                                                           \
+  experimental(intx, SyncVerbose, 0, "(Unstable)")                         \
+                                                                           \
+  experimental(intx, ClearFPUAtPark, 0, "(Unsafe, Unstable)")              \
+                                                                           \
+  experimental(intx, hashCode, 5,                                          \
+               "(Unstable) select hashCode generation algorithm")          \
+                                                                           \
+  experimental(intx, WorkAroundNPTLTimedWaitHang, 1,                       \
+               "(Unstable, Linux-specific) "                               \
+               "avoid NPTL-FUTEX hang pthread_cond_timedwait")             \
                                                                            \
   product(bool, FilterSpuriousWakeups, true,                               \
           "When true prevents OS-level spurious, or premature, wakeups "   \
diff -r 3ab9867d7786 -r 381638db28e6 hotspot/src/share/vm/runtime/mutex.cpp
--- a/hotspot/src/share/vm/runtime/mutex.cpp  Thu Jul 03 09:16:56 2014 -0700
+++ b/hotspot/src/share/vm/runtime/mutex.cpp  Thu Jul 03 11:07:51 2014 -0700
@@ -1,4 +1,3 @@
-
 /*
  * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -488,7 +487,6 @@
     for (;;) {
       assert(_OnDeck == ESelf, "invariant");
       if (TrySpin(Self)) break;
-      // CONSIDER: if ESelf->TryPark() && TryLock() break ...
       // It's probably wise to spin only if we *actually* blocked
       // CONSIDER: check the lockbyte, if it remains set then
       // preemptively drain the cxq into the EntryList.
diff -r 3ab9867d7786 -r 381638db28e6 hotspot/src/share/vm/runtime/objectMonitor.cpp
--- a/hotspot/src/share/vm/runtime/objectMonitor.cpp  Thu Jul 03 09:16:56 2014 -0700
+++ b/hotspot/src/share/vm/runtime/objectMonitor.cpp  Thu Jul 03 11:07:51 2014 -0700
@@ -46,9 +46,9 @@
 
 #if defined(__GNUC__) && !defined(IA64) && !defined(PPC64)
   // Need to inhibit inlining for older versions of GCC to avoid build-time failures
-  #define ATTR __attribute__((noinline))
+  #define NOINLINE __attribute__((noinline))
 #else
-  #define ATTR
+  #define NOINLINE
 #endif
 
 
@@ -103,38 +103,39 @@
 // The knob* variables are effectively final. Once set they should
 // never be modified hence. Consider using __read_mostly with GCC.
 
-int ObjectMonitor::Knob_Verbose      = 0;
-int ObjectMonitor::Knob_SpinLimit    = 5000;    // derived by an external tool -
-static int Knob_LogSpins             = 0;       // enable jvmstat tally for spins
-static int Knob_HandOff              = 0;
-static int Knob_ReportSettings       = 0;
+int ObjectMonitor::Knob_Verbose               = 0;
+int ObjectMonitor::Knob_VerifyInUse           = 0;
+int ObjectMonitor::Knob_SpinLimit             = 5000;    // derived by an external tool -
+static int Knob_LogSpins                      = 0;       // enable jvmstat tally for spins
+static int Knob_HandOff                       = 0;
+static int Knob_ReportSettings                = 0;
 
-static int Knob_SpinBase             = 0;       // Floor AKA SpinMin
-static int Knob_SpinBackOff          = 0;       // spin-loop backoff
-static int Knob_CASPenalty           = -1;      // Penalty for failed CAS
-static int Knob_OXPenalty            = -1;      // Penalty for observed _owner change
-static int Knob_SpinSetSucc          = 1;       // spinners set the _succ field
-static int Knob_SpinEarly            = 1;
-static int Knob_SuccEnabled          = 1;       // futile wake throttling
-static int Knob_SuccRestrict         = 0;       // Limit successors + spinners to at-most-one
-static int Knob_MaxSpinners          = -1;      // Should be a function of # CPUs
-static int Knob_Bonus                = 100;     // spin success bonus
-static int Knob_BonusB               = 100;     // spin success bonus
-static int Knob_Penalty              = 200;     // spin failure penalty
-static int Knob_Poverty              = 1000;
-static int Knob_SpinAfterFutile      = 1;       // Spin after returning from park()
-static int Knob_FixedSpin            = 0;
-static int Knob_OState               = 3;       // Spinner checks thread state of _owner
-static int Knob_UsePause             = 1;
-static int Knob_ExitPolicy           = 0;
-static int Knob_PreSpin              = 10;      // 20-100 likely better
-static int Knob_ResetEvent           = 0;
-static int BackOffMask               = 0;
+static int Knob_SpinBase                      = 0;       // Floor AKA SpinMin
+static int Knob_SpinBackOff                   = 0;       // spin-loop backoff
+static int Knob_CASPenalty                    = -1;      // Penalty for failed CAS
+static int Knob_OXPenalty                     = -1;      // Penalty for observed _owner change
+static int Knob_SpinSetSucc                   = 1;       // spinners set the _succ field
+static int Knob_SpinEarly                     = 1;
+static int Knob_SuccEnabled                   = 1;       // futile wake throttling
+static int Knob_SuccRestrict                  = 0;       // Limit successors + spinners to at-most-one
+static int Knob_MaxSpinners                   = -1;      // Should be a function of # CPUs
+static int Knob_Bonus                         = 100;     // spin success bonus
+static int Knob_BonusB                        = 100;     // spin success bonus
+static int Knob_Penalty                       = 200;     // spin failure penalty
+static int Knob_Poverty                       = 1000;
+static int Knob_SpinAfterFutile               = 1;       // Spin after returning from park()
+static int Knob_FixedSpin                     = 0;
+static int Knob_OState                        = 3;       // Spinner checks thread state of _owner
+static int Knob_UsePause                      = 1;
+static int Knob_ExitPolicy                    = 0;
+static int Knob_PreSpin                       = 10;      // 20-100 likely better
+static int Knob_ResetEvent                    = 0;
+static int BackOffMask                        = 0;
 
-static int Knob_FastHSSEC            = 0;
-static int Knob_MoveNotifyee         = 2;       // notify() - disposition of notifyee
-static int Knob_QMode                = 0;       // EntryList-cxq policy - queue discipline
-static volatile int InitDone         = 0;
+static int Knob_FastHSSEC                     = 0;
+static int Knob_MoveNotifyee                  = 2;       // notify() - disposition of notifyee
+static int Knob_QMode                         = 0;       // EntryList-cxq policy - queue discipline
+static volatile int InitDone                  = 0;
 
 #define TrySpin TrySpin_VaryDuration
 
@@ -199,7 +200,7 @@
 // on EntryList|cxq. That is, spinning relieves contention on the "inner"
 // locks and monitor metadata.
 //
-// Cxq points to the the set of Recently Arrived Threads attempting entry.
+// Cxq points to the set of Recently Arrived Threads attempting entry.
 // Because we push threads onto _cxq with CAS, the RATs must take the form of
 // a singly-linked LIFO. We drain _cxq into EntryList at unlock-time when
 // the unlocking thread notices that EntryList is null but _cxq is != null.
@@ -269,13 +270,12 @@
   }
 }
 
-void ATTR ObjectMonitor::enter(TRAPS) {
+void NOINLINE ObjectMonitor::enter(TRAPS) {
   // The following code is ordered to check the most common cases first
   // and to reduce RTS->RTO cache line upgrades on SPARC and IA32 processors.
   Thread * const Self = THREAD;
-  void * cur;
 
-  cur = Atomic::cmpxchg_ptr(Self, &_owner, NULL);
+  void * cur = Atomic::cmpxchg_ptr (Self, &_owner, NULL);
   if (cur == NULL) {
     // Either ASSERT _recursions == 0 or explicitly set _recursions = 0.
     assert(_recursions == 0 , "invariant");
@@ -435,26 +435,24 @@
 // Callers must compensate as needed.
 
 int ObjectMonitor::TryLock (Thread * Self) {
-  for (;;) {
-    void * own = _owner;
-    if (own != NULL) return 0;
-    if (Atomic::cmpxchg_ptr (Self, &_owner, NULL) == NULL) {
-      // Either guarantee _recursions == 0 or set _recursions = 0.
-      assert(_recursions == 0, "invariant");
-      assert(_owner == Self, "invariant");
-      // CONSIDER: set or assert that OwnerIsThread == 1
-      return 1;
-    }
-    // The lock had been free momentarily, but we lost the race to the lock.
-    // Interference -- the CAS failed.
-    // We can either return -1 or retry.
-    // Retry doesn't make as much sense because the lock was just acquired.
-    if (true) return -1;
-  }
+  void * own = _owner;
+  if (own != NULL) return 0;
+  if (Atomic::cmpxchg_ptr (Self, &_owner, NULL) == NULL) {
+    // Either guarantee _recursions == 0 or set _recursions = 0.
+    assert(_recursions == 0, "invariant");
+    assert(_owner == Self, "invariant");
+    // CONSIDER: set or assert that OwnerIsThread == 1
+    return 1;
+  }
+  // The lock had been free momentarily, but we lost the race to the lock.
+  // Interference -- the CAS failed.
+  // We can either return -1 or retry.
+  // Retry doesn't make as much sense because the lock was just acquired.
+  return -1;
 }
 
-void ATTR ObjectMonitor::EnterI (TRAPS) {
-  Thread * Self = THREAD;
+void NOINLINE ObjectMonitor::EnterI (TRAPS) {
+  Thread * const Self = THREAD;
   assert(Self->is_Java_thread(), "invariant");
   assert(((JavaThread *) Self)->thread_state() == _thread_blocked , "invariant");
 
@@ -550,7 +548,7 @@
     Atomic::cmpxchg_ptr(Self, &_Responsible, NULL);
   }
 
-  // The lock have been released while this thread was occupied queueing
+  // The lock might have been released while this thread was occupied queueing
   // itself onto _cxq. To close the race and avoid "stranding" and
   // progress-liveness failure we must resample-retry _owner before parking.
   // Note the Dekker/Lamport duality: ST cxq; MEMBAR; LD Owner.
@@ -702,7 +700,7 @@
 // Knob_Reset and Knob_SpinAfterFutile support and restructuring the
 // loop accordingly.
 
-void ATTR ObjectMonitor::ReenterI (Thread * Self, ObjectWaiter * SelfNode) {
+void NOINLINE ObjectMonitor::ReenterI (Thread * Self, ObjectWaiter * SelfNode) {
   assert(Self != NULL , "invariant");
   assert(SelfNode != NULL , "invariant");
   assert(SelfNode->_thread == Self , "invariant");
@@ -790,6 +788,7 @@
   OrderAccess::fence(); // see comments at the end of EnterI()
 }
 
+// By convention we unlink a contending thread from EntryList|cxq immediately
 // after the thread acquires the lock in ::enter(). Equally, we could defer
 // unlinking the thread until ::exit()-time.
@@ -810,7 +809,7 @@
     assert(prv == NULL || prv->TState == ObjectWaiter::TS_ENTER, "invariant");
     TEVENT(Unlink from EntryList);
   } else {
-    guarantee(SelfNode->TState == ObjectWaiter::TS_CXQ, "invariant");
+    assert(SelfNode->TState == ObjectWaiter::TS_CXQ, "invariant");
     // Inopportune interleaving -- Self is still on the cxq.
     // This usually means the enqueue of self raced an exiting thread.
     // Normally we'll find Self near the front of the cxq, so
@@ -850,10 +849,12 @@
     TEVENT(Unlink from cxq);
   }
 
+#ifdef ASSERT
   // Diagnostic hygiene ...
   SelfNode->_prev  = (ObjectWaiter *) 0xBAD;
   SelfNode->_next  = (ObjectWaiter *) 0xBAD;
   SelfNode->TState = ObjectWaiter::TS_RUN;
+#endif
 }
 
 // -----------------------------------------------------------------------------
@@ -906,9 +907,15 @@
 // the integral of the # of active timers at any instant over time).
 // Both impinge on OS scalability. Given that, at most one thread parked on
 // a monitor will use a timer.
+//
+// There is also the risk of a futile wake-up. If we drop the lock
+// another thread can reacquire the lock immediately, and we can
+// then wake a thread unnecessarily. This is benign, and we've
+// structured the code so the windows are short and the frequency
+// of such futile wakups is low.
 
-void ATTR ObjectMonitor::exit(bool not_suspended, TRAPS) {
-  Thread * Self = THREAD;
+void NOINLINE ObjectMonitor::exit(bool not_suspended, TRAPS) {
+  Thread * const Self = THREAD;
   if (THREAD != _owner) {
     if (THREAD->is_lock_owned((address) _owner)) {
       // Transmute _owner from a BasicLock pointer to a Thread address.
@@ -920,14 +927,17 @@
       _recursions = 0;
       OwnerIsThread = 1;
     } else {
-      // NOTE: we need to handle unbalanced monitor enter/exit
-      // in native code by throwing an exception.
-      // TODO: Throw an IllegalMonitorStateException ?
+      // Apparent unbalanced locking ...
+      // Naively we'd like to throw IllegalMonitorStateException.
+      // As a practical matter we can neither allocate nor throw an
+      // exception as ::exit() can be called from leaf routines.
+      // see x86_32.ad Fast_Unlock() and the I1 and I2 properties.
+      // Upon deeper reflection, however, in a properly run JVM the only
+      // way we should encounter this situation is in the presence of
+      // unbalanced JNI locking. TODO: CheckJNICalls.
+      // See also: CR4414101
       TEVENT(Exit - Throw IMSX);
-      assert(false, "Non-balanced monitor enter/exit!");
-      if (false) {
-        THROW(vmSymbols::java_lang_IllegalMonitorStateException());
-      }
+      assert(false, "Non-balanced monitor enter/exit! Likely JNI locking");
       return;
     }
   }
@@ -976,6 +986,7 @@
       return;
     }
     TEVENT(Inflated exit - complex egress);
+
     // Other threads are blocked trying to acquire the lock.
     // Normally the exiting thread is responsible for ensuring succession,
     // but if other successors are ready or other entering threads are spinning
@@ -1142,9 +1153,9 @@
     if (w != NULL) {
       // I'd like to write: guarantee (w->_thread != Self).
       // But in practice an exiting thread may find itself on the EntryList.
-      // Lets say thread T1 calls O.wait(). Wait() enqueues T1 on O's waitset and
+      // Let's say thread T1 calls O.wait(). Wait() enqueues T1 on O's waitset and
      // then calls exit(). Exit release the lock by setting O._owner to NULL.
-      // Lets say T1 then stalls. T2 acquires O and calls O.notify(). The
+      // Let's say T1 then stalls. T2 acquires O and calls O.notify(). The
      // notify() operation moves T1 from O's waitset to O's EntryList. T2 then
      // release the lock "O". T2 resumes immediately after the ST of null into
      // _owner, above. T2 notices that the EntryList is populated, so it
@@ -1261,10 +1272,13 @@
 //     MEMBAR
 //     LD Self_>_suspend_flags
 //
+// UPDATE 2007-10-6: since I've replaced the native Mutex/Monitor subsystem
+// with a more efficient implementation, the need to use "FastHSSEC" has
+// decreased. - Dave
 
 bool ObjectMonitor::ExitSuspendEquivalent (JavaThread * jSelf) {
-  int Mode = Knob_FastHSSEC;
+  const int Mode = Knob_FastHSSEC;
   if (Mode && !jSelf->is_external_suspend()) {
     assert(jSelf->is_suspend_equivalent(), "invariant");
     jSelf->clear_suspend_equivalent();
@@ -1413,7 +1427,7 @@
 // Wait/Notify/NotifyAll
 //
 // Note: a subset of changes to ObjectMonitor::wait()
-// will need to be replicated in complete_exit above
+// will need to be replicated in complete_exit
 void ObjectMonitor::wait(jlong millis, bool interruptible, TRAPS) {
   Thread * const Self = THREAD;
   assert(Self->is_Java_thread(), "Must be Java thread!");
@@ -2268,12 +2282,12 @@
   assert(_event != NULL, "invariant");
 }
 
-void ObjectWaiter::wait_reenter_begin(ObjectMonitor *mon) {
+void ObjectWaiter::wait_reenter_begin(ObjectMonitor * const mon) {
   JavaThread *jt = (JavaThread *)this->_thread;
   _active = JavaThreadBlockedOnMonitorEnterState::wait_reenter_begin(jt, mon);
 }
 
-void ObjectWaiter::wait_reenter_end(ObjectMonitor *mon) {
+void ObjectWaiter::wait_reenter_end(ObjectMonitor * const mon) {
   JavaThread *jt = (JavaThread *)this->_thread;
   JavaThreadBlockedOnMonitorEnterState::wait_reenter_end(jt, _active);
 }
@@ -2455,6 +2469,7 @@
   #define SETKNOB(x) { Knob_##x = kvGetInt (knobs, #x, Knob_##x); }
   SETKNOB(ReportSettings);
   SETKNOB(Verbose);
+  SETKNOB(VerifyInUse);
   SETKNOB(FixedSpin);
   SETKNOB(SpinLimit);
   SETKNOB(SpinBase);
diff -r 3ab9867d7786 -r 381638db28e6 hotspot/src/share/vm/runtime/objectMonitor.hpp
--- a/hotspot/src/share/vm/runtime/objectMonitor.hpp  Thu Jul 03 09:16:56 2014 -0700
+++ b/hotspot/src/share/vm/runtime/objectMonitor.hpp  Thu Jul 03 11:07:51 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -311,6 +311,7 @@
 
  public:
   static int Knob_Verbose;
+  static int Knob_VerifyInUse;
   static int Knob_SpinLimit;
   void* operator new (size_t size) throw() {
     return AllocateHeap(size, mtInternal);
diff -r 3ab9867d7786 -r 381638db28e6 hotspot/src/share/vm/runtime/sharedRuntime.cpp
--- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp  Thu Jul 03 09:16:56 2014 -0700
+++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp  Thu Jul 03 11:07:51 2014 -0700
@@ -1810,14 +1810,8 @@
 
 // Handles the uncommon case in locking, i.e., contention or an inflated lock.
-#ifndef PRODUCT
-int SharedRuntime::_monitor_enter_ctr=0;
-#endif
 JRT_ENTRY_NO_ASYNC(void, SharedRuntime::complete_monitor_locking_C(oopDesc* _obj, BasicLock* lock, JavaThread* thread))
   oop obj(_obj);
-#ifndef PRODUCT
-  _monitor_enter_ctr++;             // monitor enter slow
-#endif
   if (PrintBiasedLockingStatistics) {
     Atomic::inc(BiasedLocking::slow_path_entry_count_addr());
   }
@@ -1831,15 +1825,9 @@
   assert(!HAS_PENDING_EXCEPTION, "Should have no exception here");
 JRT_END
 
-#ifndef PRODUCT
-int SharedRuntime::_monitor_exit_ctr=0;
-#endif
 // Handles the uncommon cases of monitor unlocking in compiled code
 JRT_LEAF(void, SharedRuntime::complete_monitor_unlocking_C(oopDesc* _obj, BasicLock* lock))
   oop obj(_obj);
-#ifndef PRODUCT
-  _monitor_exit_ctr++;              // monitor exit slow
-#endif
   Thread* THREAD = JavaThread::current();
   // I'm not convinced we need the code contained by MIGHT_HAVE_PENDING anymore
   // testing was unable to ever fire the assert that guarded it so I have removed it.
@@ -1879,8 +1867,6 @@
     ttyLocker ttyl;
     if (xtty != NULL)  xtty->head("statistics type='SharedRuntime'");
 
-    if (_monitor_enter_ctr) tty->print_cr("%5d monitor enter slow", _monitor_enter_ctr);
-    if (_monitor_exit_ctr) tty->print_cr("%5d monitor exit slow", _monitor_exit_ctr);
     if (_throw_null_ctr) tty->print_cr("%5d implicit null throw", _throw_null_ctr);
 
     SharedRuntime::print_ic_miss_histogram();
@@ -2464,9 +2450,9 @@
   if (PrintAdapterHandlers || PrintStubCode) {
     ttyLocker ttyl;
     entry->print_adapter_on(tty);
-    tty->print_cr("i2c argument handler #%d for: %s %s (%d bytes generated)",
+    tty->print_cr("i2c argument handler #%d for: %s %s %s (%d bytes generated)",
                   _adapters->number_of_entries(), (method->is_static() ? "static" : "receiver"),
-                  method->signature()->as_C_string(), insts_size);
+                  method->signature()->as_C_string(), fingerprint->as_string(), insts_size);
     tty->print_cr("c2i argument handler starts at %p", entry->get_c2i_entry());
     if (Verbose || PrintStubCode) {
       address first_pc = entry->base_address();
diff -r 3ab9867d7786 -r 381638db28e6 hotspot/src/share/vm/runtime/sharedRuntime.hpp
--- a/hotspot/src/share/vm/runtime/sharedRuntime.hpp  Thu Jul 03 09:16:56 2014 -0700
+++ b/hotspot/src/share/vm/runtime/sharedRuntime.hpp  Thu Jul 03 11:07:51 2014 -0700
@@ -516,8 +516,6 @@
   static void trace_ic_miss(address at);
 
  public:
-  static int _monitor_enter_ctr;        // monitor enter slow
-  static int _monitor_exit_ctr;         // monitor exit slow
   static int _throw_null_ctr;           // throwing a null-pointer exception
   static int _ic_miss_ctr;              // total # of IC misses
   static int _wrong_method_ctr;
diff -r 3ab9867d7786 -r 381638db28e6 hotspot/src/share/vm/runtime/synchronizer.cpp
--- a/hotspot/src/share/vm/runtime/synchronizer.cpp  Thu Jul 03 09:16:56 2014 -0700
+++ b/hotspot/src/share/vm/runtime/synchronizer.cpp  Thu Jul 03 11:07:51 2014 -0700
@@ -44,9 +44,9 @@
 
 #if defined(__GNUC__) && !defined(PPC64)
   // Need to inhibit inlining for older versions of GCC to avoid build-time failures
-  #define ATTR __attribute__((noinline))
+  #define NOINLINE __attribute__((noinline))
 #else
-  #define ATTR
+  #define NOINLINE
 #endif
 
 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
@@ -206,14 +206,6 @@
     return;
   }
 
-#if 0
-  // The following optimization isn't particularly useful.
-  if (mark->has_monitor() && mark->monitor()->is_entered(THREAD)) {
-    lock->set_displaced_header(NULL);
-    return;
-  }
-#endif
-
   // The object header will never be displaced to this lock,
   // so it does not matter what the value is, except that it
   // must be non-zero to avoid looking like a re-entrant lock,
@@ -573,7 +565,7 @@
   // added check of the bias pattern is to avoid useless calls to
   // thread-local storage.
   if (obj->mark()->has_bias_pattern()) {
-    // Box and unbox the raw reference just in case we cause a STW safepoint.
+    // Handle for oop obj in case of STW safepoint
     Handle hobj(Self, obj);
     // Relaxing assertion for bug 6320749.
     assert(Universe::verify_in_progress() ||
@@ -890,23 +882,23 @@
       }
     }
   }
-/* Too slow for general assert or debug
+
 void ObjectSynchronizer::verifyInUse (Thread *Self) {
    ObjectMonitor* mid;
    int inusetally = 0;
    for (mid = Self->omInUseList; mid != NULL; mid = mid->FreeNext) {
-     inusetally ++;
+     inusetally++;
    }
    assert(inusetally == Self->omInUseCount, "inuse count off");
 
    int freetally = 0;
    for (mid = Self->omFreeList; mid != NULL; mid = mid->FreeNext) {
-     freetally ++;
+     freetally++;
    }
    assert(freetally == Self->omFreeCount, "free count off");
 }
-*/
-ObjectMonitor * ATTR ObjectSynchronizer::omAlloc (Thread * Self) {
+
+ObjectMonitor * NOINLINE ObjectSynchronizer::omAlloc (Thread * Self) {
     // A large MAXPRIVATE value reduces both list lock contention
     // and list coherency traffic, but also tends to increase the
     // number of objectMonitors in circulation as well as the STW
@@ -932,7 +924,9 @@
         m->FreeNext = Self->omInUseList;
         Self->omInUseList = m;
         Self->omInUseCount++;
-        // verifyInUse(Self);
+        if (ObjectMonitor::Knob_VerifyInUse) {
+          verifyInUse(Self);
+        }
       } else {
         m->FreeNext = NULL;
       }
@@ -1052,7 +1046,9 @@
           curmidinuse->FreeNext = mid->FreeNext; // maintain the current thread inuselist
         }
         Self->omInUseCount--;
-        // verifyInUse(Self);
+        if (ObjectMonitor::Knob_VerifyInUse) {
+          verifyInUse(Self);
+        }
         break;
       } else {
         curmidinuse = mid;
@@ -1061,7 +1057,7 @@
     }
   }
 
-  // FreeNext is used for both onInUseList and omFreeList, so clear old before setting new
+  // FreeNext is used for both omInUseList and omFreeList, so clear old before setting new
   m->FreeNext = Self->omFreeList;
   Self->omFreeList = m;
   Self->omFreeCount++;
@@ -1074,7 +1070,7 @@
 // consecutive STW safepoints. Relatedly, we might decay
 // omFreeProvision at STW safepoints.
 //
-// Also return the monitors of a moribund thread"s omInUseList to
+// Also return the monitors of a moribund thread's omInUseList to
 // a global gOmInUseList under the global list lock so these
 // will continue to be scanned.
 //
@@ -1115,7 +1111,6 @@
       InUseTail = curom;
       InUseTally++;
     }
-// TODO debug
     assert(Self->omInUseCount == InUseTally, "inuse count off");
     Self->omInUseCount = 0;
     guarantee(InUseTail != NULL && InUseList != NULL, "invariant");
@@ -1154,7 +1149,7 @@
 // multiple locks occupy the same $ line. Padding might be appropriate.
 
-ObjectMonitor * ATTR ObjectSynchronizer::inflate (Thread * Self, oop object) {
+ObjectMonitor * NOINLINE ObjectSynchronizer::inflate (Thread * Self, oop object) {
   // Inflate mutates the heap ...
   // Relaxing assertion for bug 6320749.
   assert(Universe::verify_in_progress() ||
@@ -1385,7 +1380,7 @@
 // Deflate a single monitor if not in use
 // Return true if deflated, false if in use
 bool ObjectSynchronizer::deflate_monitor(ObjectMonitor* mid, oop obj,
-                                         ObjectMonitor** FreeHeadp, ObjectMonitor** FreeTailp) {
+                                         ObjectMonitor** freeHeadp, ObjectMonitor** freeTailp) {
   bool deflated;
   // Normal case ... The monitor is associated with obj.
   guarantee(obj->mark() == markOopDesc::encode(mid), "invariant");
@@ -1415,13 +1410,13 @@
     assert(mid->object() == NULL, "invariant");
 
     // Move the object to the working free list defined by FreeHead,FreeTail.
-    if (*FreeHeadp == NULL) *FreeHeadp = mid;
-    if (*FreeTailp != NULL) {
-      ObjectMonitor * prevtail = *FreeTailp;
+    if (*freeHeadp == NULL) *freeHeadp = mid;
+    if (*freeTailp != NULL) {
+      ObjectMonitor * prevtail = *freeTailp;
       assert(prevtail->FreeNext == NULL, "cleaned up deflated?"); // TODO KK
       prevtail->FreeNext = mid;
     }
-    *FreeTailp = mid;
+    *freeTailp = mid;
     deflated = true;
   }
   return deflated;
@@ -1429,7 +1424,7 @@
 
 // Caller acquires ListLock
 int ObjectSynchronizer::walk_monitor_list(ObjectMonitor** listheadp,
-                                          ObjectMonitor** FreeHeadp, ObjectMonitor** FreeTailp) {
+                                          ObjectMonitor** freeHeadp, ObjectMonitor** freeTailp) {
   ObjectMonitor* mid;
   ObjectMonitor* next;
   ObjectMonitor* curmidinuse = NULL;
@@ -1439,7 +1434,7 @@
     oop obj = (oop) mid->object();
     bool deflated = false;
     if (obj != NULL) {
-      deflated = deflate_monitor(mid, obj, FreeHeadp, FreeTailp);
+      deflated = deflate_monitor(mid, obj, freeHeadp, freeTailp);
     }
     if (deflated) {
       // extract from per-thread in-use-list
@@ -1482,7 +1477,9 @@
       nInCirculation+= cur->omInUseCount;
       int deflatedcount = walk_monitor_list(cur->omInUseList_addr(), &FreeHead, &FreeTail);
       cur->omInUseCount-= deflatedcount;
-      // verifyInUse(cur);
+      if (ObjectMonitor::Knob_VerifyInUse) {
+        verifyInUse(cur);
+      }
       nScavenged += deflatedcount;
       nInuse += cur->omInUseCount;
     }
diff -r 3ab9867d7786 -r 381638db28e6 hotspot/src/share/vm/runtime/synchronizer.hpp
--- a/hotspot/src/share/vm/runtime/synchronizer.hpp  Thu Jul 03 09:16:56 2014 -0700
+++ b/hotspot/src/share/vm/runtime/synchronizer.hpp  Thu Jul 03 11:07:51 2014 -0700
@@ -84,7 +84,7 @@
   static void reenter (Handle obj, intptr_t recursion, TRAPS);
 
   // thread-specific and global objectMonitor free list accessors
-//  static void verifyInUse (Thread * Self) ; too slow for general assert/debug
+  static void verifyInUse(Thread * Self);
   static ObjectMonitor * omAlloc(Thread * Self);
   static void omRelease(Thread * Self, ObjectMonitor * m, bool FromPerThreadAlloc);
   static void omFlush(Thread * Self);
@@ -114,10 +114,10 @@
   // An adaptive profile-based deflation policy could be used if needed
   static void deflate_idle_monitors();
   static int walk_monitor_list(ObjectMonitor** listheadp,
-                               ObjectMonitor** FreeHeadp,
-                               ObjectMonitor** FreeTailp);
-  static bool deflate_monitor(ObjectMonitor* mid, oop obj, ObjectMonitor** FreeHeadp,
-                              ObjectMonitor** FreeTailp);
+                               ObjectMonitor** freeHeadp,
+                               ObjectMonitor** freeTailp);
+  static bool deflate_monitor(ObjectMonitor* mid, oop obj, ObjectMonitor** freeHeadp,
+                              ObjectMonitor** freeTailp);
   static void oops_do(OopClosure* f);
 
   // debugging
@@ -130,7 +130,10 @@
   enum { _BLOCKSIZE = 128 };
   static ObjectMonitor* gBlockList;
   static ObjectMonitor * volatile gFreeList;
-  static ObjectMonitor * volatile gOmInUseList; // for moribund thread, so monitors they inflated still get scanned
+  // global monitor in use list, for moribund threads,
+  // monitors they inflated need to be scanned for deflation
+  static ObjectMonitor * volatile gOmInUseList;
+  // count of entries in gOmInUseList
   static int gOmInUseCount;
 };
 
diff -r 3ab9867d7786 -r 381638db28e6 hotspot/src/share/vm/runtime/thread.cpp
--- a/hotspot/src/share/vm/runtime/thread.cpp  Thu Jul 03 09:16:56 2014 -0700
+++ b/hotspot/src/share/vm/runtime/thread.cpp  Thu Jul 03 11:07:51 2014 -0700
@@ -4392,6 +4392,12 @@
   // It's safe if subsequent LDs and STs float "up" into the critical section,
   // but prior LDs and STs within the critical section can't be allowed
   // to reorder or float past the ST that releases the lock.
+  // Loads and stores in the critical section - which appear in program
+  // order before the store that releases the lock - must also appear
+  // before the store that releases the lock in memory visibility order.
+  // Conceptually we need a #loadstore|#storestore "release" MEMBAR before
+  // the ST of 0 into the lock-word which releases the lock, so fence
+  // more than covers this on all platforms.
   *adr = 0;
 }
 
@@ -4573,18 +4579,25 @@
 // This implementation pops from the head of the list. This is unfair,
 // but tends to provide excellent throughput as hot threads remain hot.
 // (We wake recently run threads first).
-
+//
+// All paths through muxRelease() will execute a CAS.
+// Release consistency -- We depend on the CAS in muxRelease() to provide full
+// bidirectional fence/MEMBAR semantics, ensuring that all prior memory operations
+// executed within the critical section are complete and globally visible before the
+// store (CAS) to the lock-word that releases the lock becomes globally visible.
 void Thread::muxRelease (volatile intptr_t * Lock)  {
   for (;;) {
     const intptr_t w = Atomic::cmpxchg_ptr(0, Lock, LOCKBIT);
     assert(w & LOCKBIT, "invariant");
     if (w == LOCKBIT) return;
-    ParkEvent * List = (ParkEvent *)(w & ~LOCKBIT);
+    ParkEvent * const List = (ParkEvent *) (w & ~LOCKBIT);
    assert(List != NULL, "invariant");
    assert(List->OnList == intptr_t(Lock), "invariant");
-    ParkEvent * nxt = List->ListNext;
+    ParkEvent * const nxt = List->ListNext;
+    guarantee((intptr_t(nxt) & LOCKBIT) == 0, "invariant");
 
     // The following CAS() releases the lock and pops the head element.
+    // The CAS() also ratifies the previously fetched lock-word value.
     if (Atomic::cmpxchg_ptr (intptr_t(nxt), Lock, w) != w) {
       continue;
    }