# HG changeset patch # User dcubed # Date 1413307932 25200 # Node ID 785a8d56024c6c7dc4f10d4ca8eeeaf22a413fae # Parent 6523fa019ffa77d611589850142f9a520fa81cfb 8049737: Contended Locking reorder and cache line bucket Summary: JEP-143/JDK-8046133 - optimization #1 - reorder and cache line bucket. Reviewed-by: shade, dice, dholmes, dsimms Contributed-by: dave.dice@oracle.com, karen.kinnear@oracle.com, daniel.daugherty@oracle.com diff -r 6523fa019ffa -r 785a8d56024c hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/ObjectMonitor.java --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/ObjectMonitor.java Mon Oct 13 22:11:39 2014 +0200 +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/ObjectMonitor.java Tue Oct 14 10:32:12 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2005, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -50,8 +50,8 @@ ownerFieldOffset = f.getOffset(); f = type.getField("FreeNext"); FreeNextFieldOffset = f.getOffset(); - countField = type.getCIntegerField("_count"); - waitersField = type.getCIntegerField("_waiters"); + countField = type.getJIntField("_count"); + waitersField = type.getJIntField("_waiters"); recursionsField = type.getCIntegerField("_recursions"); } @@ -81,15 +81,15 @@ // FIXME // void set_owner(void* owner); - public long waiters() { return waitersField.getValue(addr); } + public int waiters() { return waitersField.getValue(addr); } public Address freeNext() { return addr.getAddressAt(FreeNextFieldOffset); } // FIXME // void set_queue(void* owner); - public long count() { return countField.getValue(addr); } + public int count() { return countField.getValue(addr); } // FIXME - // void set_count(intptr_t count); + // void set_count(int count); public long recursions() { return recursionsField.getValue(addr); } @@ -97,18 +97,9 @@ return addr.getOopHandleAt(objectFieldOffset); } - public long contentions() { - // refer to objectMonitor_xxx.inline.hpp - contentions definition. - // for Solaris and Linux, contentions is same as count. for Windows - // it is different (objectMonitor_win32.inline.hpp) - long count = count(); - if (VM.getVM().getOS().equals("win32")) { - // don't count the owner of the monitor - return count > 0? 
count - 1 : 0; - } else { - // Solaris and Linux - return count; - } + // contentions is always equal to count + public int contentions() { + return count(); } // FIXME @@ -123,8 +114,8 @@ private static long objectFieldOffset; private static long ownerFieldOffset; private static long FreeNextFieldOffset; - private static CIntegerField countField; - private static CIntegerField waitersField; + private static JIntField countField; + private static JIntField waitersField; private static CIntegerField recursionsField; // FIXME: expose platform-dependent stuff } diff -r 6523fa019ffa -r 785a8d56024c hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/ObjectSynchronizer.java --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/ObjectSynchronizer.java Mon Oct 13 22:11:39 2014 +0200 +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/ObjectSynchronizer.java Tue Oct 14 10:32:12 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -48,9 +48,17 @@ blockListField = type.getAddressField("gBlockList"); gBlockListAddr = blockListField.getValue(); blockSize = db.lookupIntConstant("ObjectSynchronizer::_BLOCKSIZE").intValue(); + defaultCacheLineSize = db.lookupIntConstant("DEFAULT_CACHE_LINE_SIZE").intValue(); } catch (RuntimeException e) { } type = db.lookupType("ObjectMonitor"); objectMonitorTypeSize = type.getSize(); + if ((objectMonitorTypeSize % defaultCacheLineSize) != 0) { + // sizeof(ObjectMonitor) is not already a multiple of a cache line. + // The ObjectMonitor allocation code in ObjectSynchronizer pads each + // ObjectMonitor in a block to the next cache line boundary. 
+ int needLines = ((int)objectMonitorTypeSize / defaultCacheLineSize) + 1; + objectMonitorTypeSize = needLines * defaultCacheLineSize; + } } public long identityHashValueFor(Oop obj) { @@ -122,6 +130,7 @@ private static Address gBlockListAddr; private static int blockSize; + private static int defaultCacheLineSize; private static long objectMonitorTypeSize; } diff -r 6523fa019ffa -r 785a8d56024c hotspot/src/cpu/sparc/vm/globalDefinitions_sparc.hpp --- a/hotspot/src/cpu/sparc/vm/globalDefinitions_sparc.hpp Mon Oct 13 22:11:39 2014 +0200 +++ b/hotspot/src/cpu/sparc/vm/globalDefinitions_sparc.hpp Tue Oct 14 10:32:12 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -38,4 +38,26 @@ #define SUPPORTS_NATIVE_CX8 +// The expected size in bytes of a cache line, used to pad data structures. +#if defined(TIERED) + #ifdef _LP64 + // tiered, 64-bit, large machine + #define DEFAULT_CACHE_LINE_SIZE 128 + #else + // tiered, 32-bit, medium machine + #define DEFAULT_CACHE_LINE_SIZE 64 + #endif +#elif defined(COMPILER1) + // pure C1, 32-bit, small machine + #define DEFAULT_CACHE_LINE_SIZE 16 +#elif defined(COMPILER2) || defined(SHARK) + #ifdef _LP64 + // pure C2, 64-bit, large machine + #define DEFAULT_CACHE_LINE_SIZE 128 + #else + // pure C2, 32-bit, medium machine + #define DEFAULT_CACHE_LINE_SIZE 64 + #endif +#endif + #endif // CPU_SPARC_VM_GLOBALDEFINITIONS_SPARC_HPP diff -r 6523fa019ffa -r 785a8d56024c hotspot/src/cpu/x86/vm/globalDefinitions_x86.hpp --- a/hotspot/src/cpu/x86/vm/globalDefinitions_x86.hpp Mon Oct 13 22:11:39 2014 +0200 +++ b/hotspot/src/cpu/x86/vm/globalDefinitions_x86.hpp Tue Oct 14 10:32:12 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2013, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -35,4 +35,27 @@ #define SUPPORTS_NATIVE_CX8 +// The expected size in bytes of a cache line, used to pad data structures. +#if defined(TIERED) + #ifdef _LP64 + // tiered, 64-bit, large machine + #define DEFAULT_CACHE_LINE_SIZE 128 + #else + // tiered, 32-bit, medium machine + #define DEFAULT_CACHE_LINE_SIZE 64 + #endif +#elif defined(COMPILER1) + // pure C1, 32-bit, small machine + // i486 was the last Intel chip with 16-byte cache line size + #define DEFAULT_CACHE_LINE_SIZE 32 +#elif defined(COMPILER2) || defined(SHARK) + #ifdef _LP64 + // pure C2, 64-bit, large machine + #define DEFAULT_CACHE_LINE_SIZE 128 + #else + // pure C2, 32-bit, medium machine + #define DEFAULT_CACHE_LINE_SIZE 64 + #endif +#endif + #endif // CPU_X86_VM_GLOBALDEFINITIONS_X86_HPP diff -r 6523fa019ffa -r 785a8d56024c hotspot/src/share/vm/memory/padded.hpp --- a/hotspot/src/share/vm/memory/padded.hpp Mon Oct 13 22:11:39 2014 +0200 +++ b/hotspot/src/share/vm/memory/padded.hpp Tue Oct 14 10:32:12 2014 -0700 @@ -76,10 +76,16 @@ // if the start address is a multiple of alignment. template class PaddedEnd : public PaddedEndImpl { - // C++ don't allow zero-length arrays. The padding is put in a + // C++ doesn't allow zero-length arrays. The padding is put in a // super class that is specialized for the pad_size == 0 case. }; +// Similar to PaddedEnd, this macro defines a _pad_buf#id field +// that is (alignment - size) bytes in size. This macro is used +// to add padding in between non-class fields in a class or struct. +#define DEFINE_PAD_MINUS_SIZE(id, alignment, size) \ + char _pad_buf##id[(alignment) - (size)] + // Helper class to create an array of PaddedEnd objects. 
All elements will // start at a multiple of alignment and the size will be aligned to alignment. template diff -r 6523fa019ffa -r 785a8d56024c hotspot/src/share/vm/prims/jvmtiEnvBase.cpp --- a/hotspot/src/share/vm/prims/jvmtiEnvBase.cpp Mon Oct 13 22:11:39 2014 +0200 +++ b/hotspot/src/share/vm/prims/jvmtiEnvBase.cpp Tue Oct 14 10:32:12 2014 -0700 @@ -1031,7 +1031,7 @@ // implied else: entry_count == 0 } - int nWant,nWait; + jint nWant, nWait; if (mon != NULL) { // this object has a heavyweight monitor nWant = mon->contentions(); // # of threads contending for monitor diff -r 6523fa019ffa -r 785a8d56024c hotspot/src/share/vm/runtime/objectMonitor.cpp --- a/hotspot/src/share/vm/runtime/objectMonitor.cpp Mon Oct 13 22:11:39 2014 +0200 +++ b/hotspot/src/share/vm/runtime/objectMonitor.cpp Tue Oct 14 10:32:12 2014 -0700 @@ -257,7 +257,6 @@ assert(_recursions == 0, "internal state error"); _owner = THREAD; _recursions = 1; - OwnerIsThread = 1; return true; } if (Atomic::cmpxchg_ptr (THREAD, &_owner, NULL) != NULL) { @@ -280,7 +279,6 @@ // Either ASSERT _recursions == 0 or explicitly set _recursions = 0. assert(_recursions == 0, "invariant"); assert(_owner == Self, "invariant"); - // CONSIDER: set or assert OwnerIsThread == 1 return; } @@ -296,7 +294,6 @@ // Commute owner from a thread-specific on-stack BasicLockObject address to // a full-fledged "Thread *". _owner = Self; - OwnerIsThread = 1; return; } @@ -328,7 +325,7 @@ // Prevent deflation at STW-time. See deflate_idle_monitors() and is_busy(). // Ensure the object-monitor relationship remains stable while there's contention. - Atomic::inc_ptr(&_count); + Atomic::inc(&_count); EventJavaMonitorEnter event; @@ -384,7 +381,7 @@ // acquire it. } - Atomic::dec_ptr(&_count); + Atomic::dec(&_count); assert(_count >= 0, "invariant"); Self->_Stalled = 0; @@ -440,7 +437,6 @@ // Either guarantee _recursions == 0 or set _recursions = 0. 
assert(_recursions == 0, "invariant"); assert(_owner == Self, "invariant"); - // CONSIDER: set or assert that OwnerIsThread == 1 return 1; } // The lock had been free momentarily, but we lost the race to the lock. @@ -922,7 +918,6 @@ assert(_recursions == 0, "invariant"); _owner = THREAD; _recursions = 0; - OwnerIsThread = 1; } else { // Apparent unbalanced locking ... // Naively we'd like to throw IllegalMonitorStateException. @@ -1346,7 +1341,6 @@ assert(_recursions == 0, "internal state error"); _owner = THREAD; // Convert from basiclock addr to Thread addr _recursions = 0; - OwnerIsThread = 1; } } @@ -1385,7 +1379,6 @@ if (THREAD->is_lock_owned((address) _owner)) { \ _owner = THREAD; /* Convert from basiclock addr to Thread addr */ \ _recursions = 0; \ - OwnerIsThread = 1; \ } else { \ TEVENT(Throw IMSX); \ THROW(vmSymbols::java_lang_IllegalMonitorStateException()); \ @@ -1906,8 +1899,8 @@ // a contending thread could enqueue itself on the cxq and then spin locally // on a thread-specific variable such as its ParkEvent._Event flag. // That's left as an exercise for the reader. Note that global spinning is -// not problematic on Niagara, as the L2$ serves the interconnect and has both -// low latency and massive bandwidth. +// not problematic on Niagara, as the L2 cache serves the interconnect and +// has both low latency and massive bandwidth. // // Broadly, we can fix the spin frequency -- that is, the % of contended lock // acquisition attempts where we opt to spin -- at 100% and vary the spin count @@ -2208,7 +2201,7 @@ // as advisory. // // Beware too, that _owner is sometimes a BasicLock address and sometimes -// a thread pointer. We differentiate the two cases with OwnerIsThread. +// a thread pointer. // Alternately, we might tag the type (thread pointer vs basiclock pointer) // with the LSB of _owner. Another option would be to probablistically probe // the putative _owner->TypeTag value. 
@@ -2230,9 +2223,7 @@ int ObjectMonitor::NotRunnable(Thread * Self, Thread * ox) { - // Check either OwnerIsThread or ox->TypeTag == 2BAD. - if (!OwnerIsThread) return 0; - + // Check ox->TypeTag == 2BAD. if (ox == NULL) return 0; // Avoid transitive spinning ... @@ -2399,20 +2390,6 @@ } } - -// Compile-time asserts -// When possible, it's better to catch errors deterministically at -// compile-time than at runtime. The down-side to using compile-time -// asserts is that error message -- often something about negative array -// indices -- is opaque. - -#define CTASSERT(x) { int tag[1-(2*!(x))]; printf ("Tag @" INTPTR_FORMAT "\n", (intptr_t)tag); } - -void ObjectMonitor::ctAsserts() { - CTASSERT(offset_of (ObjectMonitor, _header) == 0); -} - - static char * kvGet(char * kvList, const char * Key) { if (kvList == NULL) return NULL; size_t n = strlen(Key); @@ -2526,6 +2503,8 @@ if (verbose) { tty->print_cr("INFO: sizeof(ObjectMonitor)=" SIZE_FORMAT, sizeof(ObjectMonitor)); + tty->print_cr("INFO: sizeof(PaddedEnd)=" SIZE_FORMAT, + sizeof(PaddedEnd)); } uint cache_line_size = VM_Version::L1_data_cache_line_size(); @@ -2559,9 +2538,9 @@ warning_cnt++; } - if ((sizeof(ObjectMonitor) % cache_line_size) != 0) { - tty->print_cr("WARNING: ObjectMonitor size is not a multiple of " - "a cache line which permits false sharing."); + if ((sizeof(PaddedEnd) % cache_line_size) != 0) { + tty->print_cr("WARNING: PaddedEnd size is not a " + "multiple of a cache line which permits false sharing."); warning_cnt++; } } diff -r 6523fa019ffa -r 785a8d56024c hotspot/src/share/vm/runtime/objectMonitor.hpp --- a/hotspot/src/share/vm/runtime/objectMonitor.hpp Mon Oct 13 22:11:39 2014 +0200 +++ b/hotspot/src/share/vm/runtime/objectMonitor.hpp Tue Oct 14 10:32:12 2014 -0700 @@ -25,6 +25,7 @@ #ifndef SHARE_VM_RUNTIME_OBJECTMONITOR_HPP #define SHARE_VM_RUNTIME_OBJECTMONITOR_HPP +#include "memory/padded.hpp" #include "runtime/os.hpp" #include "runtime/park.hpp" #include "runtime/perfData.hpp" @@ 
-58,21 +59,71 @@ // forward declaration to avoid include tracing.hpp class EventJavaMonitorWait; -// WARNING: -// This is a very sensitive and fragile class. DO NOT make any -// change unless you are fully aware of the underlying semantics. - -// This class can not inherit from any other class, because I have -// to let the displaced header be the very first word. Otherwise I -// have to let markOop include this file, which would export the -// monitor data structure to everywhere. +// The ObjectMonitor class implements the heavyweight version of a +// JavaMonitor. The lightweight BasicLock/stack lock version has been +// inflated into an ObjectMonitor. This inflation is typically due to +// contention or use of Object.wait(). +// +// WARNING: This is a very sensitive and fragile class. DO NOT make any +// changes unless you are fully aware of the underlying semantics. +// +// Class JvmtiRawMonitor currently inherits from ObjectMonitor so +// changes in this class must be careful to not break JvmtiRawMonitor. +// These two subsystems should be separated. +// +// ObjectMonitor Layout Overview/Highlights/Restrictions: // -// The ObjectMonitor class is used to implement JavaMonitors which have -// transformed from the lightweight structure of the thread stack to a -// heavy weight lock due to contention - -// It is also used as RawMonitor by the JVMTI - +// - The _header field must be at offset 0 because the displaced header +// from markOop is stored there. We do not want markOop.hpp to include +// ObjectMonitor.hpp to avoid exposing ObjectMonitor everywhere. This +// means that ObjectMonitor cannot inherit from any other class nor can +// it use any virtual member functions. This restriction is critical to +// the proper functioning of the VM. +// - The _header and _owner fields should be separated by enough space +// to avoid false sharing due to parallel access by different threads. +// This is an advisory recommendation. 
+// - The general layout of the fields in ObjectMonitor is: +// _header +// <lightly_used_fields> +// <optional padding> +// _owner +// <remaining_fields> +// - The VM assumes write ordering and machine word alignment with +// respect to the _owner field and the <remaining_fields> that can +// be read in parallel by other threads. +// - Generally fields that are accessed closely together in time should +// be placed proximally in space to promote data cache locality. That +// is, temporal locality should condition spatial locality. +// - We have to balance avoiding false sharing with excessive invalidation +// from coherence traffic. As such, we try to cluster fields that tend +// to be _written_ at approximately the same time onto the same data +// cache line. +// - We also have to balance the natural tension between minimizing +// single threaded capacity misses with excessive multi-threaded +// coherency misses. There is no single optimal layout for both +// single-threaded and multi-threaded environments. +// +// - See ObjectMonitor::sanity_checks() for how critical restrictions are +// enforced and advisory recommendations are reported. +// - Adjacent ObjectMonitors should be separated by enough space to avoid +// false sharing. This is handled by the ObjectMonitor allocation code +// in synchronizer.cpp. Also see ObjectSynchronizer::sanity_checks(). +// +// Futures notes: +// - Separating _owner from the <remaining_fields> by enough space to +// avoid false sharing might be profitable. Given +// http://blogs.oracle.com/dave/entry/cas_and_cache_trivia_invalidate +// we know that the CAS in monitorenter will invalidate the line +// underlying _owner. We want to avoid an L1 data cache miss on that +// same line for monitorexit. Putting these <remaining_fields>: +// _recursions, _EntryList, _cxq, and _succ, all of which may be +// fetched in the inflated unlock path, on a different cache line +// would make them immune to CAS-based invalidation from the _owner +// field. +// +// - The _recursions field should be of type int, or int32_t but not +// intptr_t.
There's no reason to use a 64-bit type for this field +// in a 64-bit JVM. class ObjectMonitor { public: @@ -84,7 +135,84 @@ OM_TIMED_OUT // Object.wait() timed out }; + private: + friend class ObjectSynchronizer; + friend class ObjectWaiter; + friend class VMStructs; + + volatile markOop _header; // displaced object header word - mark + void* volatile _object; // backward object pointer - strong root public: + ObjectMonitor * FreeNext; // Free list linkage + private: + DEFINE_PAD_MINUS_SIZE(0, DEFAULT_CACHE_LINE_SIZE, + sizeof(volatile markOop) + sizeof(void * volatile) + + sizeof(ObjectMonitor *)); + protected: // protected for JvmtiRawMonitor + void * volatile _owner; // pointer to owning thread OR BasicLock + volatile jlong _previous_owner_tid; // thread id of the previous owner of the monitor + volatile intptr_t _recursions; // recursion count, 0 for first entry + ObjectWaiter * volatile _EntryList; // Threads blocked on entry or reentry. + // The list is actually composed of WaitNodes, + // acting as proxies for Threads. + private: + ObjectWaiter * volatile _cxq; // LL of recently-arrived threads blocked on entry. + Thread * volatile _succ; // Heir presumptive thread - used for futile wakeup throttling + Thread * volatile _Responsible; + + volatile int _Spinner; // for exit->spinner handoff optimization + volatile int _SpinFreq; // Spin 1-out-of-N attempts: success rate + volatile int _SpinClock; + volatile intptr_t _SpinState; // MCS/CLH list of spinners + volatile int _SpinDuration; + + volatile jint _count; // reference count to prevent reclamation/deflation + // at stop-the-world time. See deflate_idle_monitors(). 
+ // _count is approximately |_WaitSet| + |_EntryList| + protected: + ObjectWaiter * volatile _WaitSet; // LL of threads wait()ing on the monitor + volatile jint _waiters; // number of waiting threads + private: + volatile int _WaitSetLock; // protects Wait Queue - simple spinlock + + public: + static void Initialize(); + static PerfCounter * _sync_ContendedLockAttempts; + static PerfCounter * _sync_FutileWakeups; + static PerfCounter * _sync_Parks; + static PerfCounter * _sync_EmptyNotifications; + static PerfCounter * _sync_Notifications; + static PerfCounter * _sync_SlowEnter; + static PerfCounter * _sync_SlowExit; + static PerfCounter * _sync_SlowNotify; + static PerfCounter * _sync_SlowNotifyAll; + static PerfCounter * _sync_FailedSpins; + static PerfCounter * _sync_SuccessfulSpins; + static PerfCounter * _sync_PrivateA; + static PerfCounter * _sync_PrivateB; + static PerfCounter * _sync_MonInCirculation; + static PerfCounter * _sync_MonScavenged; + static PerfCounter * _sync_Inflations; + static PerfCounter * _sync_Deflations; + static PerfLongVariable * _sync_MonExtant; + + static int Knob_Verbose; + static int Knob_VerifyInUse; + static int Knob_SpinLimit; + + void* operator new (size_t size) throw() { + return AllocateHeap(size, mtInternal); + } + void* operator new[] (size_t size) throw() { + return operator new (size); + } + void operator delete(void* p) { + FreeHeap(p, mtInternal); + } + void operator delete[] (void *p) { + operator delete(p); + } + // TODO-FIXME: the "offset" routines should return a type of off_t instead of int ... // ByteSize would also be an appropriate type. static int header_offset_in_bytes() { return offset_of(ObjectMonitor, _header); } @@ -100,14 +228,11 @@ static int Responsible_offset_in_bytes() { return offset_of(ObjectMonitor, _Responsible); } static int Spinner_offset_in_bytes() { return offset_of(ObjectMonitor, _Spinner); } - public: // Eventually we'll make provisions for multiple callbacks, but // now one will suffice. 
static int (*SpinCallbackFunction)(intptr_t, int); static intptr_t SpinCallbackArgument; - - public: markOop header() const; void set_header(markOop hdr); @@ -123,39 +248,22 @@ void* owner() const; void set_owner(void* owner); - intptr_t waiters() const; + jint waiters() const; - intptr_t count() const; - void set_count(intptr_t count); - intptr_t contentions() const; + jint count() const; + void set_count(jint count); + jint contentions() const; intptr_t recursions() const { return _recursions; } - // JVM/DI GetMonitorInfo() needs this + // JVM/TI GetObjectMonitorUsage() needs this: ObjectWaiter* first_waiter() { return _WaitSet; } ObjectWaiter* next_waiter(ObjectWaiter* o) { return o->_next; } Thread* thread_of_waiter(ObjectWaiter* o) { return o->_thread; } - // initialize the monitor, exception the semaphore, all other fields - // are simple integers or pointers - ObjectMonitor() { - _header = NULL; - _count = 0; - _waiters = 0; - _recursions = 0; - _object = NULL; - _owner = NULL; - _WaitSet = NULL; - _WaitSetLock = 0; - _Responsible = NULL; - _succ = NULL; - _cxq = NULL; - FreeNext = NULL; - _EntryList = NULL; - _SpinFreq = 0; - _SpinClock = 0; - OwnerIsThread = 0; - _previous_owner_tid = 0; - } + protected: + // We don't typically expect or want the ctors or dtors to run. + // normal ObjectMonitors are type-stable and immortal. + ObjectMonitor() { ::memset((void *)this, 0, sizeof(*this)); } ~ObjectMonitor() { // TODO: Add asserts ... 
@@ -169,7 +277,7 @@ // _cxq == 0 _succ == NULL _owner == NULL _waiters == 0 // _count == 0 EntryList == NULL // _recursions == 0 _WaitSet == NULL - // TODO: assert (is_busy()|_recursions) == 0 + assert(((is_busy()|_recursions) == 0), "freeing inuse monitor"); _succ = NULL; _EntryList = NULL; _cxq = NULL; @@ -177,7 +285,6 @@ _recursions = 0; _SpinFreq = 0; _SpinClock = 0; - OwnerIsThread = 0; } public: @@ -221,7 +328,6 @@ int TrySpin_Fixed(Thread * Self); int TrySpin_VaryFrequency(Thread * Self); int TrySpin_VaryDuration(Thread * Self); - void ctAsserts(); void ExitEpilog(Thread * Self, ObjectWaiter * Wakee); bool ExitSuspendEquivalent(JavaThread * Self); void post_monitor_wait_event(EventJavaMonitorWait * event, @@ -229,102 +335,6 @@ jlong timeout, bool timedout); - private: - friend class ObjectSynchronizer; - friend class ObjectWaiter; - friend class VMStructs; - - // WARNING: this must be the very first word of ObjectMonitor - // This means this class can't use any virtual member functions. - - volatile markOop _header; // displaced object header word - mark - void* volatile _object; // backward object pointer - strong root - - double SharingPad[1]; // temp to reduce false sharing - - // All the following fields must be machine word aligned - // The VM assumes write ordering wrt these fields, which can be - // read from other threads. - - protected: // protected for jvmtiRawMonitor - void * volatile _owner; // pointer to owning thread OR BasicLock - volatile jlong _previous_owner_tid; // thread id of the previous owner of the monitor - volatile intptr_t _recursions; // recursion count, 0 for first entry - private: - int OwnerIsThread; // _owner is (Thread *) vs SP/BasicLock - ObjectWaiter * volatile _cxq; // LL of recently-arrived threads blocked on entry. - // The list is actually composed of WaitNodes, acting - // as proxies for Threads. - protected: - ObjectWaiter * volatile _EntryList; // Threads blocked on entry or reentry. 
- private: - Thread * volatile _succ; // Heir presumptive thread - used for futile wakeup throttling - Thread * volatile _Responsible; - int _PromptDrain; // rqst to drain cxq into EntryList ASAP - - volatile int _Spinner; // for exit->spinner handoff optimization - volatile int _SpinFreq; // Spin 1-out-of-N attempts: success rate - volatile int _SpinClock; - volatile int _SpinDuration; - volatile intptr_t _SpinState; // MCS/CLH list of spinners - - // TODO-FIXME: _count, _waiters and _recursions should be of - // type int, or int32_t but not intptr_t. There's no reason - // to use 64-bit fields for these variables on a 64-bit JVM. - - volatile intptr_t _count; // reference count to prevent reclamation/deflation - // at stop-the-world time. See deflate_idle_monitors(). - // _count is approximately |_WaitSet| + |_EntryList| - protected: - volatile intptr_t _waiters; // number of waiting threads - private: - protected: - ObjectWaiter * volatile _WaitSet; // LL of threads wait()ing on the monitor - private: - volatile int _WaitSetLock; // protects Wait Queue - simple spinlock - - public: - int _QMix; // Mixed prepend queue discipline - ObjectMonitor * FreeNext; // Free list linkage - intptr_t StatA, StatsB; - - public: - static void Initialize(); - static PerfCounter * _sync_ContendedLockAttempts; - static PerfCounter * _sync_FutileWakeups; - static PerfCounter * _sync_Parks; - static PerfCounter * _sync_EmptyNotifications; - static PerfCounter * _sync_Notifications; - static PerfCounter * _sync_SlowEnter; - static PerfCounter * _sync_SlowExit; - static PerfCounter * _sync_SlowNotify; - static PerfCounter * _sync_SlowNotifyAll; - static PerfCounter * _sync_FailedSpins; - static PerfCounter * _sync_SuccessfulSpins; - static PerfCounter * _sync_PrivateA; - static PerfCounter * _sync_PrivateB; - static PerfCounter * _sync_MonInCirculation; - static PerfCounter * _sync_MonScavenged; - static PerfCounter * _sync_Inflations; - static PerfCounter * _sync_Deflations; - static 
PerfLongVariable * _sync_MonExtant; - - public: - static int Knob_Verbose; - static int Knob_VerifyInUse; - static int Knob_SpinLimit; - void* operator new (size_t size) throw() { - return AllocateHeap(size, mtInternal); - } - void* operator new[] (size_t size) throw() { - return operator new (size); - } - void operator delete(void* p) { - FreeHeap(p, mtInternal); - } - void operator delete[] (void *p) { - operator delete(p); - } }; #undef TEVENT diff -r 6523fa019ffa -r 785a8d56024c hotspot/src/share/vm/runtime/objectMonitor.inline.hpp --- a/hotspot/src/share/vm/runtime/objectMonitor.inline.hpp Mon Oct 13 22:11:39 2014 +0200 +++ b/hotspot/src/share/vm/runtime/objectMonitor.inline.hpp Tue Oct 14 10:32:12 2014 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -40,15 +40,11 @@ _header = hdr; } -inline intptr_t ObjectMonitor::count() const { +inline jint ObjectMonitor::count() const { return _count; } -inline void ObjectMonitor::set_count(intptr_t count) { - _count= count; -} - -inline intptr_t ObjectMonitor::waiters() const { +inline jint ObjectMonitor::waiters() const { return _waiters; } @@ -61,7 +57,7 @@ assert(_count == 0, "Fatal logic error in ObjectMonitor count!"); assert(_waiters == 0, "Fatal logic error in ObjectMonitor waiters!"); assert(_recursions == 0, "Fatal logic error in ObjectMonitor recursions!"); - assert(_object, "Fatal logic error in ObjectMonitor object!"); + assert(_object != NULL, "Fatal logic error in ObjectMonitor object!"); assert(_owner == 0, "Fatal logic error in ObjectMonitor owner!"); _header = NULL; @@ -85,7 +81,6 @@ if (THREAD != _owner) { if (THREAD->is_lock_owned((address) _owner)) { _owner = THREAD; // regain ownership of inflated monitor - OwnerIsThread = 1 ; 
assert (_recursions == 0, "invariant") ; } else { check_slow(THREAD); @@ -97,7 +92,7 @@ // return number of threads contending for this monitor -inline intptr_t ObjectMonitor::contentions() const { +inline jint ObjectMonitor::contentions() const { return _count; } diff -r 6523fa019ffa -r 785a8d56024c hotspot/src/share/vm/runtime/synchronizer.cpp --- a/hotspot/src/share/vm/runtime/synchronizer.cpp Mon Oct 13 22:11:39 2014 +0200 +++ b/hotspot/src/share/vm/runtime/synchronizer.cpp Tue Oct 14 10:32:12 2014 -0700 @@ -24,6 +24,7 @@ #include "precompiled.hpp" #include "classfile/vmSymbols.hpp" +#include "memory/padded.hpp" #include "memory/resourceArea.hpp" #include "oops/markOop.hpp" #include "oops/oop.inline.hpp" @@ -110,6 +111,8 @@ #define NINFLATIONLOCKS 256 static volatile intptr_t InflationLocks[NINFLATIONLOCKS]; +// gBlockList is really PaddedEnd *, but we don't +// want to expose the PaddedEnd template more than necessary. ObjectMonitor * ObjectSynchronizer::gBlockList = NULL; ObjectMonitor * volatile ObjectSynchronizer::gFreeList = NULL; ObjectMonitor * volatile ObjectSynchronizer::gOmInUseList = NULL; @@ -410,16 +413,15 @@ // performed by the CPU(s) or platform. struct SharedGlobals { + char _pad_prefix[DEFAULT_CACHE_LINE_SIZE]; // These are highly shared mostly-read variables. - // To avoid false-sharing they need to be the sole occupants of a $ line. - double padPrefix[8]; + // To avoid false-sharing they need to be the sole occupants of a cache line. volatile int stwRandom; volatile int stwCycle; - - // Hot RW variables -- Sequester to avoid false-sharing - double padSuffix[16]; + DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int) * 2); + // Hot RW variable -- Sequester to avoid false-sharing volatile int hcSequence; - double padFinal[8]; + DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int)); }; static SharedGlobals GVars; @@ -780,18 +782,18 @@ // Visitors ... 
void ObjectSynchronizer::monitors_iterate(MonitorClosure* closure) { - ObjectMonitor* block = gBlockList; + PaddedEnd * block = (PaddedEnd *)gBlockList; ObjectMonitor* mid; while (block) { assert(block->object() == CHAINMARKER, "must be a block header"); for (int i = _BLOCKSIZE - 1; i > 0; i--) { - mid = block + i; + mid = (ObjectMonitor *)(block + i); oop object = (oop) mid->object(); if (object != NULL) { closure->do_monitor(mid); } } - block = (ObjectMonitor*) block->FreeNext; + block = (PaddedEnd *) block->FreeNext; } } @@ -806,10 +808,12 @@ void ObjectSynchronizer::oops_do(OopClosure* f) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); - for (ObjectMonitor* block = gBlockList; block != NULL; block = next(block)) { + for (PaddedEnd * block = + (PaddedEnd *)gBlockList; block != NULL; + block = (PaddedEnd *)next(block)) { assert(block->object() == CHAINMARKER, "must be a block header"); for (int i = 1; i < _BLOCKSIZE; i++) { - ObjectMonitor* mid = &block[i]; + ObjectMonitor* mid = (ObjectMonitor *)&block[i]; if (mid->object() != NULL) { f->do_oop((oop*)mid->object_addr()); } @@ -966,16 +970,29 @@ // 3: allocate a block of new ObjectMonitors // Both the local and global free lists are empty -- resort to malloc(). // In the current implementation objectMonitors are TSM - immortal. + // Ideally, we'd write "new ObjectMonitor[_BLOCKSIZE], but we want + // each ObjectMonitor to start at the beginning of a cache line, + // so we use align_size_up(). + // A better solution would be to use C++ placement-new. + // BEWARE: As it stands currently, we don't run the ctors! 
assert(_BLOCKSIZE > 1, "invariant"); - ObjectMonitor * temp = new ObjectMonitor[_BLOCKSIZE]; + size_t neededsize = sizeof(PaddedEnd) * _BLOCKSIZE; + PaddedEnd * temp; + size_t aligned_size = neededsize + (DEFAULT_CACHE_LINE_SIZE - 1); + void* real_malloc_addr = (void *)NEW_C_HEAP_ARRAY(char, aligned_size, + mtInternal); + temp = (PaddedEnd *) + align_size_up((intptr_t)real_malloc_addr, + DEFAULT_CACHE_LINE_SIZE); // NOTE: (almost) no way to recover if allocation failed. // We might be able to induce a STW safepoint and scavenge enough // objectMonitors to permit progress. if (temp == NULL) { - vm_exit_out_of_memory(sizeof (ObjectMonitor[_BLOCKSIZE]), OOM_MALLOC_ERROR, + vm_exit_out_of_memory(neededsize, OOM_MALLOC_ERROR, "Allocate ObjectMonitors"); } + (void)memset((void *) temp, 0, neededsize); // Format the block. // initialize the linked list, each monitor points to its next @@ -986,7 +1003,7 @@ // look like: class Block { Block * next; int N; ObjectMonitor Body [N] ; } for (int i = 1; i < _BLOCKSIZE; i++) { - temp[i].FreeNext = &temp[i+1]; + temp[i].FreeNext = (ObjectMonitor *)&temp[i+1]; } // terminate the last monitor as the end of list @@ -1141,10 +1158,6 @@ } -// Note that we could encounter some performance loss through false-sharing as -// multiple locks occupy the same $ line. Padding might be appropriate. - - ObjectMonitor * NOINLINE ObjectSynchronizer::inflate(Thread * Self, oop object) { // Inflate mutates the heap ... @@ -1210,7 +1223,6 @@ // in which INFLATING appears in the mark. m->Recycle(); m->_Responsible = NULL; - m->OwnerIsThread = 0; m->_recursions = 0; m->_SpinDuration = ObjectMonitor::Knob_SpinLimit; // Consider: maintain by type/class @@ -1257,8 +1269,8 @@ m->set_header(dmw); // Optimization: if the mark->locker stack address is associated - // with this thread we could simply set m->_owner = Self and - // m->OwnerIsThread = 1. Note that a thread can inflate an object + // with this thread we could simply set m->_owner = Self. 
+ // Note that a thread can inflate an object // that it has stack-locked -- as might happen in wait() -- directly // with CAS. That is, we can avoid the xchg-NULL .... ST idiom. m->set_owner(mark->locker()); @@ -1302,7 +1314,6 @@ m->set_header(mark); m->set_owner(NULL); m->set_object(object); - m->OwnerIsThread = 1; m->_recursions = 0; m->_Responsible = NULL; m->_SpinDuration = ObjectMonitor::Knob_SpinLimit; // consider: keep metastats by type/class @@ -1310,7 +1321,6 @@ if (Atomic::cmpxchg_ptr (markOopDesc::encode(m), object->mark_addr(), mark) != mark) { m->set_object(NULL); m->set_owner(NULL); - m->OwnerIsThread = 0; m->Recycle(); omRelease(Self, m, true); m = NULL; @@ -1336,9 +1346,6 @@ } } -// Note that we could encounter some performance loss through false-sharing as -// multiple locks occupy the same $ line. Padding might be appropriate. - // Deflate_idle_monitors() is called at all safepoints, immediately // after all mutators are stopped, but before any objects have moved. @@ -1491,12 +1498,14 @@ nInuse += gOmInUseCount; } - } else for (ObjectMonitor* block = gBlockList; block != NULL; block = next(block)) { + } else for (PaddedEnd * block = + (PaddedEnd *)gBlockList; block != NULL; + block = (PaddedEnd *)next(block)) { // Iterate over all extant monitors - Scavenge all idle monitors. assert(block->object() == CHAINMARKER, "must be a block header"); nInCirculation += _BLOCKSIZE; for (int i = 1; i < _BLOCKSIZE; i++) { - ObjectMonitor* mid = &block[i]; + ObjectMonitor* mid = (ObjectMonitor*)&block[i]; oop obj = (oop) mid->object(); if (obj == NULL) { @@ -1648,18 +1657,18 @@ // Verify all monitors in the monitor cache, the verification is weak. 
void ObjectSynchronizer::verify() { - ObjectMonitor* block = gBlockList; + PaddedEnd * block = (PaddedEnd *)gBlockList; ObjectMonitor* mid; while (block) { assert(block->object() == CHAINMARKER, "must be a block header"); for (int i = 1; i < _BLOCKSIZE; i++) { - mid = block + i; + mid = (ObjectMonitor *)(block + i); oop object = (oop) mid->object(); if (object != NULL) { mid->verify(); } } - block = (ObjectMonitor*) block->FreeNext; + block = (PaddedEnd *) block->FreeNext; } } @@ -1668,18 +1677,19 @@ // the list of extant blocks without taking a lock. int ObjectSynchronizer::verify_objmon_isinpool(ObjectMonitor *monitor) { - ObjectMonitor* block = gBlockList; + PaddedEnd * block = (PaddedEnd *)gBlockList; while (block) { assert(block->object() == CHAINMARKER, "must be a block header"); - if (monitor > &block[0] && monitor < &block[_BLOCKSIZE]) { + if (monitor > (ObjectMonitor *)&block[0] && + monitor < (ObjectMonitor *)&block[_BLOCKSIZE]) { address mon = (address) monitor; address blk = (address) block; size_t diff = mon - blk; - assert((diff % sizeof(ObjectMonitor)) == 0, "check"); + assert((diff % sizeof(PaddedEnd)) == 0, "check"); return 1; } - block = (ObjectMonitor*) block->FreeNext; + block = (PaddedEnd *) block->FreeNext; } return 0; } diff -r 6523fa019ffa -r 785a8d56024c hotspot/src/share/vm/runtime/synchronizer.hpp --- a/hotspot/src/share/vm/runtime/synchronizer.hpp Mon Oct 13 22:11:39 2014 +0200 +++ b/hotspot/src/share/vm/runtime/synchronizer.hpp Tue Oct 14 10:32:12 2014 -0700 @@ -134,6 +134,8 @@ private: enum { _BLOCKSIZE = 128 }; + // gBlockList is really PaddedEnd *, but we don't + // want to expose the PaddedEnd template more than necessary. 
static ObjectMonitor* gBlockList; static ObjectMonitor * volatile gFreeList; // global monitor in use list, for moribund threads, diff -r 6523fa019ffa -r 785a8d56024c hotspot/src/share/vm/runtime/vmStructs.cpp --- a/hotspot/src/share/vm/runtime/vmStructs.cpp Mon Oct 13 22:11:39 2014 +0200 +++ b/hotspot/src/share/vm/runtime/vmStructs.cpp Tue Oct 14 10:32:12 2014 -0700 @@ -1070,8 +1070,8 @@ volatile_nonstatic_field(ObjectMonitor, _header, markOop) \ unchecked_nonstatic_field(ObjectMonitor, _object, sizeof(void *)) /* NOTE: no type */ \ unchecked_nonstatic_field(ObjectMonitor, _owner, sizeof(void *)) /* NOTE: no type */ \ - volatile_nonstatic_field(ObjectMonitor, _count, intptr_t) \ - volatile_nonstatic_field(ObjectMonitor, _waiters, intptr_t) \ + volatile_nonstatic_field(ObjectMonitor, _count, jint) \ + volatile_nonstatic_field(ObjectMonitor, _waiters, jint) \ volatile_nonstatic_field(ObjectMonitor, _recursions, intptr_t) \ nonstatic_field(ObjectMonitor, FreeNext, ObjectMonitor*) \ volatile_nonstatic_field(BasicLock, _displaced_header, markOop) \ @@ -2507,6 +2507,12 @@ declare_constant(Deoptimization::Action_make_not_compilable) \ declare_constant(Deoptimization::Action_LIMIT) \ \ + /***************************************************/ \ + /* DEFAULT_CACHE_LINE_SIZE (globalDefinitions.hpp) */ \ + /***************************************************/ \ + \ + declare_constant(DEFAULT_CACHE_LINE_SIZE) \ + \ /*********************/ \ /* Matcher (C2 only) */ \ /*********************/ \ diff -r 6523fa019ffa -r 785a8d56024c hotspot/src/share/vm/utilities/globalDefinitions.hpp --- a/hotspot/src/share/vm/utilities/globalDefinitions.hpp Mon Oct 13 22:11:39 2014 +0200 +++ b/hotspot/src/share/vm/utilities/globalDefinitions.hpp Tue Oct 14 10:32:12 2014 -0700 @@ -540,7 +540,9 @@ // The expected size in bytes of a cache line, used to pad data structures. 
-#define DEFAULT_CACHE_LINE_SIZE 64 +#ifndef DEFAULT_CACHE_LINE_SIZE + #define DEFAULT_CACHE_LINE_SIZE 64 +#endif //----------------------------------------------------------------------------------------------------