8049737: Contended Locking reorder and cache line bucket
Summary: JEP-143/JDK-8046133 - optimization #1 - reorder and cache line bucket.
Reviewed-by: shade, dice, dholmes, dsimms
Contributed-by: dave.dice@oracle.com, karen.kinnear@oracle.com, daniel.daugherty@oracle.com
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/ObjectMonitor.java Mon Oct 13 22:11:39 2014 +0200
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/ObjectMonitor.java Tue Oct 14 10:32:12 2014 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2005, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -50,8 +50,8 @@
ownerFieldOffset = f.getOffset();
f = type.getField("FreeNext");
FreeNextFieldOffset = f.getOffset();
- countField = type.getCIntegerField("_count");
- waitersField = type.getCIntegerField("_waiters");
+ countField = type.getJIntField("_count");
+ waitersField = type.getJIntField("_waiters");
recursionsField = type.getCIntegerField("_recursions");
}
@@ -81,15 +81,15 @@
// FIXME
// void set_owner(void* owner);
- public long waiters() { return waitersField.getValue(addr); }
+ public int waiters() { return waitersField.getValue(addr); }
public Address freeNext() { return addr.getAddressAt(FreeNextFieldOffset); }
// FIXME
// void set_queue(void* owner);
- public long count() { return countField.getValue(addr); }
+ public int count() { return countField.getValue(addr); }
// FIXME
- // void set_count(intptr_t count);
+ // void set_count(int count);
public long recursions() { return recursionsField.getValue(addr); }
@@ -97,18 +97,9 @@
return addr.getOopHandleAt(objectFieldOffset);
}
- public long contentions() {
- // refer to objectMonitor_xxx.inline.hpp - contentions definition.
- // for Solaris and Linux, contentions is same as count. for Windows
- // it is different (objectMonitor_win32.inline.hpp)
- long count = count();
- if (VM.getVM().getOS().equals("win32")) {
- // don't count the owner of the monitor
- return count > 0? count - 1 : 0;
- } else {
- // Solaris and Linux
- return count;
- }
+ // contentions is always equal to count
+ public int contentions() {
+ return count();
}
// FIXME
@@ -123,8 +114,8 @@
private static long objectFieldOffset;
private static long ownerFieldOffset;
private static long FreeNextFieldOffset;
- private static CIntegerField countField;
- private static CIntegerField waitersField;
+ private static JIntField countField;
+ private static JIntField waitersField;
private static CIntegerField recursionsField;
// FIXME: expose platform-dependent stuff
}
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/ObjectSynchronizer.java Mon Oct 13 22:11:39 2014 +0200
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/ObjectSynchronizer.java Tue Oct 14 10:32:12 2014 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2007, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -48,9 +48,17 @@
blockListField = type.getAddressField("gBlockList");
gBlockListAddr = blockListField.getValue();
blockSize = db.lookupIntConstant("ObjectSynchronizer::_BLOCKSIZE").intValue();
+ defaultCacheLineSize = db.lookupIntConstant("DEFAULT_CACHE_LINE_SIZE").intValue();
} catch (RuntimeException e) { }
type = db.lookupType("ObjectMonitor");
objectMonitorTypeSize = type.getSize();
+ if (defaultCacheLineSize > 0 && (objectMonitorTypeSize % defaultCacheLineSize) != 0) {
+ // sizeof(ObjectMonitor) is not already a multiple of a cache line, so
+ // ObjectSynchronizer pads each ObjectMonitor in a block to the next cache
+ // line boundary. Skipped (no % by zero) if the size lookup above failed.
+ int needLines = ((int)objectMonitorTypeSize / defaultCacheLineSize) + 1;
+ objectMonitorTypeSize = needLines * defaultCacheLineSize;
+ }
}
public long identityHashValueFor(Oop obj) {
@@ -122,6 +130,7 @@
private static Address gBlockListAddr;
private static int blockSize;
+ private static int defaultCacheLineSize;
private static long objectMonitorTypeSize;
}
--- a/hotspot/src/cpu/sparc/vm/globalDefinitions_sparc.hpp Mon Oct 13 22:11:39 2014 +0200
+++ b/hotspot/src/cpu/sparc/vm/globalDefinitions_sparc.hpp Tue Oct 14 10:32:12 2014 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -38,4 +38,26 @@
#define SUPPORTS_NATIVE_CX8
+// The expected size in bytes of a cache line, used to pad data structures.
+#if defined(TIERED)
+ #ifdef _LP64
+ // tiered, 64-bit, large machine
+ #define DEFAULT_CACHE_LINE_SIZE 128
+ #else
+ // tiered, 32-bit, medium machine
+ #define DEFAULT_CACHE_LINE_SIZE 64
+ #endif
+#elif defined(COMPILER1)
+ // pure C1, 32-bit, small machine
+ #define DEFAULT_CACHE_LINE_SIZE 16
+#elif defined(COMPILER2) || defined(SHARK)
+ #ifdef _LP64
+ // pure C2, 64-bit, large machine
+ #define DEFAULT_CACHE_LINE_SIZE 128
+ #else
+ // pure C2, 32-bit, medium machine
+ #define DEFAULT_CACHE_LINE_SIZE 64
+ #endif
+#endif
+
#endif // CPU_SPARC_VM_GLOBALDEFINITIONS_SPARC_HPP
--- a/hotspot/src/cpu/x86/vm/globalDefinitions_x86.hpp Mon Oct 13 22:11:39 2014 +0200
+++ b/hotspot/src/cpu/x86/vm/globalDefinitions_x86.hpp Tue Oct 14 10:32:12 2014 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -35,4 +35,27 @@
#define SUPPORTS_NATIVE_CX8
+// The expected size in bytes of a cache line, used to pad data structures.
+#if defined(TIERED)
+ #ifdef _LP64
+ // tiered, 64-bit, large machine
+ #define DEFAULT_CACHE_LINE_SIZE 128
+ #else
+ // tiered, 32-bit, medium machine
+ #define DEFAULT_CACHE_LINE_SIZE 64
+ #endif
+#elif defined(COMPILER1)
+ // pure C1, 32-bit, small machine
+ // i486 was the last Intel chip with 16-byte cache line size
+ #define DEFAULT_CACHE_LINE_SIZE 32
+#elif defined(COMPILER2) || defined(SHARK)
+ #ifdef _LP64
+ // pure C2, 64-bit, large machine
+ #define DEFAULT_CACHE_LINE_SIZE 128
+ #else
+ // pure C2, 32-bit, medium machine
+ #define DEFAULT_CACHE_LINE_SIZE 64
+ #endif
+#endif
+
#endif // CPU_X86_VM_GLOBALDEFINITIONS_X86_HPP
--- a/hotspot/src/share/vm/memory/padded.hpp Mon Oct 13 22:11:39 2014 +0200
+++ b/hotspot/src/share/vm/memory/padded.hpp Tue Oct 14 10:32:12 2014 -0700
@@ -76,10 +76,16 @@
// if the start address is a multiple of alignment.
template <class T, size_t alignment = DEFAULT_CACHE_LINE_SIZE>
class PaddedEnd : public PaddedEndImpl<T, PADDED_END_SIZE(T, alignment)> {
- // C++ don't allow zero-length arrays. The padding is put in a
+ // C++ doesn't allow zero-length arrays. The padding is put in a
// super class that is specialized for the pad_size == 0 case.
};
+// Similar to PaddedEnd, this macro declares a char array _pad_buf<id> of
+// (alignment - size) bytes, so size must be smaller than alignment. It is
+// used to add padding in between non-class fields in a class or struct.
+#define DEFINE_PAD_MINUS_SIZE(id, alignment, size) \
+ char _pad_buf##id[(alignment) - (size)]
+
// Helper class to create an array of PaddedEnd<T> objects. All elements will
// start at a multiple of alignment and the size will be aligned to alignment.
template <class T, MEMFLAGS flags, size_t alignment = DEFAULT_CACHE_LINE_SIZE>
--- a/hotspot/src/share/vm/prims/jvmtiEnvBase.cpp Mon Oct 13 22:11:39 2014 +0200
+++ b/hotspot/src/share/vm/prims/jvmtiEnvBase.cpp Tue Oct 14 10:32:12 2014 -0700
@@ -1031,7 +1031,7 @@
// implied else: entry_count == 0
}
- int nWant,nWait;
+ jint nWant, nWait;
if (mon != NULL) {
// this object has a heavyweight monitor
nWant = mon->contentions(); // # of threads contending for monitor
--- a/hotspot/src/share/vm/runtime/objectMonitor.cpp Mon Oct 13 22:11:39 2014 +0200
+++ b/hotspot/src/share/vm/runtime/objectMonitor.cpp Tue Oct 14 10:32:12 2014 -0700
@@ -257,7 +257,6 @@
assert(_recursions == 0, "internal state error");
_owner = THREAD;
_recursions = 1;
- OwnerIsThread = 1;
return true;
}
if (Atomic::cmpxchg_ptr (THREAD, &_owner, NULL) != NULL) {
@@ -280,7 +279,6 @@
// Either ASSERT _recursions == 0 or explicitly set _recursions = 0.
assert(_recursions == 0, "invariant");
assert(_owner == Self, "invariant");
- // CONSIDER: set or assert OwnerIsThread == 1
return;
}
@@ -296,7 +294,6 @@
// Commute owner from a thread-specific on-stack BasicLockObject address to
// a full-fledged "Thread *".
_owner = Self;
- OwnerIsThread = 1;
return;
}
@@ -328,7 +325,7 @@
// Prevent deflation at STW-time. See deflate_idle_monitors() and is_busy().
// Ensure the object-monitor relationship remains stable while there's contention.
- Atomic::inc_ptr(&_count);
+ Atomic::inc(&_count);
EventJavaMonitorEnter event;
@@ -384,7 +381,7 @@
// acquire it.
}
- Atomic::dec_ptr(&_count);
+ Atomic::dec(&_count);
assert(_count >= 0, "invariant");
Self->_Stalled = 0;
@@ -440,7 +437,6 @@
// Either guarantee _recursions == 0 or set _recursions = 0.
assert(_recursions == 0, "invariant");
assert(_owner == Self, "invariant");
- // CONSIDER: set or assert that OwnerIsThread == 1
return 1;
}
// The lock had been free momentarily, but we lost the race to the lock.
@@ -922,7 +918,6 @@
assert(_recursions == 0, "invariant");
_owner = THREAD;
_recursions = 0;
- OwnerIsThread = 1;
} else {
// Apparent unbalanced locking ...
// Naively we'd like to throw IllegalMonitorStateException.
@@ -1346,7 +1341,6 @@
assert(_recursions == 0, "internal state error");
_owner = THREAD; // Convert from basiclock addr to Thread addr
_recursions = 0;
- OwnerIsThread = 1;
}
}
@@ -1385,7 +1379,6 @@
if (THREAD->is_lock_owned((address) _owner)) { \
_owner = THREAD; /* Convert from basiclock addr to Thread addr */ \
_recursions = 0; \
- OwnerIsThread = 1; \
} else { \
TEVENT(Throw IMSX); \
THROW(vmSymbols::java_lang_IllegalMonitorStateException()); \
@@ -1906,8 +1899,8 @@
// a contending thread could enqueue itself on the cxq and then spin locally
// on a thread-specific variable such as its ParkEvent._Event flag.
// That's left as an exercise for the reader. Note that global spinning is
-// not problematic on Niagara, as the L2$ serves the interconnect and has both
-// low latency and massive bandwidth.
+// not problematic on Niagara, as the L2 cache serves the interconnect and
+// has both low latency and massive bandwidth.
//
// Broadly, we can fix the spin frequency -- that is, the % of contended lock
// acquisition attempts where we opt to spin -- at 100% and vary the spin count
@@ -2208,7 +2201,7 @@
// as advisory.
//
// Beware too, that _owner is sometimes a BasicLock address and sometimes
-// a thread pointer. We differentiate the two cases with OwnerIsThread.
+// a thread pointer.
// Alternately, we might tag the type (thread pointer vs basiclock pointer)
// with the LSB of _owner. Another option would be to probablistically probe
// the putative _owner->TypeTag value.
@@ -2230,9 +2223,7 @@
int ObjectMonitor::NotRunnable(Thread * Self, Thread * ox) {
- // Check either OwnerIsThread or ox->TypeTag == 2BAD.
- if (!OwnerIsThread) return 0;
-
+ // Check ox->TypeTag == 2BAD.
if (ox == NULL) return 0;
// Avoid transitive spinning ...
@@ -2399,20 +2390,6 @@
}
}
-
-// Compile-time asserts
-// When possible, it's better to catch errors deterministically at
-// compile-time than at runtime. The down-side to using compile-time
-// asserts is that error message -- often something about negative array
-// indices -- is opaque.
-
-#define CTASSERT(x) { int tag[1-(2*!(x))]; printf ("Tag @" INTPTR_FORMAT "\n", (intptr_t)tag); }
-
-void ObjectMonitor::ctAsserts() {
- CTASSERT(offset_of (ObjectMonitor, _header) == 0);
-}
-
-
static char * kvGet(char * kvList, const char * Key) {
if (kvList == NULL) return NULL;
size_t n = strlen(Key);
@@ -2526,6 +2503,8 @@
if (verbose) {
tty->print_cr("INFO: sizeof(ObjectMonitor)=" SIZE_FORMAT,
sizeof(ObjectMonitor));
+ tty->print_cr("INFO: sizeof(PaddedEnd<ObjectMonitor>)=" SIZE_FORMAT,
+ sizeof(PaddedEnd<ObjectMonitor>));
}
uint cache_line_size = VM_Version::L1_data_cache_line_size();
@@ -2559,9 +2538,9 @@
warning_cnt++;
}
- if ((sizeof(ObjectMonitor) % cache_line_size) != 0) {
- tty->print_cr("WARNING: ObjectMonitor size is not a multiple of "
- "a cache line which permits false sharing.");
+ if ((sizeof(PaddedEnd<ObjectMonitor>) % cache_line_size) != 0) {
+ tty->print_cr("WARNING: PaddedEnd<ObjectMonitor> size is not a "
+ "multiple of a cache line which permits false sharing.");
warning_cnt++;
}
}
--- a/hotspot/src/share/vm/runtime/objectMonitor.hpp Mon Oct 13 22:11:39 2014 +0200
+++ b/hotspot/src/share/vm/runtime/objectMonitor.hpp Tue Oct 14 10:32:12 2014 -0700
@@ -25,6 +25,7 @@
#ifndef SHARE_VM_RUNTIME_OBJECTMONITOR_HPP
#define SHARE_VM_RUNTIME_OBJECTMONITOR_HPP
+#include "memory/padded.hpp"
#include "runtime/os.hpp"
#include "runtime/park.hpp"
#include "runtime/perfData.hpp"
@@ -58,21 +59,71 @@
// forward declaration to avoid include tracing.hpp
class EventJavaMonitorWait;
-// WARNING:
-// This is a very sensitive and fragile class. DO NOT make any
-// change unless you are fully aware of the underlying semantics.
-
-// This class can not inherit from any other class, because I have
-// to let the displaced header be the very first word. Otherwise I
-// have to let markOop include this file, which would export the
-// monitor data structure to everywhere.
+// The ObjectMonitor class implements the heavyweight version of a
+// JavaMonitor. The lightweight BasicLock/stack lock version has been
+// inflated into an ObjectMonitor. This inflation is typically due to
+// contention or use of Object.wait().
+//
+// WARNING: This is a very sensitive and fragile class. DO NOT make any
+// changes unless you are fully aware of the underlying semantics.
+//
+// Class JvmtiRawMonitor currently inherits from ObjectMonitor so
+// changes in this class must be careful to not break JvmtiRawMonitor.
+// These two subsystems should be separated.
+//
+// ObjectMonitor Layout Overview/Highlights/Restrictions:
//
-// The ObjectMonitor class is used to implement JavaMonitors which have
-// transformed from the lightweight structure of the thread stack to a
-// heavy weight lock due to contention
-
-// It is also used as RawMonitor by the JVMTI
-
+// - The _header field must be at offset 0 because the displaced header
+// from markOop is stored there. We do not want markOop.hpp to include
+// ObjectMonitor.hpp to avoid exposing ObjectMonitor everywhere. This
+// means that ObjectMonitor cannot inherit from any other class nor can
+// it use any virtual member functions. This restriction is critical to
+// the proper functioning of the VM.
+// - The _header and _owner fields should be separated by enough space
+// to avoid false sharing due to parallel access by different threads.
+// This is an advisory recommendation.
+// - The general layout of the fields in ObjectMonitor is:
+// _header
+// <lightly_used_fields>
+// <optional padding>
+// _owner
+// <remaining_fields>
+// - The VM assumes write ordering and machine word alignment with
+// respect to the _owner field and the <remaining_fields> that can
+// be read in parallel by other threads.
+// - Generally fields that are accessed closely together in time should
+// be placed proximally in space to promote data cache locality. That
+// is, temporal locality should condition spatial locality.
+// - We have to balance avoiding false sharing with excessive invalidation
+// from coherence traffic. As such, we try to cluster fields that tend
+// to be _written_ at approximately the same time onto the same data
+// cache line.
+// - We also have to balance the natural tension between minimizing
+// single-threaded capacity misses and avoiding excessive multi-threaded
+// coherency misses. There is no single optimal layout for both
+// single-threaded and multi-threaded environments.
+//
+// - See ObjectMonitor::sanity_checks() for how critical restrictions are
+// enforced and advisory recommendations are reported.
+// - Adjacent ObjectMonitors should be separated by enough space to avoid
+// false sharing. This is handled by the ObjectMonitor allocation code
+// in synchronizer.cpp. Also see ObjectSynchronizer::sanity_checks().
+//
+// Future notes:
+// - Separating _owner from the <remaining_fields> by enough space to
+// avoid false sharing might be profitable. Given
+// http://blogs.oracle.com/dave/entry/cas_and_cache_trivia_invalidate
+// we know that the CAS in monitorenter will invalidate the line
+// underlying _owner. We want to avoid an L1 data cache miss on that
+// same line for monitorexit. Putting these <remaining_fields>:
+// _recursions, _EntryList, _cxq, and _succ, all of which may be
+// fetched in the inflated unlock path, on a different cache line
+// would make them immune to CAS-based invalidation from the _owner
+// field.
+//
+// - The _recursions field should be of type int, or int32_t but not
+// intptr_t. There's no reason to use a 64-bit type for this field
+// in a 64-bit JVM.
class ObjectMonitor {
public:
@@ -84,7 +135,84 @@
OM_TIMED_OUT // Object.wait() timed out
};
+ private:
+ friend class ObjectSynchronizer;
+ friend class ObjectWaiter;
+ friend class VMStructs;
+
+ volatile markOop _header; // displaced object header word - mark
+ void* volatile _object; // backward object pointer - strong root
public:
+ ObjectMonitor * FreeNext; // Free list linkage
+ private:
+ DEFINE_PAD_MINUS_SIZE(0, DEFAULT_CACHE_LINE_SIZE,
+ sizeof(volatile markOop) + sizeof(void * volatile) +
+ sizeof(ObjectMonitor *));
+ protected: // protected for JvmtiRawMonitor
+ void * volatile _owner; // pointer to owning thread OR BasicLock
+ volatile jlong _previous_owner_tid; // thread id of the previous owner of the monitor
+ volatile intptr_t _recursions; // recursion count, 0 for first entry
+ ObjectWaiter * volatile _EntryList; // Threads blocked on entry or reentry.
+ // The list is actually composed of WaitNodes,
+ // acting as proxies for Threads.
+ private:
+ ObjectWaiter * volatile _cxq; // LL of recently-arrived threads blocked on entry.
+ Thread * volatile _succ; // Heir presumptive thread - used for futile wakeup throttling
+ Thread * volatile _Responsible;
+
+ volatile int _Spinner; // for exit->spinner handoff optimization
+ volatile int _SpinFreq; // Spin 1-out-of-N attempts: success rate
+ volatile int _SpinClock;
+ volatile intptr_t _SpinState; // MCS/CLH list of spinners
+ volatile int _SpinDuration;
+
+ volatile jint _count; // reference count to prevent reclamation/deflation
+ // at stop-the-world time. See deflate_idle_monitors().
+ // _count is approximately |_WaitSet| + |_EntryList|
+ protected:
+ ObjectWaiter * volatile _WaitSet; // LL of threads wait()ing on the monitor
+ volatile jint _waiters; // number of waiting threads
+ private:
+ volatile int _WaitSetLock; // protects Wait Queue - simple spinlock
+
+ public:
+ static void Initialize();
+ static PerfCounter * _sync_ContendedLockAttempts;
+ static PerfCounter * _sync_FutileWakeups;
+ static PerfCounter * _sync_Parks;
+ static PerfCounter * _sync_EmptyNotifications;
+ static PerfCounter * _sync_Notifications;
+ static PerfCounter * _sync_SlowEnter;
+ static PerfCounter * _sync_SlowExit;
+ static PerfCounter * _sync_SlowNotify;
+ static PerfCounter * _sync_SlowNotifyAll;
+ static PerfCounter * _sync_FailedSpins;
+ static PerfCounter * _sync_SuccessfulSpins;
+ static PerfCounter * _sync_PrivateA;
+ static PerfCounter * _sync_PrivateB;
+ static PerfCounter * _sync_MonInCirculation;
+ static PerfCounter * _sync_MonScavenged;
+ static PerfCounter * _sync_Inflations;
+ static PerfCounter * _sync_Deflations;
+ static PerfLongVariable * _sync_MonExtant;
+
+ static int Knob_Verbose;
+ static int Knob_VerifyInUse;
+ static int Knob_SpinLimit;
+
+ void* operator new (size_t size) throw() {
+ return AllocateHeap(size, mtInternal);
+ }
+ void* operator new[] (size_t size) throw() {
+ return operator new (size);
+ }
+ void operator delete(void* p) {
+ FreeHeap(p, mtInternal);
+ }
+ void operator delete[] (void *p) {
+ operator delete(p);
+ }
+
// TODO-FIXME: the "offset" routines should return a type of off_t instead of int ...
// ByteSize would also be an appropriate type.
static int header_offset_in_bytes() { return offset_of(ObjectMonitor, _header); }
@@ -100,14 +228,11 @@
static int Responsible_offset_in_bytes() { return offset_of(ObjectMonitor, _Responsible); }
static int Spinner_offset_in_bytes() { return offset_of(ObjectMonitor, _Spinner); }
- public:
// Eventually we'll make provisions for multiple callbacks, but
// now one will suffice.
static int (*SpinCallbackFunction)(intptr_t, int);
static intptr_t SpinCallbackArgument;
-
- public:
markOop header() const;
void set_header(markOop hdr);
@@ -123,39 +248,22 @@
void* owner() const;
void set_owner(void* owner);
- intptr_t waiters() const;
+ jint waiters() const;
- intptr_t count() const;
- void set_count(intptr_t count);
- intptr_t contentions() const;
+ jint count() const;
+ void set_count(jint count);
+ jint contentions() const;
intptr_t recursions() const { return _recursions; }
- // JVM/DI GetMonitorInfo() needs this
+ // JVM/TI GetObjectMonitorUsage() needs this:
ObjectWaiter* first_waiter() { return _WaitSet; }
ObjectWaiter* next_waiter(ObjectWaiter* o) { return o->_next; }
Thread* thread_of_waiter(ObjectWaiter* o) { return o->_thread; }
- // initialize the monitor, exception the semaphore, all other fields
- // are simple integers or pointers
- ObjectMonitor() {
- _header = NULL;
- _count = 0;
- _waiters = 0;
- _recursions = 0;
- _object = NULL;
- _owner = NULL;
- _WaitSet = NULL;
- _WaitSetLock = 0;
- _Responsible = NULL;
- _succ = NULL;
- _cxq = NULL;
- FreeNext = NULL;
- _EntryList = NULL;
- _SpinFreq = 0;
- _SpinClock = 0;
- OwnerIsThread = 0;
- _previous_owner_tid = 0;
- }
+ protected:
+ // We don't typically expect or want the ctors or dtors to run.
+ // Normal ObjectMonitors are type-stable and immortal.
+ ObjectMonitor() { ::memset((void *)this, 0, sizeof(*this)); }
~ObjectMonitor() {
// TODO: Add asserts ...
@@ -169,7 +277,7 @@
// _cxq == 0 _succ == NULL _owner == NULL _waiters == 0
// _count == 0 EntryList == NULL
// _recursions == 0 _WaitSet == NULL
- // TODO: assert (is_busy()|_recursions) == 0
+ assert(((is_busy()|_recursions) == 0), "freeing inuse monitor");
_succ = NULL;
_EntryList = NULL;
_cxq = NULL;
@@ -177,7 +285,6 @@
_recursions = 0;
_SpinFreq = 0;
_SpinClock = 0;
- OwnerIsThread = 0;
}
public:
@@ -221,7 +328,6 @@
int TrySpin_Fixed(Thread * Self);
int TrySpin_VaryFrequency(Thread * Self);
int TrySpin_VaryDuration(Thread * Self);
- void ctAsserts();
void ExitEpilog(Thread * Self, ObjectWaiter * Wakee);
bool ExitSuspendEquivalent(JavaThread * Self);
void post_monitor_wait_event(EventJavaMonitorWait * event,
@@ -229,102 +335,6 @@
jlong timeout,
bool timedout);
- private:
- friend class ObjectSynchronizer;
- friend class ObjectWaiter;
- friend class VMStructs;
-
- // WARNING: this must be the very first word of ObjectMonitor
- // This means this class can't use any virtual member functions.
-
- volatile markOop _header; // displaced object header word - mark
- void* volatile _object; // backward object pointer - strong root
-
- double SharingPad[1]; // temp to reduce false sharing
-
- // All the following fields must be machine word aligned
- // The VM assumes write ordering wrt these fields, which can be
- // read from other threads.
-
- protected: // protected for jvmtiRawMonitor
- void * volatile _owner; // pointer to owning thread OR BasicLock
- volatile jlong _previous_owner_tid; // thread id of the previous owner of the monitor
- volatile intptr_t _recursions; // recursion count, 0 for first entry
- private:
- int OwnerIsThread; // _owner is (Thread *) vs SP/BasicLock
- ObjectWaiter * volatile _cxq; // LL of recently-arrived threads blocked on entry.
- // The list is actually composed of WaitNodes, acting
- // as proxies for Threads.
- protected:
- ObjectWaiter * volatile _EntryList; // Threads blocked on entry or reentry.
- private:
- Thread * volatile _succ; // Heir presumptive thread - used for futile wakeup throttling
- Thread * volatile _Responsible;
- int _PromptDrain; // rqst to drain cxq into EntryList ASAP
-
- volatile int _Spinner; // for exit->spinner handoff optimization
- volatile int _SpinFreq; // Spin 1-out-of-N attempts: success rate
- volatile int _SpinClock;
- volatile int _SpinDuration;
- volatile intptr_t _SpinState; // MCS/CLH list of spinners
-
- // TODO-FIXME: _count, _waiters and _recursions should be of
- // type int, or int32_t but not intptr_t. There's no reason
- // to use 64-bit fields for these variables on a 64-bit JVM.
-
- volatile intptr_t _count; // reference count to prevent reclamation/deflation
- // at stop-the-world time. See deflate_idle_monitors().
- // _count is approximately |_WaitSet| + |_EntryList|
- protected:
- volatile intptr_t _waiters; // number of waiting threads
- private:
- protected:
- ObjectWaiter * volatile _WaitSet; // LL of threads wait()ing on the monitor
- private:
- volatile int _WaitSetLock; // protects Wait Queue - simple spinlock
-
- public:
- int _QMix; // Mixed prepend queue discipline
- ObjectMonitor * FreeNext; // Free list linkage
- intptr_t StatA, StatsB;
-
- public:
- static void Initialize();
- static PerfCounter * _sync_ContendedLockAttempts;
- static PerfCounter * _sync_FutileWakeups;
- static PerfCounter * _sync_Parks;
- static PerfCounter * _sync_EmptyNotifications;
- static PerfCounter * _sync_Notifications;
- static PerfCounter * _sync_SlowEnter;
- static PerfCounter * _sync_SlowExit;
- static PerfCounter * _sync_SlowNotify;
- static PerfCounter * _sync_SlowNotifyAll;
- static PerfCounter * _sync_FailedSpins;
- static PerfCounter * _sync_SuccessfulSpins;
- static PerfCounter * _sync_PrivateA;
- static PerfCounter * _sync_PrivateB;
- static PerfCounter * _sync_MonInCirculation;
- static PerfCounter * _sync_MonScavenged;
- static PerfCounter * _sync_Inflations;
- static PerfCounter * _sync_Deflations;
- static PerfLongVariable * _sync_MonExtant;
-
- public:
- static int Knob_Verbose;
- static int Knob_VerifyInUse;
- static int Knob_SpinLimit;
- void* operator new (size_t size) throw() {
- return AllocateHeap(size, mtInternal);
- }
- void* operator new[] (size_t size) throw() {
- return operator new (size);
- }
- void operator delete(void* p) {
- FreeHeap(p, mtInternal);
- }
- void operator delete[] (void *p) {
- operator delete(p);
- }
};
#undef TEVENT
--- a/hotspot/src/share/vm/runtime/objectMonitor.inline.hpp Mon Oct 13 22:11:39 2014 +0200
+++ b/hotspot/src/share/vm/runtime/objectMonitor.inline.hpp Tue Oct 14 10:32:12 2014 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -40,15 +40,11 @@
_header = hdr;
}
-inline intptr_t ObjectMonitor::count() const {
+inline jint ObjectMonitor::count() const {
return _count;
}
-inline void ObjectMonitor::set_count(intptr_t count) {
- _count= count;
-}
-
-inline intptr_t ObjectMonitor::waiters() const {
+inline jint ObjectMonitor::waiters() const {
return _waiters;
}
@@ -61,7 +57,7 @@
assert(_count == 0, "Fatal logic error in ObjectMonitor count!");
assert(_waiters == 0, "Fatal logic error in ObjectMonitor waiters!");
assert(_recursions == 0, "Fatal logic error in ObjectMonitor recursions!");
- assert(_object, "Fatal logic error in ObjectMonitor object!");
+ assert(_object != NULL, "Fatal logic error in ObjectMonitor object!");
assert(_owner == 0, "Fatal logic error in ObjectMonitor owner!");
_header = NULL;
@@ -85,7 +81,6 @@
if (THREAD != _owner) {
if (THREAD->is_lock_owned((address) _owner)) {
_owner = THREAD; // regain ownership of inflated monitor
- OwnerIsThread = 1 ;
assert (_recursions == 0, "invariant") ;
} else {
check_slow(THREAD);
@@ -97,7 +92,7 @@
// return number of threads contending for this monitor
-inline intptr_t ObjectMonitor::contentions() const {
+inline jint ObjectMonitor::contentions() const {
return _count;
}
--- a/hotspot/src/share/vm/runtime/synchronizer.cpp Mon Oct 13 22:11:39 2014 +0200
+++ b/hotspot/src/share/vm/runtime/synchronizer.cpp Tue Oct 14 10:32:12 2014 -0700
@@ -24,6 +24,7 @@
#include "precompiled.hpp"
#include "classfile/vmSymbols.hpp"
+#include "memory/padded.hpp"
#include "memory/resourceArea.hpp"
#include "oops/markOop.hpp"
#include "oops/oop.inline.hpp"
@@ -110,6 +111,8 @@
#define NINFLATIONLOCKS 256
static volatile intptr_t InflationLocks[NINFLATIONLOCKS];
+// gBlockList is really PaddedEnd<ObjectMonitor> *, but we don't
+// want to expose the PaddedEnd template more than necessary.
ObjectMonitor * ObjectSynchronizer::gBlockList = NULL;
ObjectMonitor * volatile ObjectSynchronizer::gFreeList = NULL;
ObjectMonitor * volatile ObjectSynchronizer::gOmInUseList = NULL;
@@ -410,16 +413,15 @@
// performed by the CPU(s) or platform.
struct SharedGlobals {
+ char _pad_prefix[DEFAULT_CACHE_LINE_SIZE];
// These are highly shared mostly-read variables.
- // To avoid false-sharing they need to be the sole occupants of a $ line.
- double padPrefix[8];
+ // To avoid false-sharing they need to be the sole occupants of a cache line.
volatile int stwRandom;
volatile int stwCycle;
-
- // Hot RW variables -- Sequester to avoid false-sharing
- double padSuffix[16];
+ DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int) * 2);
+ // Hot RW variable -- Sequester to avoid false-sharing
volatile int hcSequence;
- double padFinal[8];
+ DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int));
};
static SharedGlobals GVars;
@@ -780,18 +782,18 @@
// Visitors ...
void ObjectSynchronizer::monitors_iterate(MonitorClosure* closure) {
- ObjectMonitor* block = gBlockList;
+ PaddedEnd<ObjectMonitor> * block = (PaddedEnd<ObjectMonitor> *)gBlockList;
ObjectMonitor* mid;
while (block) {
assert(block->object() == CHAINMARKER, "must be a block header");
for (int i = _BLOCKSIZE - 1; i > 0; i--) {
- mid = block + i;
+ mid = (ObjectMonitor *)(block + i);
oop object = (oop) mid->object();
if (object != NULL) {
closure->do_monitor(mid);
}
}
- block = (ObjectMonitor*) block->FreeNext;
+ block = (PaddedEnd<ObjectMonitor> *) block->FreeNext;
}
}
@@ -806,10 +808,12 @@
void ObjectSynchronizer::oops_do(OopClosure* f) {
assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
- for (ObjectMonitor* block = gBlockList; block != NULL; block = next(block)) {
+ for (PaddedEnd<ObjectMonitor> * block =
+ (PaddedEnd<ObjectMonitor> *)gBlockList; block != NULL;
+ block = (PaddedEnd<ObjectMonitor> *)next(block)) {
assert(block->object() == CHAINMARKER, "must be a block header");
for (int i = 1; i < _BLOCKSIZE; i++) {
- ObjectMonitor* mid = &block[i];
+ ObjectMonitor* mid = (ObjectMonitor *)&block[i];
if (mid->object() != NULL) {
f->do_oop((oop*)mid->object_addr());
}
@@ -966,16 +970,29 @@
// 3: allocate a block of new ObjectMonitors
// Both the local and global free lists are empty -- resort to malloc().
// In the current implementation objectMonitors are TSM - immortal.
+ // Ideally, we'd write "new ObjectMonitor[_BLOCKSIZE]", but we want
+ // each ObjectMonitor to start at the beginning of a cache line,
+ // so we use align_size_up().
+ // A better solution would be to use C++ placement-new.
+ // BEWARE: As it stands currently, we don't run the ctors!
assert(_BLOCKSIZE > 1, "invariant");
- ObjectMonitor * temp = new ObjectMonitor[_BLOCKSIZE];
+ size_t neededsize = sizeof(PaddedEnd<ObjectMonitor>) * _BLOCKSIZE;
+ PaddedEnd<ObjectMonitor> * temp;
+ size_t aligned_size = neededsize + (DEFAULT_CACHE_LINE_SIZE - 1);
+ void* real_malloc_addr = (void *)NEW_C_HEAP_ARRAY(char, aligned_size,
+ mtInternal);
+ temp = (PaddedEnd<ObjectMonitor> *)
+ align_size_up((intptr_t)real_malloc_addr,
+ DEFAULT_CACHE_LINE_SIZE);
// NOTE: (almost) no way to recover if allocation failed.
// We might be able to induce a STW safepoint and scavenge enough
// objectMonitors to permit progress.
if (temp == NULL) {
- vm_exit_out_of_memory(sizeof (ObjectMonitor[_BLOCKSIZE]), OOM_MALLOC_ERROR,
+ vm_exit_out_of_memory(neededsize, OOM_MALLOC_ERROR,
"Allocate ObjectMonitors");
}
+ (void)memset((void *) temp, 0, neededsize);
// Format the block.
// initialize the linked list, each monitor points to its next
@@ -986,7 +1003,7 @@
// look like: class Block { Block * next; int N; ObjectMonitor Body [N] ; }
for (int i = 1; i < _BLOCKSIZE; i++) {
- temp[i].FreeNext = &temp[i+1];
+ temp[i].FreeNext = (ObjectMonitor *)&temp[i+1];
}
// terminate the last monitor as the end of list
@@ -1141,10 +1158,6 @@
}
-// Note that we could encounter some performance loss through false-sharing as
-// multiple locks occupy the same $ line. Padding might be appropriate.
-
-
ObjectMonitor * NOINLINE ObjectSynchronizer::inflate(Thread * Self,
oop object) {
// Inflate mutates the heap ...
@@ -1210,7 +1223,6 @@
// in which INFLATING appears in the mark.
m->Recycle();
m->_Responsible = NULL;
- m->OwnerIsThread = 0;
m->_recursions = 0;
m->_SpinDuration = ObjectMonitor::Knob_SpinLimit; // Consider: maintain by type/class
@@ -1257,8 +1269,8 @@
m->set_header(dmw);
// Optimization: if the mark->locker stack address is associated
- // with this thread we could simply set m->_owner = Self and
- // m->OwnerIsThread = 1. Note that a thread can inflate an object
+ // with this thread we could simply set m->_owner = Self.
+ // Note that a thread can inflate an object
// that it has stack-locked -- as might happen in wait() -- directly
// with CAS. That is, we can avoid the xchg-NULL .... ST idiom.
m->set_owner(mark->locker());
@@ -1302,7 +1314,6 @@
m->set_header(mark);
m->set_owner(NULL);
m->set_object(object);
- m->OwnerIsThread = 1;
m->_recursions = 0;
m->_Responsible = NULL;
m->_SpinDuration = ObjectMonitor::Knob_SpinLimit; // consider: keep metastats by type/class
@@ -1310,7 +1321,6 @@
if (Atomic::cmpxchg_ptr (markOopDesc::encode(m), object->mark_addr(), mark) != mark) {
m->set_object(NULL);
m->set_owner(NULL);
- m->OwnerIsThread = 0;
m->Recycle();
omRelease(Self, m, true);
m = NULL;
@@ -1336,9 +1346,6 @@
}
}
-// Note that we could encounter some performance loss through false-sharing as
-// multiple locks occupy the same $ line. Padding might be appropriate.
-
// Deflate_idle_monitors() is called at all safepoints, immediately
// after all mutators are stopped, but before any objects have moved.
@@ -1491,12 +1498,14 @@
nInuse += gOmInUseCount;
}
- } else for (ObjectMonitor* block = gBlockList; block != NULL; block = next(block)) {
+ } else for (PaddedEnd<ObjectMonitor> * block =
+ (PaddedEnd<ObjectMonitor> *)gBlockList; block != NULL;
+ block = (PaddedEnd<ObjectMonitor> *)next(block)) {
// Iterate over all extant monitors - Scavenge all idle monitors.
assert(block->object() == CHAINMARKER, "must be a block header");
nInCirculation += _BLOCKSIZE;
for (int i = 1; i < _BLOCKSIZE; i++) {
- ObjectMonitor* mid = &block[i];
+ ObjectMonitor* mid = (ObjectMonitor*)&block[i];
oop obj = (oop) mid->object();
if (obj == NULL) {
@@ -1648,18 +1657,18 @@
// Verify all monitors in the monitor cache, the verification is weak.
void ObjectSynchronizer::verify() {
- ObjectMonitor* block = gBlockList;
+ PaddedEnd<ObjectMonitor> * block = (PaddedEnd<ObjectMonitor> *)gBlockList;
ObjectMonitor* mid;
while (block) {
assert(block->object() == CHAINMARKER, "must be a block header");
for (int i = 1; i < _BLOCKSIZE; i++) {
- mid = block + i;
+ mid = (ObjectMonitor *)(block + i);
oop object = (oop) mid->object();
if (object != NULL) {
mid->verify();
}
}
- block = (ObjectMonitor*) block->FreeNext;
+ block = (PaddedEnd<ObjectMonitor> *) block->FreeNext;
}
}
@@ -1668,18 +1677,19 @@
// the list of extant blocks without taking a lock.
int ObjectSynchronizer::verify_objmon_isinpool(ObjectMonitor *monitor) {
- ObjectMonitor* block = gBlockList;
+ PaddedEnd<ObjectMonitor> * block = (PaddedEnd<ObjectMonitor> *)gBlockList;
while (block) {
assert(block->object() == CHAINMARKER, "must be a block header");
- if (monitor > &block[0] && monitor < &block[_BLOCKSIZE]) {
+ if (monitor > (ObjectMonitor *)&block[0] &&
+ monitor < (ObjectMonitor *)&block[_BLOCKSIZE]) {
address mon = (address) monitor;
address blk = (address) block;
size_t diff = mon - blk;
- assert((diff % sizeof(ObjectMonitor)) == 0, "check");
+ assert((diff % sizeof(PaddedEnd<ObjectMonitor>)) == 0, "check");
return 1;
}
- block = (ObjectMonitor*) block->FreeNext;
+ block = (PaddedEnd<ObjectMonitor> *) block->FreeNext;
}
return 0;
}
--- a/hotspot/src/share/vm/runtime/synchronizer.hpp Mon Oct 13 22:11:39 2014 +0200
+++ b/hotspot/src/share/vm/runtime/synchronizer.hpp Tue Oct 14 10:32:12 2014 -0700
@@ -134,6 +134,8 @@
private:
enum { _BLOCKSIZE = 128 };
+ // gBlockList is really PaddedEnd<ObjectMonitor> *, but we don't
+ // want to expose the PaddedEnd template more than necessary.
static ObjectMonitor* gBlockList;
static ObjectMonitor * volatile gFreeList;
// global monitor in use list, for moribund threads,
--- a/hotspot/src/share/vm/runtime/vmStructs.cpp Mon Oct 13 22:11:39 2014 +0200
+++ b/hotspot/src/share/vm/runtime/vmStructs.cpp Tue Oct 14 10:32:12 2014 -0700
@@ -1070,8 +1070,8 @@
volatile_nonstatic_field(ObjectMonitor, _header, markOop) \
unchecked_nonstatic_field(ObjectMonitor, _object, sizeof(void *)) /* NOTE: no type */ \
unchecked_nonstatic_field(ObjectMonitor, _owner, sizeof(void *)) /* NOTE: no type */ \
- volatile_nonstatic_field(ObjectMonitor, _count, intptr_t) \
- volatile_nonstatic_field(ObjectMonitor, _waiters, intptr_t) \
+ volatile_nonstatic_field(ObjectMonitor, _count, jint) \
+ volatile_nonstatic_field(ObjectMonitor, _waiters, jint) \
volatile_nonstatic_field(ObjectMonitor, _recursions, intptr_t) \
nonstatic_field(ObjectMonitor, FreeNext, ObjectMonitor*) \
volatile_nonstatic_field(BasicLock, _displaced_header, markOop) \
@@ -2507,6 +2507,12 @@
declare_constant(Deoptimization::Action_make_not_compilable) \
declare_constant(Deoptimization::Action_LIMIT) \
\
+ /***************************************************/ \
+ /* DEFAULT_CACHE_LINE_SIZE (globalDefinitions.hpp) */ \
+ /***************************************************/ \
+ \
+ declare_constant(DEFAULT_CACHE_LINE_SIZE) \
+ \
/*********************/ \
/* Matcher (C2 only) */ \
/*********************/ \
--- a/hotspot/src/share/vm/utilities/globalDefinitions.hpp Mon Oct 13 22:11:39 2014 +0200
+++ b/hotspot/src/share/vm/utilities/globalDefinitions.hpp Tue Oct 14 10:32:12 2014 -0700
@@ -540,7 +540,9 @@
// The expected size in bytes of a cache line, used to pad data structures.
-#define DEFAULT_CACHE_LINE_SIZE 64
+#ifndef DEFAULT_CACHE_LINE_SIZE
+ #define DEFAULT_CACHE_LINE_SIZE 64
+#endif
//----------------------------------------------------------------------------------------------------