8040140: System.nanoTime() is slow and non-monotonic on OS X
Reviewed-by: sspitsyn, shade, dholmes, acorn
--- a/hotspot/src/os/bsd/vm/os_bsd.cpp Thu Apr 24 14:06:40 2014 -0700
+++ b/hotspot/src/os/bsd/vm/os_bsd.cpp Fri Apr 25 07:40:33 2014 +0200
@@ -127,8 +127,12 @@
// global variables
julong os::Bsd::_physical_memory = 0;
-
+#ifdef __APPLE__
+mach_timebase_info_data_t os::Bsd::_timebase_info = {0, 0};
+volatile uint64_t os::Bsd::_max_abstime = 0;
+#else
int (*os::Bsd::_clock_gettime)(clockid_t, struct timespec *) = NULL;
+#endif
pthread_t os::Bsd::_main_thread;
int os::Bsd::_page_size = -1;
@@ -986,13 +990,15 @@
return jlong(time.tv_sec) * 1000 + jlong(time.tv_usec / 1000);
}
+#ifndef __APPLE__
#ifndef CLOCK_MONOTONIC
#define CLOCK_MONOTONIC (1)
#endif
+#endif
#ifdef __APPLE__
void os::Bsd::clock_init() {
- // XXXDARWIN: Investigate replacement monotonic clock
+ mach_timebase_info(&_timebase_info);
}
#else
void os::Bsd::clock_init() {
@@ -1007,10 +1013,39 @@
#endif
+
+#ifdef __APPLE__
+
+jlong os::javaTimeNanos() {
+ const uint64_t tm = mach_absolute_time();
+ const uint64_t now = (tm * Bsd::_timebase_info.numer) / Bsd::_timebase_info.denom;
+ const uint64_t prev = Bsd::_max_abstime;
+ if (now <= prev) {
+ return prev; // same or retrograde time;
+ }
+ const uint64_t obsv = Atomic::cmpxchg(now, (volatile jlong*)&Bsd::_max_abstime, prev);
+ assert(obsv >= prev, "invariant"); // Monotonicity
+ // If the CAS succeeded then we're done and return "now".
+ // If the CAS failed and the observed value "obsv" is >= now then
+ // we should return "obsv". If the CAS failed and now > obsv > prev then
+ // some other thread raced this thread and installed a new value, in which case
+ // we could either (a) retry the entire operation, (b) retry trying to install now
+ // or (c) just return obsv. We use (c). No loop is required although in some cases
+ // we might discard a higher "now" value in deference to a slightly lower but freshly
+ // installed obsv value. That's entirely benign -- it admits no new orderings compared
+ // to (a) or (b) -- and greatly reduces coherence traffic.
+ // We might also condition (c) on the magnitude of the delta between obsv and now.
+ // Avoiding excessive CAS operations to hot RW locations is critical.
+ // See https://blogs.oracle.com/dave/entry/cas_and_cache_trivia_invalidate
+ return (prev == obsv) ? now : obsv;
+}
+
+#else // __APPLE__
+
jlong os::javaTimeNanos() {
if (os::supports_monotonic_clock()) {
struct timespec tp;
- int status = Bsd::clock_gettime(CLOCK_MONOTONIC, &tp);
+ int status = Bsd::_clock_gettime(CLOCK_MONOTONIC, &tp);
assert(status == 0, "gettime error");
jlong result = jlong(tp.tv_sec) * (1000 * 1000 * 1000) + jlong(tp.tv_nsec);
return result;
@@ -1023,6 +1058,8 @@
}
}
+#endif // __APPLE__
+
void os::javaTimeNanos_info(jvmtiTimerInfo *info_ptr) {
if (os::supports_monotonic_clock()) {
info_ptr->max_value = ALL_64_BITS;
--- a/hotspot/src/os/bsd/vm/os_bsd.hpp Thu Apr 24 14:06:40 2014 -0700
+++ b/hotspot/src/os/bsd/vm/os_bsd.hpp Fri Apr 25 07:40:33 2014 +0200
@@ -58,7 +58,13 @@
// For signal flags diagnostics
static int sigflags[MAXSIGNUM];
+#ifdef __APPLE__
+ // mach_absolute_time() support: timebase scaling ratio and the largest time value published so far
+ static mach_timebase_info_data_t _timebase_info;
+ static volatile uint64_t _max_abstime;
+#else
static int (*_clock_gettime)(clockid_t, struct timespec *);
+#endif
static GrowableArray<int>* _cpu_to_node;
@@ -134,10 +140,6 @@
// Real-time clock functions
static void clock_init(void);
- static int clock_gettime(clockid_t clock_id, struct timespec *tp) {
- return _clock_gettime ? _clock_gettime(clock_id, tp) : -1;
- }
-
// Stack repair handling
// none present
--- a/hotspot/src/os/bsd/vm/os_bsd.inline.hpp Thu Apr 24 14:06:40 2014 -0700
+++ b/hotspot/src/os/bsd/vm/os_bsd.inline.hpp Fri Apr 25 07:40:33 2014 +0200
@@ -287,7 +287,11 @@
}
inline bool os::supports_monotonic_clock() {
+#ifdef __APPLE__
+ return true;
+#else
return Bsd::_clock_gettime != NULL;
+#endif
}
#endif // OS_BSD_VM_OS_BSD_INLINE_HPP
--- a/hotspot/src/os/solaris/vm/os_solaris.cpp Thu Apr 24 14:06:40 2014 -0700
+++ b/hotspot/src/os/solaris/vm/os_solaris.cpp Fri Apr 25 07:40:33 2014 +0200
@@ -347,11 +347,7 @@
static hrtime_t first_hrtime = 0;
static const hrtime_t hrtime_hz = 1000*1000*1000;
-const int LOCK_BUSY = 1;
-const int LOCK_FREE = 0;
-const int LOCK_INVALID = -1;
static volatile hrtime_t max_hrtime = 0;
-static volatile int max_hrtime_lock = LOCK_FREE; // Update counter with LSB as lock-in-progress
void os::Solaris::initialize_system_info() {
@@ -1364,58 +1360,31 @@
}
-// gethrtime can move backwards if read from one cpu and then a different cpu
-// getTimeNanos is guaranteed to not move backward on Solaris
-// local spinloop created as faster for a CAS on an int than
-// a CAS on a 64bit jlong. Also Atomic::cmpxchg for jlong is not
-// supported on sparc v8 or pre supports_cx8 intel boxes.
-// oldgetTimeNanos for systems which do not support CAS on 64bit jlong
-// i.e. sparc v8 and pre supports_cx8 (i486) intel boxes
-inline hrtime_t oldgetTimeNanos() {
- int gotlock = LOCK_INVALID;
- hrtime_t newtime = gethrtime();
-
- for (;;) {
-// grab lock for max_hrtime
- int curlock = max_hrtime_lock;
- if (curlock & LOCK_BUSY) continue;
- if (gotlock = Atomic::cmpxchg(LOCK_BUSY, &max_hrtime_lock, LOCK_FREE) != LOCK_FREE) continue;
- if (newtime > max_hrtime) {
- max_hrtime = newtime;
- } else {
- newtime = max_hrtime;
- }
- // release lock
- max_hrtime_lock = LOCK_FREE;
- return newtime;
- }
-}
-// gethrtime can move backwards if read from one cpu and then a different cpu
-// getTimeNanos is guaranteed to not move backward on Solaris
+// gethrtime() should be monotonic according to the documentation,
+// but some virtualized platforms are known to break this guarantee.
+// getTimeNanos() must be guaranteed not to move backwards, so we
+// are forced to add a check here.
inline hrtime_t getTimeNanos() {
- if (VM_Version::supports_cx8()) {
- const hrtime_t now = gethrtime();
- // Use atomic long load since 32-bit x86 uses 2 registers to keep long.
- const hrtime_t prev = Atomic::load((volatile jlong*)&max_hrtime);
- if (now <= prev) return prev; // same or retrograde time;
- const hrtime_t obsv = Atomic::cmpxchg(now, (volatile jlong*)&max_hrtime, prev);
- assert(obsv >= prev, "invariant"); // Monotonicity
- // If the CAS succeeded then we're done and return "now".
- // If the CAS failed and the observed value "obs" is >= now then
- // we should return "obs". If the CAS failed and now > obs > prv then
- // some other thread raced this thread and installed a new value, in which case
- // we could either (a) retry the entire operation, (b) retry trying to install now
- // or (c) just return obs. We use (c). No loop is required although in some cases
- // we might discard a higher "now" value in deference to a slightly lower but freshly
- // installed obs value. That's entirely benign -- it admits no new orderings compared
- // to (a) or (b) -- and greatly reduces coherence traffic.
- // We might also condition (c) on the magnitude of the delta between obs and now.
- // Avoiding excessive CAS operations to hot RW locations is critical.
- // See http://blogs.sun.com/dave/entry/cas_and_cache_trivia_invalidate
- return (prev == obsv) ? now : obsv ;
- } else {
- return oldgetTimeNanos();
- }
+ const hrtime_t now = gethrtime();
+ const hrtime_t prev = max_hrtime;
+ if (now <= prev) {
+ return prev; // same or retrograde time;
+ }
+ const hrtime_t obsv = Atomic::cmpxchg(now, (volatile jlong*)&max_hrtime, prev);
+ assert(obsv >= prev, "invariant"); // Monotonicity
+ // If the CAS succeeded then we're done and return "now".
+ // If the CAS failed and the observed value "obsv" is >= now then
+ // we should return "obsv". If the CAS failed and now > obsv > prev then
+ // some other thread raced this thread and installed a new value, in which case
+ // we could either (a) retry the entire operation, (b) retry trying to install now
+ // or (c) just return obsv. We use (c). No loop is required although in some cases
+ // we might discard a higher "now" value in deference to a slightly lower but freshly
+ // installed obsv value. That's entirely benign -- it admits no new orderings compared
+ // to (a) or (b) -- and greatly reduces coherence traffic.
+ // We might also condition (c) on the magnitude of the delta between obsv and now.
+ // Avoiding excessive CAS operations to hot RW locations is critical.
+ // See https://blogs.oracle.com/dave/entry/cas_and_cache_trivia_invalidate
+ return (prev == obsv) ? now : obsv;
}
// Time since start-up in seconds to a fine granularity.
--- a/hotspot/src/share/vm/runtime/os.hpp Thu Apr 24 14:06:40 2014 -0700
+++ b/hotspot/src/share/vm/runtime/os.hpp Fri Apr 25 07:40:33 2014 +0200
@@ -48,6 +48,9 @@
#ifdef TARGET_OS_FAMILY_bsd
# include "jvm_bsd.h"
# include <setjmp.h>
+# ifdef __APPLE__
+# include <mach/mach_time.h>
+# endif
#endif
class AgentLibrary;