6973570: OrderAccess::storestore() scales poorly on multi-socket x64 and sparc: cache-line ping-ponging
Summary: volatile store to a static variable removed in favour of a volatile store to the stack to avoid excessive cache coherency traffic; verified that the volatile store is not elided by any of our current compilers.
Reviewed-by: dholmes, dice, jcoomes, kvn
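
For context before the per-file diffs: previously every thread executing release() stored to a single static field, so on multi-socket machines the cache line holding OrderAccess::dummy ping-ponged between sockets on every release barrier. The fix stores to a slot on the calling thread's own stack instead. A minimal before/after sketch, condensed from the diffs below:

    // Before: one static field, one cache line, shared by all threads.
    volatile intptr_t OrderAccess::dummy = 0;
    inline void OrderAccess::release() {
      dummy = 0;                      // every thread hits the same line
    }

    // After: a per-call stack slot; no cache line is shared.
    inline void OrderAccess::release() {
      volatile jint local_dummy = 0;  // volatile, so compilers keep the store
    }
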
--- a/hotspot/src/os_cpu/linux_sparc/vm/orderAccess_linux_sparc.inline.hpp Mon Aug 09 18:03:50 2010 -0700
+++ b/hotspot/src/os_cpu/linux_sparc/vm/orderAccess_linux_sparc.inline.hpp Tue Aug 10 14:53:35 2010 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2008, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -36,8 +36,8 @@
}
inline void OrderAccess::release() {
- jint* dummy = (jint*)&dummy;
- __asm__ volatile("stw %%g0, [%0]" : : "r" (dummy) : "memory");
+ jint* local_dummy = (jint*)&local_dummy;
+ __asm__ volatile("stw %%g0, [%0]" : : "r" (local_dummy) : "memory");
}
inline void OrderAccess::fence() {
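
A commented reading of the SPARC form above, assuming GCC-style inline asm (the rename from dummy to local_dummy is the only textual change in this hunk):

    jint* local_dummy = (jint*)&local_dummy;  // address of this stack slot
    __asm__ volatile("stw %%g0, [%0]"         // store %g0 (hardwired zero)...
                     :                        // no outputs
                     : "r" (local_dummy)      // ...to the stack address
                     : "memory");             // clobber: compiler barrier
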
--- a/hotspot/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp Mon Aug 09 18:03:50 2010 -0700
+++ b/hotspot/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp Tue Aug 10 14:53:35 2010 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -30,16 +30,18 @@
inline void OrderAccess::storeload() { fence(); }
inline void OrderAccess::acquire() {
- volatile intptr_t dummy;
+ volatile intptr_t local_dummy;
#ifdef AMD64
- __asm__ volatile ("movq 0(%%rsp), %0" : "=r" (dummy) : : "memory");
+ __asm__ volatile ("movq 0(%%rsp), %0" : "=r" (local_dummy) : : "memory");
#else
- __asm__ volatile ("movl 0(%%esp),%0" : "=r" (dummy) : : "memory");
+ __asm__ volatile ("movl 0(%%esp),%0" : "=r" (local_dummy) : : "memory");
#endif // AMD64
}
inline void OrderAccess::release() {
- dummy = 0;
+ // Avoid hitting the same cache-line from
+ // different threads.
+ volatile jint local_dummy = 0;
}
inline void OrderAccess::fence() {
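
acquire() is the load-side mirror: a volatile dummy load from the top of the stack, again pinned by a "memory" clobber. On x86 the hardware already keeps load-load and load-store order, so the construct chiefly stops the compiler from reordering across it. A commented sketch of the AMD64 branch:

    volatile intptr_t local_dummy;
    __asm__ volatile ("movq 0(%%rsp), %0"  // dummy load from the stack top
                      : "=r" (local_dummy) // into a scratch register
                      :                    // no inputs
                      : "memory");         // barrier against compiler reordering
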
--- a/hotspot/src/os_cpu/solaris_sparc/vm/orderAccess_solaris_sparc.inline.hpp Mon Aug 09 18:03:50 2010 -0700
+++ b/hotspot/src/os_cpu/solaris_sparc/vm/orderAccess_solaris_sparc.inline.hpp Tue Aug 10 14:53:35 2010 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -42,8 +42,8 @@
}
inline void OrderAccess::release() {
- jint* dummy = (jint*)&dummy;
- __asm__ volatile("stw %%g0, [%0]" : : "r" (dummy) : "memory");
+ jint* local_dummy = (jint*)&local_dummy;
+ __asm__ volatile("stw %%g0, [%0]" : : "r" (local_dummy) : "memory");
}
inline void OrderAccess::fence() {
@@ -57,7 +57,9 @@
}
inline void OrderAccess::release() {
- dummy = 0;
+ // Avoid hitting the same cache-line from
+ // different threads.
+ volatile jint local_dummy = 0;
}
inline void OrderAccess::fence() {
--- a/hotspot/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp Mon Aug 09 18:03:50 2010 -0700
+++ b/hotspot/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp Tue Aug 10 14:53:35 2010 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -40,7 +40,9 @@
}
inline void OrderAccess::release() {
- dummy = 0;
+ // Avoid hitting the same cache-line from
+ // different threads.
+ volatile jint local_dummy = 0;
}
inline void OrderAccess::fence() {
@@ -53,11 +55,11 @@
extern "C" {
inline void _OrderAccess_acquire() {
- volatile intptr_t dummy;
+ volatile intptr_t local_dummy;
#ifdef AMD64
- __asm__ volatile ("movq 0(%%rsp), %0" : "=r" (dummy) : : "memory");
+ __asm__ volatile ("movq 0(%%rsp), %0" : "=r" (local_dummy) : : "memory");
#else
- __asm__ volatile ("movl 0(%%esp),%0" : "=r" (dummy) : : "memory");
+ __asm__ volatile ("movl 0(%%esp),%0" : "=r" (local_dummy) : : "memory");
#endif // AMD64
}
inline void _OrderAccess_fence() {
--- a/hotspot/src/os_cpu/windows_x86/vm/orderAccess_windows_x86.inline.hpp Mon Aug 09 18:03:50 2010 -0700
+++ b/hotspot/src/os_cpu/windows_x86/vm/orderAccess_windows_x86.inline.hpp Tue Aug 10 14:53:35 2010 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -41,7 +41,7 @@
inline void OrderAccess::release() {
// A volatile store has release semantics.
- dummy = 0;
+ volatile jint local_dummy = 0;
}
inline void OrderAccess::fence() {
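
The Windows variant needs no inline asm at all: the pre-existing comment records that, for the VC++ versions in use, a volatile store already has release semantics. A hypothetical caller (not from this changeset) showing the usual pairing the barrier supports:

    int payload;              // hypothetical shared data
    volatile jint ready;      // hypothetical publication flag

    void publish() {
      payload = 42;
      OrderAccess::release(); // prior stores ordered before what follows
      ready = 1;              // a reader that acquire()s after seeing
    }                         // ready == 1 also sees payload == 42
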
--- a/hotspot/src/share/vm/runtime/orderAccess.cpp Mon Aug 09 18:03:50 2010 -0700
+++ b/hotspot/src/share/vm/runtime/orderAccess.cpp Tue Aug 10 14:53:35 2010 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -25,8 +25,6 @@
# include "incls/_precompiled.incl"
# include "incls/_orderAccess.cpp.incl"
-volatile intptr_t OrderAccess::dummy = 0;
-
void OrderAccess::StubRoutines_fence() {
// Use a stub if it exists. It may not exist during bootstrap so do
// nothing in that case but assert if no fence code exists after threads have been created
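
With the definition gone from orderAccess.cpp and the declaration removed from orderAccess.hpp below, any out-of-tree port still written in the old style stops compiling, which makes missed call sites easy to find:

    inline void OrderAccess::release() {
      dummy = 0;  // error after this change: OrderAccess has no member 'dummy'
    }
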
--- a/hotspot/src/share/vm/runtime/orderAccess.hpp Mon Aug 09 18:03:50 2010 -0700
+++ b/hotspot/src/share/vm/runtime/orderAccess.hpp Tue Aug 10 14:53:35 2010 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -166,6 +166,12 @@
// and release must include a sequence point, usually via a volatile memory
// access. Other ways to guarantee a sequence point are, e.g., use of
// indirect calls and linux's __asm__ volatile.
+// Note: as of 6973570, we have replaced the originally static "dummy" field
+// (see above) by a volatile store to the stack. All of the versions of the
+// compilers that we currently use (SunStudio, gcc and VC++) respect the
+// semantics of volatile here. If you build HotSpot using other
+// compilers, you may need to verify that no compiler reordering occurs
+// across the sequence point represented by the volatile access.
//
//
// os::is_MP Considered Redundant
@@ -297,10 +303,6 @@
static void release_store_ptr_fence(volatile intptr_t* p, intptr_t v);
static void release_store_ptr_fence(volatile void* p, void* v);
- // In order to force a memory access, implementations may
- // need a volatile externally visible dummy variable.
- static volatile intptr_t dummy;
-
private:
// This is a helper that invokes the StubRoutines::fence_entry()
// routine if it exists, It should only be used by platforms that
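
A hypothetical standalone probe (not part of the changeset) for the verification the header note asks of other toolchains: compile the construct at full optimization and confirm the store survives in the generated assembly.

    // probe.cpp -- build with the candidate compiler, e.g. g++ -O3 -S probe.cpp
    extern "C" void release_probe() {
      volatile int local_dummy = 0;  // a conforming compiler must emit this store
    }
    // Then inspect probe.s and check that the store of zero to the stack
    // slot in release_probe was not elided.
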