6822204: volatile fences should prefer lock:addl to actual mfence instructions
Reviewed-by: kvn, phh
--- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp Thu Mar 26 14:31:45 2009 -0700
@@ -817,21 +817,6 @@
Label _atomic_add_stub; // called from other stubs
- // Support for void OrderAccess::fence().
- //
- address generate_fence() {
- StubCodeMark mark(this, "StubRoutines", "fence");
- address start = __ pc();
-
- __ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad | Assembler::LoadStore |
- Assembler::StoreLoad | Assembler::StoreStore));
- __ retl(false);
- __ delayed()->nop();
-
- return start;
- }
-
-
//------------------------------------------------------------------------------------------------------------------------
// The following routine generates a subroutine to throw an asynchronous
// UnknownError when an unsafe access gets a fault that could not be
@@ -2861,7 +2846,6 @@
StubRoutines::_atomic_cmpxchg_ptr_entry = StubRoutines::_atomic_cmpxchg_entry;
StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
StubRoutines::_atomic_add_ptr_entry = StubRoutines::_atomic_add_entry;
- StubRoutines::_fence_entry = generate_fence();
#endif // COMPILER2 !=> _LP64
}
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Thu Mar 26 14:31:45 2009 -0700
@@ -1438,26 +1438,12 @@
}
}
-// Serializes memory.
+// Emit mfence instruction
void Assembler::mfence() {
- // Memory barriers are only needed on multiprocessors
- if (os::is_MP()) {
- if( LP64_ONLY(true ||) VM_Version::supports_sse2() ) {
- emit_byte( 0x0F ); // MFENCE; faster blows no regs
- emit_byte( 0xAE );
- emit_byte( 0xF0 );
- } else {
- // All usable chips support "locked" instructions which suffice
- // as barriers, and are much faster than the alternative of
- // using cpuid instruction. We use here a locked add [esp],0.
- // This is conveniently otherwise a no-op except for blowing
- // flags (which we save and restore.)
- pushf(); // Save eflags register
- lock();
- addl(Address(rsp, 0), 0);// Assert the lock# signal here
- popf(); // Restore eflags register
- }
- }
+ NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
+ emit_byte( 0x0F );
+ emit_byte( 0xAE );
+ emit_byte( 0xF0 );
}
void Assembler::mov(Register dst, Register src) {
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp Thu Mar 26 14:31:45 2009 -0700
@@ -1068,15 +1068,23 @@
LoadLoad = 1 << 0
};
- // Serializes memory.
+ // Serializes memory and blows flags
void membar(Membar_mask_bits order_constraint) {
- // We only have to handle StoreLoad and LoadLoad
- if (order_constraint & StoreLoad) {
- // MFENCE subsumes LFENCE
- mfence();
- } /* [jk] not needed currently: else if (order_constraint & LoadLoad) {
- lfence();
- } */
+ if (os::is_MP()) {
+ // We only have to handle StoreLoad
+ if (order_constraint & StoreLoad) {
+ // All usable chips support "locked" instructions which suffice
+ // as barriers, and are much faster than the alternative of
+ // using the cpuid instruction. Here we use a locked add [esp],0.
+ // This is conveniently otherwise a no-op except for blowing
+ // flags.
+ // Any change to this code may need to revisit other places in
+ // the code where this idiom is used, in particular the
+ // orderAccess code.
+ lock();
+ addl(Address(rsp, 0), 0);// Assert the lock# signal here
+ }
+ }
}
void mfence();
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Thu Mar 26 14:31:45 2009 -0700
@@ -637,7 +637,7 @@
address generate_orderaccess_fence() {
StubCodeMark mark(this, "StubRoutines", "orderaccess_fence");
address start = __ pc();
- __ mfence();
+ __ membar(Assembler::StoreLoad);
__ ret(0);
return start;
--- a/hotspot/src/cpu/x86/vm/x86_32.ad Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad Thu Mar 26 14:31:45 2009 -0700
@@ -4288,24 +4288,6 @@
emit_opcode(cbuf, 0xC8 + $src2$$reg);
%}
- enc_class enc_membar_acquire %{
- // Doug Lea believes this is not needed with current Sparcs and TSO.
- // MacroAssembler masm(&cbuf);
- // masm.membar();
- %}
-
- enc_class enc_membar_release %{
- // Doug Lea believes this is not needed with current Sparcs and TSO.
- // MacroAssembler masm(&cbuf);
- // masm.membar();
- %}
-
- enc_class enc_membar_volatile %{
- MacroAssembler masm(&cbuf);
- masm.membar(Assembler::Membar_mask_bits(Assembler::StoreLoad |
- Assembler::StoreStore));
- %}
-
// Atomically load the volatile long
enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
emit_opcode(cbuf,0xDF);
@@ -7498,9 +7480,9 @@
ins_cost(400);
size(0);
- format %{ "MEMBAR-acquire" %}
- ins_encode( enc_membar_acquire );
- ins_pipe(pipe_slow);
+ format %{ "MEMBAR-acquire ! (empty encoding)" %}
+ ins_encode();
+ ins_pipe(empty);
%}
instruct membar_acquire_lock() %{
@@ -7519,9 +7501,9 @@
ins_cost(400);
size(0);
- format %{ "MEMBAR-release" %}
- ins_encode( enc_membar_release );
- ins_pipe(pipe_slow);
+ format %{ "MEMBAR-release ! (empty encoding)" %}
+ ins_encode( );
+ ins_pipe(empty);
%}
instruct membar_release_lock() %{
@@ -7535,12 +7517,22 @@
ins_pipe(empty);
%}
-instruct membar_volatile() %{
+instruct membar_volatile(eFlagsReg cr) %{
match(MemBarVolatile);
+ effect(KILL cr);
ins_cost(400);
- format %{ "MEMBAR-volatile" %}
- ins_encode( enc_membar_volatile );
+ format %{
+ $$template
+ if (os::is_MP()) {
+ $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
+ } else {
+ $$emit$$"MEMBAR-volatile ! (empty encoding)"
+ }
+ %}
+ ins_encode %{
+ __ membar(Assembler::StoreLoad);
+ %}
ins_pipe(pipe_slow);
%}
--- a/hotspot/src/cpu/x86/vm/x86_64.ad Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad Thu Mar 26 14:31:45 2009 -0700
@@ -4162,33 +4162,6 @@
// done:
%}
- enc_class enc_membar_acquire
- %{
- // [jk] not needed currently, if you enable this and it really
- // emits code don't forget to the remove the "size(0)" line in
- // membar_acquire()
- // MacroAssembler masm(&cbuf);
- // masm.membar(Assembler::Membar_mask_bits(Assembler::LoadStore |
- // Assembler::LoadLoad));
- %}
-
- enc_class enc_membar_release
- %{
- // [jk] not needed currently, if you enable this and it really
- // emits code don't forget to the remove the "size(0)" line in
- // membar_release()
- // MacroAssembler masm(&cbuf);
- // masm.membar(Assembler::Membar_mask_bits(Assembler::LoadStore |
- // Assembler::StoreStore));
- %}
-
- enc_class enc_membar_volatile
- %{
- MacroAssembler masm(&cbuf);
- masm.membar(Assembler::Membar_mask_bits(Assembler::StoreLoad |
- Assembler::StoreStore));
- %}
-
// Safepoint Poll. This polls the safepoint page, and causes an
// exception if it is not readable. Unfortunately, it kills
// RFLAGS in the process.
@@ -7458,7 +7431,7 @@
ins_cost(0);
size(0);
- format %{ "MEMBAR-acquire" %}
+ format %{ "MEMBAR-acquire ! (empty encoding)" %}
ins_encode();
ins_pipe(empty);
%}
@@ -7481,7 +7454,7 @@
ins_cost(0);
size(0);
- format %{ "MEMBAR-release" %}
+ format %{ "MEMBAR-release ! (empty encoding)" %}
ins_encode();
ins_pipe(empty);
%}
@@ -7498,13 +7471,22 @@
ins_pipe(empty);
%}
-instruct membar_volatile()
-%{
+instruct membar_volatile(rFlagsReg cr) %{
match(MemBarVolatile);
+ effect(KILL cr);
ins_cost(400);
- format %{ "MEMBAR-volatile" %}
- ins_encode(enc_membar_volatile);
+ format %{
+ $$template
+ if (os::is_MP()) {
+ $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
+ } else {
+ $$emit$$"MEMBAR-volatile ! (empty encoding)"
+ }
+ %}
+ ins_encode %{
+ __ membar(Assembler::StoreLoad);
+ %}
ins_pipe(pipe_slow);
%}
--- a/hotspot/src/os_cpu/linux_sparc/vm/os_linux_sparc.hpp Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/linux_sparc/vm/os_linux_sparc.hpp Thu Mar 26 14:31:45 2009 -0700
@@ -29,13 +29,11 @@
static jint (*atomic_cmpxchg_func) (jint, volatile jint*, jint);
static jlong (*atomic_cmpxchg_long_func)(jlong, volatile jlong*, jlong);
static jint (*atomic_add_func) (jint, volatile jint*);
- static void (*fence_func) ();
static jint atomic_xchg_bootstrap (jint, volatile jint*);
static jint atomic_cmpxchg_bootstrap (jint, volatile jint*, jint);
static jlong atomic_cmpxchg_long_bootstrap(jlong, volatile jlong*, jlong);
static jint atomic_add_bootstrap (jint, volatile jint*);
- static void fence_bootstrap ();
static void setup_fpu() {}
--- a/hotspot/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp Thu Mar 26 14:31:45 2009 -0700
@@ -44,11 +44,12 @@
inline void OrderAccess::fence() {
if (os::is_MP()) {
+ // always use locked addl since mfence is sometimes expensive
#ifdef AMD64
- __asm__ __volatile__ ("mfence":::"memory");
+ __asm__ volatile ("lock; addl $0,0(%%rsp)" : : : "cc", "memory");
#else
__asm__ volatile ("lock; addl $0,0(%%esp)" : : : "cc", "memory");
-#endif // AMD64
+#endif
}
}
--- a/hotspot/src/os_cpu/solaris_sparc/vm/orderAccess_solaris_sparc.inline.hpp Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/solaris_sparc/vm/orderAccess_solaris_sparc.inline.hpp Thu Mar 26 14:31:45 2009 -0700
@@ -60,22 +60,10 @@
dummy = 0;
}
-#if defined(COMPILER2) || defined(_LP64)
-
inline void OrderAccess::fence() {
_OrderAccess_fence();
}
-#else // defined(COMPILER2) || defined(_LP64)
-
-inline void OrderAccess::fence() {
- if (os::is_MP()) {
- (*os::fence_func)();
- }
-}
-
-#endif // defined(COMPILER2) || defined(_LP64)
-
#endif // _GNU_SOURCE
inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { return *p; }
--- a/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp Thu Mar 26 14:31:45 2009 -0700
@@ -619,7 +619,6 @@
typedef jint cmpxchg_func_t (jint, volatile jint*, jint);
typedef jlong cmpxchg_long_func_t(jlong, volatile jlong*, jlong);
typedef jint add_func_t (jint, volatile jint*);
-typedef void fence_func_t ();
jint os::atomic_xchg_bootstrap(jint exchange_value, volatile jint* dest) {
// try to use the stub:
@@ -681,25 +680,10 @@
return (*dest) += add_value;
}
-void os::fence_bootstrap() {
- // try to use the stub:
- fence_func_t* func = CAST_TO_FN_PTR(fence_func_t*, StubRoutines::fence_entry());
-
- if (func != NULL) {
- os::fence_func = func;
- (*func)();
- return;
- }
- assert(Threads::number_of_threads() == 0, "for bootstrap only");
-
- // don't have to do anything for a single thread
-}
-
xchg_func_t* os::atomic_xchg_func = os::atomic_xchg_bootstrap;
cmpxchg_func_t* os::atomic_cmpxchg_func = os::atomic_cmpxchg_bootstrap;
cmpxchg_long_func_t* os::atomic_cmpxchg_long_func = os::atomic_cmpxchg_long_bootstrap;
add_func_t* os::atomic_add_func = os::atomic_add_bootstrap;
-fence_func_t* os::fence_func = os::fence_bootstrap;
#endif // !_LP64 && !COMPILER2
--- a/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.hpp Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.hpp Thu Mar 26 14:31:45 2009 -0700
@@ -29,13 +29,11 @@
static jint (*atomic_cmpxchg_func) (jint, volatile jint*, jint);
static jlong (*atomic_cmpxchg_long_func)(jlong, volatile jlong*, jlong);
static jint (*atomic_add_func) (jint, volatile jint*);
- static void (*fence_func) ();
static jint atomic_xchg_bootstrap (jint, volatile jint*);
static jint atomic_cmpxchg_bootstrap (jint, volatile jint*, jint);
static jlong atomic_cmpxchg_long_bootstrap(jlong, volatile jlong*, jlong);
static jint atomic_add_bootstrap (jint, volatile jint*);
- static void fence_bootstrap ();
static void setup_fpu() {}
--- a/hotspot/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp Thu Mar 26 14:31:45 2009 -0700
@@ -61,11 +61,8 @@
#endif // AMD64
}
inline void _OrderAccess_fence() {
-#ifdef AMD64
- __asm__ __volatile__ ("mfence":::"memory");
-#else
+ // Always use locked addl since mfence is sometimes expensive
__asm__ volatile ("lock; addl $0,0(%%esp)" : : : "cc", "memory");
-#endif // AMD64
}
}
--- a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp Thu Mar 26 14:31:45 2009 -0700
@@ -794,7 +794,6 @@
typedef jint cmpxchg_func_t (jint, volatile jint*, jint);
typedef jlong cmpxchg_long_func_t(jlong, volatile jlong*, jlong);
typedef jint add_func_t (jint, volatile jint*);
-typedef void fence_func_t ();
jint os::atomic_xchg_bootstrap(jint exchange_value, volatile jint* dest) {
// try to use the stub:
@@ -856,25 +855,10 @@
return (*dest) += add_value;
}
-void os::fence_bootstrap() {
- // try to use the stub:
- fence_func_t* func = CAST_TO_FN_PTR(fence_func_t*, StubRoutines::fence_entry());
-
- if (func != NULL) {
- os::fence_func = func;
- (*func)();
- return;
- }
- assert(Threads::number_of_threads() == 0, "for bootstrap only");
-
- // don't have to do anything for a single thread
-}
-
xchg_func_t* os::atomic_xchg_func = os::atomic_xchg_bootstrap;
cmpxchg_func_t* os::atomic_cmpxchg_func = os::atomic_cmpxchg_bootstrap;
cmpxchg_long_func_t* os::atomic_cmpxchg_long_func = os::atomic_cmpxchg_long_bootstrap;
add_func_t* os::atomic_add_func = os::atomic_add_bootstrap;
-fence_func_t* os::fence_func = os::fence_bootstrap;
extern "C" _solaris_raw_setup_fpu(address ptr);
void os::setup_fpu() {
--- a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp Thu Mar 26 14:31:45 2009 -0700
@@ -32,13 +32,11 @@
static jint (*atomic_cmpxchg_func) (jint, volatile jint*, jint);
static jlong (*atomic_cmpxchg_long_func)(jlong, volatile jlong*, jlong);
static jint (*atomic_add_func) (jint, volatile jint*);
- static void (*fence_func) ();
static jint atomic_xchg_bootstrap (jint, volatile jint*);
static jint atomic_cmpxchg_bootstrap (jint, volatile jint*, jint);
static jlong atomic_cmpxchg_long_bootstrap(jlong, volatile jlong*, jlong);
static jint atomic_add_bootstrap (jint, volatile jint*);
- static void fence_bootstrap ();
static void setup_fpu();
#endif // AMD64
--- a/hotspot/src/os_cpu/windows_x86/vm/orderAccess_windows_x86.inline.hpp Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/windows_x86/vm/orderAccess_windows_x86.inline.hpp Thu Mar 26 14:31:45 2009 -0700
@@ -46,7 +46,7 @@
inline void OrderAccess::fence() {
#ifdef AMD64
- (*os::fence_func)();
+ StubRoutines_fence();
#else
if (os::is_MP()) {
__asm {
--- a/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.cpp Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.cpp Thu Mar 26 14:31:45 2009 -0700
@@ -196,7 +196,6 @@
typedef jlong cmpxchg_long_func_t (jlong, volatile jlong*, jlong);
typedef jint add_func_t (jint, volatile jint*);
typedef intptr_t add_ptr_func_t (intptr_t, volatile intptr_t*);
-typedef void fence_func_t ();
#ifdef AMD64
@@ -292,27 +291,11 @@
return (*dest) += add_value;
}
-void os::fence_bootstrap() {
- // try to use the stub:
- fence_func_t* func = CAST_TO_FN_PTR(fence_func_t*, StubRoutines::fence_entry());
-
- if (func != NULL) {
- os::fence_func = func;
- (*func)();
- return;
- }
- assert(Threads::number_of_threads() == 0, "for bootstrap only");
-
- // don't have to do anything for a single thread
-}
-
-
xchg_func_t* os::atomic_xchg_func = os::atomic_xchg_bootstrap;
xchg_ptr_func_t* os::atomic_xchg_ptr_func = os::atomic_xchg_ptr_bootstrap;
cmpxchg_func_t* os::atomic_cmpxchg_func = os::atomic_cmpxchg_bootstrap;
add_func_t* os::atomic_add_func = os::atomic_add_bootstrap;
add_ptr_func_t* os::atomic_add_ptr_func = os::atomic_add_ptr_bootstrap;
-fence_func_t* os::fence_func = os::fence_bootstrap;
#endif // AMD64
--- a/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.hpp Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.hpp Thu Mar 26 14:31:45 2009 -0700
@@ -35,9 +35,6 @@
static jint (*atomic_add_func) (jint, volatile jint*);
static intptr_t (*atomic_add_ptr_func) (intptr_t, volatile intptr_t*);
- static void (*fence_func) ();
-
-
static jint atomic_xchg_bootstrap (jint, volatile jint*);
static intptr_t atomic_xchg_ptr_bootstrap (intptr_t, volatile intptr_t*);
@@ -53,8 +50,6 @@
#ifdef AMD64
static jint atomic_add_bootstrap (jint, volatile jint*);
static intptr_t atomic_add_ptr_bootstrap (intptr_t, volatile intptr_t*);
-
- static void fence_bootstrap ();
#endif // AMD64
static void setup_fpu();
--- a/hotspot/src/share/vm/includeDB_core Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/share/vm/includeDB_core Thu Mar 26 14:31:45 2009 -0700
@@ -3154,6 +3154,8 @@
oopsHierarchy.cpp thread_<os_family>.inline.hpp
orderAccess.cpp orderAccess.hpp
+orderAccess.cpp stubRoutines.hpp
+orderAccess.cpp thread.hpp
orderAccess.hpp allocation.hpp
orderAccess.hpp os.hpp
--- a/hotspot/src/share/vm/runtime/orderAccess.cpp Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/share/vm/runtime/orderAccess.cpp Thu Mar 26 14:31:45 2009 -0700
@@ -26,3 +26,15 @@
# include "incls/_orderAccess.cpp.incl"
volatile intptr_t OrderAccess::dummy = 0;
+
+void OrderAccess::StubRoutines_fence() {
+ // Use a stub if it exists. It may not exist during bootstrap, so do
+ // nothing in that case, but assert if no fence code exists after threads have been created.
+ void (*func)() = CAST_TO_FN_PTR(void (*)(), StubRoutines::fence_entry());
+
+ if (func != NULL) {
+ (*func)();
+ return;
+ }
+ assert(Threads::number_of_threads() == 0, "for bootstrap only");
+}
--- a/hotspot/src/share/vm/runtime/orderAccess.hpp Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/share/vm/runtime/orderAccess.hpp Thu Mar 26 14:31:45 2009 -0700
@@ -300,4 +300,10 @@
// In order to force a memory access, implementations may
// need a volatile externally visible dummy variable.
static volatile intptr_t dummy;
+
+ private:
+ // This is a helper that invokes the StubRoutines::fence_entry()
+ // routine if it exists. It should only be used by platforms that
+ // don't have another way to do the inline assembly.
+ static void StubRoutines_fence();
};