6822204: volatile fences should prefer lock:addl to actual mfence instructions
author never
Thu, 26 Mar 2009 14:31:45 -0700
changeset 2338 a8660a1b709b
parent 2335 e1965d139bb7
child 2339 2c9728eddc81
6822204: volatile fences should prefer lock:addl to actual mfence instructions
Reviewed-by: kvn, phh
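For reference, a minimal sketch (not part of the changeset) of the two x86 StoreLoad fence idioms this change chooses between, assuming GCC inline assembly on x86_64; the helper names are hypothetical and simply mirror what the patch installs in OrderAccess::fence():

    // mfence: the dedicated serializing fence instruction
    static inline void demo_fence_mfence() {
      __asm__ volatile ("mfence" : : : "memory");
    }

    // lock-prefixed add of 0 to the top of the stack: a no-op apart from
    // blowing the flags, but the LOCK# semantics provide the same
    // StoreLoad ordering and are cheaper than mfence on many processors.
    static inline void demo_fence_locked_addl() {
      __asm__ volatile ("lock; addl $0,0(%%rsp)" : : : "cc", "memory");
    }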
hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp
hotspot/src/cpu/x86/vm/assembler_x86.cpp
hotspot/src/cpu/x86/vm/assembler_x86.hpp
hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
hotspot/src/cpu/x86/vm/x86_32.ad
hotspot/src/cpu/x86/vm/x86_64.ad
hotspot/src/os_cpu/linux_sparc/vm/os_linux_sparc.hpp
hotspot/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp
hotspot/src/os_cpu/solaris_sparc/vm/orderAccess_solaris_sparc.inline.hpp
hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp
hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.hpp
hotspot/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp
hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp
hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp
hotspot/src/os_cpu/windows_x86/vm/orderAccess_windows_x86.inline.hpp
hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.cpp
hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.hpp
hotspot/src/share/vm/includeDB_core
hotspot/src/share/vm/runtime/orderAccess.cpp
hotspot/src/share/vm/runtime/orderAccess.hpp
--- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Thu Mar 26 14:31:45 2009 -0700
@@ -817,21 +817,6 @@
   Label _atomic_add_stub;  // called from other stubs
 
 
-  // Support for void OrderAccess::fence().
-  //
-  address generate_fence() {
-    StubCodeMark mark(this, "StubRoutines", "fence");
-    address start = __ pc();
-
-    __ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad  | Assembler::LoadStore |
-                                          Assembler::StoreLoad | Assembler::StoreStore));
-    __ retl(false);
-    __ delayed()->nop();
-
-    return start;
-  }
-
-
   //------------------------------------------------------------------------------------------------------------------------
   // The following routine generates a subroutine to throw an asynchronous
   // UnknownError when an unsafe access gets a fault that could not be
@@ -2861,7 +2846,6 @@
     StubRoutines::_atomic_cmpxchg_ptr_entry  = StubRoutines::_atomic_cmpxchg_entry;
     StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
     StubRoutines::_atomic_add_ptr_entry      = StubRoutines::_atomic_add_entry;
-    StubRoutines::_fence_entry               = generate_fence();
 #endif  // COMPILER2 !=> _LP64
   }
 
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Thu Mar 26 14:31:45 2009 -0700
@@ -1438,26 +1438,12 @@
   }
 }
 
-// Serializes memory.
+// Emit mfence instruction
 void Assembler::mfence() {
-    // Memory barriers are only needed on multiprocessors
-  if (os::is_MP()) {
-    if( LP64_ONLY(true ||) VM_Version::supports_sse2() ) {
-      emit_byte( 0x0F );                // MFENCE; faster blows no regs
-      emit_byte( 0xAE );
-      emit_byte( 0xF0 );
-    } else {
-      // All usable chips support "locked" instructions which suffice
-      // as barriers, and are much faster than the alternative of
-      // using cpuid instruction. We use here a locked add [esp],0.
-      // This is conveniently otherwise a no-op except for blowing
-      // flags (which we save and restore.)
-      pushf();                // Save eflags register
-      lock();
-      addl(Address(rsp, 0), 0);// Assert the lock# signal here
-      popf();                 // Restore eflags register
-    }
-  }
+  NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
+  emit_byte( 0x0F );
+  emit_byte( 0xAE );
+  emit_byte( 0xF0 );
 }
 
 void Assembler::mov(Register dst, Register src) {
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Thu Mar 26 14:31:45 2009 -0700
@@ -1068,15 +1068,23 @@
     LoadLoad   = 1 << 0
   };
 
-  // Serializes memory.
+  // Serializes memory and blows flags
   void membar(Membar_mask_bits order_constraint) {
-    // We only have to handle StoreLoad and LoadLoad
-    if (order_constraint & StoreLoad) {
-      // MFENCE subsumes LFENCE
-      mfence();
-    } /* [jk] not needed currently: else if (order_constraint & LoadLoad) {
-         lfence();
-    } */
+    if (os::is_MP()) {
+      // We only have to handle StoreLoad
+      if (order_constraint & StoreLoad) {
+        // All usable chips support "locked" instructions which suffice
+        // as barriers, and are much faster than the alternative of
+        // using cpuid instruction. We use here a locked add [esp],0.
+        // This is conveniently otherwise a no-op except for blowing
+        // flags.
+        // Any change to this code may need to revisit other places in
+        // the code where this idiom is used, in particular the
+        // orderAccess code.
+        lock();
+        addl(Address(rsp, 0), 0);// Assert the lock# signal here
+      }
+    }
   }
 
   void mfence();
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu Mar 26 14:31:45 2009 -0700
@@ -637,7 +637,7 @@
   address generate_orderaccess_fence() {
     StubCodeMark mark(this, "StubRoutines", "orderaccess_fence");
     address start = __ pc();
-    __ mfence();
+    __ membar(Assembler::StoreLoad);
     __ ret(0);
 
     return start;
--- a/hotspot/src/cpu/x86/vm/x86_32.ad	Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad	Thu Mar 26 14:31:45 2009 -0700
@@ -4288,24 +4288,6 @@
     emit_opcode(cbuf, 0xC8 + $src2$$reg);
   %}
 
-  enc_class enc_membar_acquire %{
-    // Doug Lea believes this is not needed with current Sparcs and TSO.
-    // MacroAssembler masm(&cbuf);
-    // masm.membar();
-  %}
-
-  enc_class enc_membar_release %{
-    // Doug Lea believes this is not needed with current Sparcs and TSO.
-    // MacroAssembler masm(&cbuf);
-    // masm.membar();
-  %}
-
-  enc_class enc_membar_volatile %{
-    MacroAssembler masm(&cbuf);
-    masm.membar(Assembler::Membar_mask_bits(Assembler::StoreLoad |
-                                            Assembler::StoreStore));
-  %}
-
   // Atomically load the volatile long
   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
     emit_opcode(cbuf,0xDF);
@@ -7498,9 +7480,9 @@
   ins_cost(400);
 
   size(0);
-  format %{ "MEMBAR-acquire" %}
-  ins_encode( enc_membar_acquire );
-  ins_pipe(pipe_slow);
+  format %{ "MEMBAR-acquire ! (empty encoding)" %}
+  ins_encode();
+  ins_pipe(empty);
 %}
 
 instruct membar_acquire_lock() %{
@@ -7519,9 +7501,9 @@
   ins_cost(400);
 
   size(0);
-  format %{ "MEMBAR-release" %}
-  ins_encode( enc_membar_release );
-  ins_pipe(pipe_slow);
+  format %{ "MEMBAR-release ! (empty encoding)" %}
+  ins_encode( );
+  ins_pipe(empty);
 %}
 
 instruct membar_release_lock() %{
@@ -7535,12 +7517,22 @@
   ins_pipe(empty);
 %}
 
-instruct membar_volatile() %{
+instruct membar_volatile(eFlagsReg cr) %{
   match(MemBarVolatile);
+  effect(KILL cr);
   ins_cost(400);
 
-  format %{ "MEMBAR-volatile" %}
-  ins_encode( enc_membar_volatile );
+  format %{ 
+    $$template
+    if (os::is_MP()) {
+      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
+    } else {
+      $$emit$$"MEMBAR-volatile ! (empty encoding)"
+    }
+  %}
+  ins_encode %{
+    __ membar(Assembler::StoreLoad);
+  %}
   ins_pipe(pipe_slow);
 %}
 
--- a/hotspot/src/cpu/x86/vm/x86_64.ad	Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad	Thu Mar 26 14:31:45 2009 -0700
@@ -4162,33 +4162,6 @@
     // done:
   %}
 
-  enc_class enc_membar_acquire
-  %{
-    // [jk] not needed currently, if you enable this and it really
-    // emits code don't forget to the remove the "size(0)" line in
-    // membar_acquire()
-    // MacroAssembler masm(&cbuf);
-    // masm.membar(Assembler::Membar_mask_bits(Assembler::LoadStore |
-    //                                         Assembler::LoadLoad));
-  %}
-
-  enc_class enc_membar_release
-  %{
-    // [jk] not needed currently, if you enable this and it really
-    // emits code don't forget to the remove the "size(0)" line in
-    // membar_release()
-    // MacroAssembler masm(&cbuf);
-    // masm.membar(Assembler::Membar_mask_bits(Assembler::LoadStore |
-    //                                         Assembler::StoreStore));
-  %}
-
-  enc_class enc_membar_volatile
-  %{
-    MacroAssembler masm(&cbuf);
-    masm.membar(Assembler::Membar_mask_bits(Assembler::StoreLoad |
-                                            Assembler::StoreStore));
-  %}
-
   // Safepoint Poll.  This polls the safepoint page, and causes an
   // exception if it is not readable. Unfortunately, it kills
   // RFLAGS in the process.
@@ -7458,7 +7431,7 @@
   ins_cost(0);
 
   size(0);
-  format %{ "MEMBAR-acquire" %}
+  format %{ "MEMBAR-acquire ! (empty encoding)" %}
   ins_encode();
   ins_pipe(empty);
 %}
@@ -7481,7 +7454,7 @@
   ins_cost(0);
 
   size(0);
-  format %{ "MEMBAR-release" %}
+  format %{ "MEMBAR-release ! (empty encoding)" %}
   ins_encode();
   ins_pipe(empty);
 %}
@@ -7498,13 +7471,22 @@
   ins_pipe(empty);
 %}
 
-instruct membar_volatile()
-%{
+instruct membar_volatile(rFlagsReg cr) %{
   match(MemBarVolatile);
+  effect(KILL cr);
   ins_cost(400);
 
-  format %{ "MEMBAR-volatile" %}
-  ins_encode(enc_membar_volatile);
+  format %{ 
+    $$template
+    if (os::is_MP()) {
+      $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
+    } else {
+      $$emit$$"MEMBAR-volatile ! (empty encoding)"
+    }
+  %}
+  ins_encode %{
+    __ membar(Assembler::StoreLoad);
+  %}
   ins_pipe(pipe_slow);
 %}
 
--- a/hotspot/src/os_cpu/linux_sparc/vm/os_linux_sparc.hpp	Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/linux_sparc/vm/os_linux_sparc.hpp	Thu Mar 26 14:31:45 2009 -0700
@@ -29,13 +29,11 @@
   static jint  (*atomic_cmpxchg_func)     (jint,  volatile jint*,  jint);
   static jlong (*atomic_cmpxchg_long_func)(jlong, volatile jlong*, jlong);
   static jint  (*atomic_add_func)         (jint,  volatile jint*);
-  static void  (*fence_func)              ();
 
   static jint  atomic_xchg_bootstrap        (jint,  volatile jint*);
   static jint  atomic_cmpxchg_bootstrap     (jint,  volatile jint*,  jint);
   static jlong atomic_cmpxchg_long_bootstrap(jlong, volatile jlong*, jlong);
   static jint  atomic_add_bootstrap         (jint,  volatile jint*);
-  static void  fence_bootstrap              ();
 
   static void setup_fpu() {}
 
--- a/hotspot/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp	Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp	Thu Mar 26 14:31:45 2009 -0700
@@ -44,11 +44,12 @@
 
 inline void OrderAccess::fence() {
   if (os::is_MP()) {
+    // always use locked addl since mfence is sometimes expensive
 #ifdef AMD64
-    __asm__ __volatile__ ("mfence":::"memory");
+    __asm__ volatile ("lock; addl $0,0(%%rsp)" : : : "cc", "memory");
 #else
     __asm__ volatile ("lock; addl $0,0(%%esp)" : : : "cc", "memory");
-#endif // AMD64
+#endif
   }
 }
 
--- a/hotspot/src/os_cpu/solaris_sparc/vm/orderAccess_solaris_sparc.inline.hpp	Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/solaris_sparc/vm/orderAccess_solaris_sparc.inline.hpp	Thu Mar 26 14:31:45 2009 -0700
@@ -60,22 +60,10 @@
   dummy = 0;
 }
 
-#if defined(COMPILER2) || defined(_LP64)
-
 inline void OrderAccess::fence() {
   _OrderAccess_fence();
 }
 
-#else  // defined(COMPILER2) || defined(_LP64)
-
-inline void OrderAccess::fence() {
-  if (os::is_MP()) {
-    (*os::fence_func)();
-  }
-}
-
-#endif // defined(COMPILER2) || defined(_LP64)
-
 #endif // _GNU_SOURCE
 
 inline jbyte    OrderAccess::load_acquire(volatile jbyte*   p) { return *p; }
--- a/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp	Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp	Thu Mar 26 14:31:45 2009 -0700
@@ -619,7 +619,6 @@
 typedef jint  cmpxchg_func_t     (jint,  volatile jint*,  jint);
 typedef jlong cmpxchg_long_func_t(jlong, volatile jlong*, jlong);
 typedef jint  add_func_t         (jint,  volatile jint*);
-typedef void  fence_func_t       ();
 
 jint os::atomic_xchg_bootstrap(jint exchange_value, volatile jint* dest) {
   // try to use the stub:
@@ -681,25 +680,10 @@
   return (*dest) += add_value;
 }
 
-void os::fence_bootstrap() {
-  // try to use the stub:
-  fence_func_t* func = CAST_TO_FN_PTR(fence_func_t*, StubRoutines::fence_entry());
-
-  if (func != NULL) {
-    os::fence_func = func;
-    (*func)();
-    return;
-  }
-  assert(Threads::number_of_threads() == 0, "for bootstrap only");
-
-  // don't have to do anything for a single thread
-}
-
 xchg_func_t*         os::atomic_xchg_func         = os::atomic_xchg_bootstrap;
 cmpxchg_func_t*      os::atomic_cmpxchg_func      = os::atomic_cmpxchg_bootstrap;
 cmpxchg_long_func_t* os::atomic_cmpxchg_long_func = os::atomic_cmpxchg_long_bootstrap;
 add_func_t*          os::atomic_add_func          = os::atomic_add_bootstrap;
-fence_func_t*        os::fence_func               = os::fence_bootstrap;
 
 #endif // !_LP64 && !COMPILER2
 
--- a/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.hpp	Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.hpp	Thu Mar 26 14:31:45 2009 -0700
@@ -29,13 +29,11 @@
   static jint  (*atomic_cmpxchg_func)     (jint,  volatile jint*,  jint);
   static jlong (*atomic_cmpxchg_long_func)(jlong, volatile jlong*, jlong);
   static jint  (*atomic_add_func)         (jint,  volatile jint*);
-  static void  (*fence_func)              ();
 
   static jint  atomic_xchg_bootstrap        (jint,  volatile jint*);
   static jint  atomic_cmpxchg_bootstrap     (jint,  volatile jint*,  jint);
   static jlong atomic_cmpxchg_long_bootstrap(jlong, volatile jlong*, jlong);
   static jint  atomic_add_bootstrap         (jint,  volatile jint*);
-  static void  fence_bootstrap              ();
 
   static void setup_fpu() {}
 
--- a/hotspot/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp	Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp	Thu Mar 26 14:31:45 2009 -0700
@@ -61,11 +61,8 @@
 #endif // AMD64
   }
   inline void _OrderAccess_fence() {
-#ifdef AMD64
-    __asm__ __volatile__ ("mfence":::"memory");
-#else
+    // Always use locked addl since mfence is sometimes expensive
     __asm__ volatile ("lock; addl $0,0(%%esp)" : : : "cc", "memory");
-#endif // AMD64
   }
 
 }
--- a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp	Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp	Thu Mar 26 14:31:45 2009 -0700
@@ -794,7 +794,6 @@
 typedef jint  cmpxchg_func_t     (jint,  volatile jint*,  jint);
 typedef jlong cmpxchg_long_func_t(jlong, volatile jlong*, jlong);
 typedef jint  add_func_t         (jint,  volatile jint*);
-typedef void  fence_func_t       ();
 
 jint os::atomic_xchg_bootstrap(jint exchange_value, volatile jint* dest) {
   // try to use the stub:
@@ -856,25 +855,10 @@
   return (*dest) += add_value;
 }
 
-void os::fence_bootstrap() {
-  // try to use the stub:
-  fence_func_t* func = CAST_TO_FN_PTR(fence_func_t*, StubRoutines::fence_entry());
-
-  if (func != NULL) {
-    os::fence_func = func;
-    (*func)();
-    return;
-  }
-  assert(Threads::number_of_threads() == 0, "for bootstrap only");
-
-  // don't have to do anything for a single thread
-}
-
 xchg_func_t*         os::atomic_xchg_func         = os::atomic_xchg_bootstrap;
 cmpxchg_func_t*      os::atomic_cmpxchg_func      = os::atomic_cmpxchg_bootstrap;
 cmpxchg_long_func_t* os::atomic_cmpxchg_long_func = os::atomic_cmpxchg_long_bootstrap;
 add_func_t*          os::atomic_add_func          = os::atomic_add_bootstrap;
-fence_func_t*        os::fence_func               = os::fence_bootstrap;
 
 extern "C" _solaris_raw_setup_fpu(address ptr);
 void os::setup_fpu() {
--- a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp	Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp	Thu Mar 26 14:31:45 2009 -0700
@@ -32,13 +32,11 @@
   static jint  (*atomic_cmpxchg_func)     (jint,  volatile jint*,  jint);
   static jlong (*atomic_cmpxchg_long_func)(jlong, volatile jlong*, jlong);
   static jint  (*atomic_add_func)         (jint,  volatile jint*);
-  static void  (*fence_func)              ();
 
   static jint  atomic_xchg_bootstrap        (jint,  volatile jint*);
   static jint  atomic_cmpxchg_bootstrap     (jint,  volatile jint*,  jint);
   static jlong atomic_cmpxchg_long_bootstrap(jlong, volatile jlong*, jlong);
   static jint  atomic_add_bootstrap         (jint,  volatile jint*);
-  static void  fence_bootstrap              ();
 
   static void setup_fpu();
 #endif // AMD64
--- a/hotspot/src/os_cpu/windows_x86/vm/orderAccess_windows_x86.inline.hpp	Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/windows_x86/vm/orderAccess_windows_x86.inline.hpp	Thu Mar 26 14:31:45 2009 -0700
@@ -46,7 +46,7 @@
 
 inline void OrderAccess::fence() {
 #ifdef AMD64
-  (*os::fence_func)();
+  StubRoutines_fence();
 #else
   if (os::is_MP()) {
     __asm {
--- a/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.cpp	Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.cpp	Thu Mar 26 14:31:45 2009 -0700
@@ -196,7 +196,6 @@
 typedef jlong     cmpxchg_long_func_t    (jlong,    volatile jlong*, jlong);
 typedef jint      add_func_t             (jint,     volatile jint*);
 typedef intptr_t  add_ptr_func_t         (intptr_t, volatile intptr_t*);
-typedef void      fence_func_t           ();
 
 #ifdef AMD64
 
@@ -292,27 +291,11 @@
   return (*dest) += add_value;
 }
 
-void os::fence_bootstrap() {
-  // try to use the stub:
-  fence_func_t* func = CAST_TO_FN_PTR(fence_func_t*, StubRoutines::fence_entry());
-
-  if (func != NULL) {
-    os::fence_func = func;
-    (*func)();
-    return;
-  }
-  assert(Threads::number_of_threads() == 0, "for bootstrap only");
-
-  // don't have to do anything for a single thread
-}
-
-
 xchg_func_t*         os::atomic_xchg_func         = os::atomic_xchg_bootstrap;
 xchg_ptr_func_t*     os::atomic_xchg_ptr_func     = os::atomic_xchg_ptr_bootstrap;
 cmpxchg_func_t*      os::atomic_cmpxchg_func      = os::atomic_cmpxchg_bootstrap;
 add_func_t*          os::atomic_add_func          = os::atomic_add_bootstrap;
 add_ptr_func_t*      os::atomic_add_ptr_func      = os::atomic_add_ptr_bootstrap;
-fence_func_t*        os::fence_func               = os::fence_bootstrap;
 
 #endif // AMD64
 
--- a/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.hpp	Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.hpp	Thu Mar 26 14:31:45 2009 -0700
@@ -35,9 +35,6 @@
   static jint      (*atomic_add_func)           (jint,      volatile jint*);
   static intptr_t  (*atomic_add_ptr_func)       (intptr_t,  volatile intptr_t*);
 
-  static void      (*fence_func)                ();
-
-
   static jint      atomic_xchg_bootstrap        (jint,      volatile jint*);
   static intptr_t  atomic_xchg_ptr_bootstrap    (intptr_t,  volatile intptr_t*);
 
@@ -53,8 +50,6 @@
 #ifdef AMD64
   static jint      atomic_add_bootstrap         (jint,      volatile jint*);
   static intptr_t  atomic_add_ptr_bootstrap     (intptr_t,  volatile intptr_t*);
-
-  static void      fence_bootstrap              ();
 #endif // AMD64
 
   static void setup_fpu();
--- a/hotspot/src/share/vm/includeDB_core	Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/share/vm/includeDB_core	Thu Mar 26 14:31:45 2009 -0700
@@ -3154,6 +3154,8 @@
 oopsHierarchy.cpp                       thread_<os_family>.inline.hpp
 
 orderAccess.cpp                         orderAccess.hpp
+orderAccess.cpp                         stubRoutines.hpp
+orderAccess.cpp                         thread.hpp
 
 orderAccess.hpp                         allocation.hpp
 orderAccess.hpp                         os.hpp
--- a/hotspot/src/share/vm/runtime/orderAccess.cpp	Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/share/vm/runtime/orderAccess.cpp	Thu Mar 26 14:31:45 2009 -0700
@@ -26,3 +26,15 @@
 # include "incls/_orderAccess.cpp.incl"
 
 volatile intptr_t OrderAccess::dummy = 0;
+
+void OrderAccess::StubRoutines_fence() {
+  // Use a stub if it exists.  It may not exist during bootstrap so do
+  // nothing in that case but assert if no fence code exists after threads have been created
+  void (*func)() = CAST_TO_FN_PTR(void (*)(), StubRoutines::fence_entry());
+
+  if (func != NULL) {
+    (*func)();
+    return;
+  }
+  assert(Threads::number_of_threads() == 0, "for bootstrap only");
+}
--- a/hotspot/src/share/vm/runtime/orderAccess.hpp	Tue Mar 24 15:09:52 2009 -0700
+++ b/hotspot/src/share/vm/runtime/orderAccess.hpp	Thu Mar 26 14:31:45 2009 -0700
@@ -300,4 +300,10 @@
   // In order to force a memory access, implementations may
   // need a volatile externally visible dummy variable.
   static volatile intptr_t dummy;
+
+ private:
+  // This is a helper that invokes the StubRoutines::fence_entry()
+  // routine if it exists.  It should only be used by platforms that
+  // don't have another way to do the inline assembly.
+  static void StubRoutines_fence();
 };