8029101: PPC64 (part 211): ordering of Independent Reads of Independent Writes
author goetz
date Thu, 16 Jan 2014 14:25:51 +0100
changeset 22868 7f6eb436873b
parent 22867 309bcf262a19
child 22869 d479c1cebc3e
8029101: PPC64 (part 211): ordering of Independent Reads of Independent Writes Reviewed-by: dholmes, kvn Contributed-by: martin.doerr@sap.com
hotspot/src/cpu/ppc/vm/globalDefinitions_ppc.hpp
hotspot/src/share/vm/interpreter/bytecodeInterpreter.cpp
hotspot/src/share/vm/opto/library_call.cpp
hotspot/src/share/vm/opto/parse.hpp
hotspot/src/share/vm/opto/parse1.cpp
hotspot/src/share/vm/opto/parse3.cpp
hotspot/src/share/vm/prims/unsafe.cpp
hotspot/src/share/vm/utilities/globalDefinitions.hpp
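Background: IRIW ("Independent Reads of Independent Writes") is the litmus test named in the title. Two threads write two independent volatile variables, two other threads read them in opposite orders, and both readers must observe the writes in one agreed order. A minimal standalone sketch of the test, using C++11 std::atomic with the default seq_cst ordering as a stand-in for Java volatile (this program and its names are illustrative only, not part of the changeset):

#include <atomic>
#include <cstdio>
#include <thread>

std::atomic<int> x(0), y(0);   // two independent "volatile" variables
int r1, r2, r3, r4;            // results, read back after all threads join

int main() {
  std::thread w1([] { x.store(1); });                     // writer 1: x = 1
  std::thread w2([] { y.store(1); });                     // writer 2: y = 1
  std::thread rd1([] { r1 = x.load(); r2 = y.load(); });  // reader 1: x, then y
  std::thread rd2([] { r3 = y.load(); r4 = x.load(); });  // reader 2: y, then x
  w1.join(); w2.join(); rd1.join(); rd2.join();
  // With seq_cst atomics (and with Java volatile) the outcome
  // r1==1 && r2==0 && r3==1 && r4==0 is forbidden: the two readers
  // would have seen the independent writes in contradictory orders.
  std::printf("r1=%d r2=%d r3=%d r4=%d\n", r1, r2, r3, r4);
  return 0;
}

On a CPU that is not multiple-copy-atomic, a sync issued after each volatile store orders only that writer's own later accesses; it does not stop two readers from seeing the two independent writes in different orders. The changes below therefore issue the sync between volatile reads instead, as the new comment in globalDefinitions.hpp explains.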
--- a/hotspot/src/cpu/ppc/vm/globalDefinitions_ppc.hpp	Tue Jan 07 17:24:59 2014 +0100
+++ b/hotspot/src/cpu/ppc/vm/globalDefinitions_ppc.hpp	Thu Jan 16 14:25:51 2014 +0100
@@ -37,4 +37,7 @@
 // signatures accordingly.
 const bool CCallingConventionRequiresIntsAsLongs = true;
 
+// The PPC CPUs are NOT multiple-copy-atomic.
+#define CPU_NOT_MULTIPLE_COPY_ATOMIC
+
 #endif // CPU_PPC_VM_GLOBALDEFINITIONS_PPC_HPP
--- a/hotspot/src/share/vm/interpreter/bytecodeInterpreter.cpp	Tue Jan 07 17:24:59 2014 +0100
+++ b/hotspot/src/share/vm/interpreter/bytecodeInterpreter.cpp	Thu Jan 16 14:25:51 2014 +0100
@@ -2034,6 +2034,9 @@
           TosState tos_type = cache->flag_state();
           int field_offset = cache->f2_as_index();
           if (cache->is_volatile()) {
+            if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+              OrderAccess::fence();
+            }
             if (tos_type == atos) {
               VERIFY_OOP(obj->obj_field_acquire(field_offset));
               SET_STACK_OBJECT(obj->obj_field_acquire(field_offset), -1);
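The interpreter change above issues a full fence before every volatile read when support_IRIW_for_not_multiple_copy_atomic_cpu is set: on POWER an acquire barrier after the load alone does not rule out the IRIW outcome, so the full sync goes in front of each volatile read rather than after each volatile write. A standalone analogue of that read path (an assumed sketch, not HotSpot code; volatile_field and read_volatile_field are illustrative names):

#include <atomic>

std::atomic<int> volatile_field(0);

// Volatile read on a non-multiple-copy-atomic CPU: full fence first, then the
// acquire load (mirrors the OrderAccess::fence() before the *_acquire access above).
int read_volatile_field() {
  std::atomic_thread_fence(std::memory_order_seq_cst);
  return volatile_field.load(std::memory_order_acquire);
}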
--- a/hotspot/src/share/vm/opto/library_call.cpp	Tue Jan 07 17:24:59 2014 +0100
+++ b/hotspot/src/share/vm/opto/library_call.cpp	Thu Jan 16 14:25:51 2014 +0100
@@ -2627,8 +2627,13 @@
     // rough approximation of type.
     need_mem_bar = true;
     // For Stores, place a memory ordering barrier now.
-    if (is_store)
+    if (is_store) {
       insert_mem_bar(Op_MemBarRelease);
+    } else {
+      if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+        insert_mem_bar(Op_MemBarVolatile);
+      }
+    }
   }
 
   // Memory barrier to prevent normal and 'unsafe' accesses from
@@ -2717,10 +2722,13 @@
   }
 
   if (is_volatile) {
-    if (!is_store)
+    if (!is_store) {
       insert_mem_bar(Op_MemBarAcquire);
-    else
-      insert_mem_bar(Op_MemBarVolatile);
+    } else {
+      if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
+        insert_mem_bar(Op_MemBarVolatile);
+      }
+    }
   }
 
   if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
--- a/hotspot/src/share/vm/opto/parse.hpp	Tue Jan 07 17:24:59 2014 +0100
+++ b/hotspot/src/share/vm/opto/parse.hpp	Thu Jan 16 14:25:51 2014 +0100
@@ -330,7 +330,8 @@
 
   GraphKit      _exits;         // Record all normal returns and throws here.
   bool          _wrote_final;   // Did we write a final field?
-  bool          _count_invocations; // update and test invocation counter
+  bool          _wrote_volatile;     // Did we write a volatile field?
+  bool          _count_invocations;  // update and test invocation counter
   bool          _method_data_update; // update method data oop
   Node*         _alloc_with_final;   // An allocation node with final field
 
@@ -373,6 +374,8 @@
   GraphKit&     exits()               { return _exits; }
   bool          wrote_final() const   { return _wrote_final; }
   void      set_wrote_final(bool z)   { _wrote_final = z; }
+  bool          wrote_volatile() const { return _wrote_volatile; }
+  void      set_wrote_volatile(bool z) { _wrote_volatile = z; }
   bool          count_invocations() const  { return _count_invocations; }
   bool          method_data_update() const { return _method_data_update; }
   Node*    alloc_with_final() const   { return _alloc_with_final; }
--- a/hotspot/src/share/vm/opto/parse1.cpp	Tue Jan 07 17:24:59 2014 +0100
+++ b/hotspot/src/share/vm/opto/parse1.cpp	Thu Jan 16 14:25:51 2014 +0100
@@ -390,6 +390,7 @@
   _expected_uses = expected_uses;
   _depth = 1 + (caller->has_method() ? caller->depth() : 0);
   _wrote_final = false;
+  _wrote_volatile = false;
   _alloc_with_final = NULL;
   _entry_bci = InvocationEntryBci;
   _tf = NULL;
@@ -907,7 +908,13 @@
   Node* iophi = _exits.i_o();
   _exits.set_i_o(gvn().transform(iophi));
 
-  if (wrote_final()) {
+  // On PPC64, also add MemBarRelease for constructors which write
+  // volatile fields. As support_IRIW_for_not_multiple_copy_atomic_cpu
+  // is set on PPC64, no sync instruction is issued after volatile
+  // stores. We want to guarantee the same behaviour as on platforms
+  // with total store order, although this is not required by the Java
+  // memory model. So as with finals, we add a barrier here.
+  if (wrote_final() PPC64_ONLY(|| (wrote_volatile() && method()->is_initializer()))) {
     // This method (which must be a constructor by the rules of Java)
     // wrote a final.  The effects of all initializations must be
     // committed to memory before any code after the constructor
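The hunk above extends the existing final-field barrier: with support_IRIW_for_not_multiple_copy_atomic_cpu set, volatile stores no longer carry a trailing sync, so a constructor that writes a volatile field gets a MemBarRelease at its exit, keeping the field write ordered before any later publication of the new object. A standalone analogue of the pattern being protected (an assumed sketch, not HotSpot code; Box, published and the chosen memory orders are illustrative):

#include <atomic>

struct Box {
  std::atomic<int> value;
  explicit Box(int v) {
    value.store(v, std::memory_order_release);            // the volatile field write (no trailing sync on PPC64)
    std::atomic_thread_fence(std::memory_order_release);  // ~ the MemBarRelease added at constructor exit in do_exits()
  }
};

std::atomic<Box*> published(nullptr);

void publisher() {
  published.store(new Box(42), std::memory_order_relaxed); // publish the reference
}

int consumer() {
  Box* b = published.load(std::memory_order_acquire);       // observe the publication
  return b ? b->value.load(std::memory_order_relaxed) : -1; // sees 42, never 0, once b is non-null
}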
--- a/hotspot/src/share/vm/opto/parse3.cpp	Tue Jan 07 17:24:59 2014 +0100
+++ b/hotspot/src/share/vm/opto/parse3.cpp	Thu Jan 16 14:25:51 2014 +0100
@@ -227,6 +227,9 @@
   } else {
     type = Type::get_const_basic_type(bt);
   }
+  if (support_IRIW_for_not_multiple_copy_atomic_cpu && field->is_volatile()) {
+    insert_mem_bar(Op_MemBarVolatile);   // StoreLoad barrier
+  }
   // Build the load.
   //
   MemNode::MemOrd mo = is_vol ? MemNode::acquire : MemNode::unordered;
@@ -317,7 +320,16 @@
   // If reference is volatile, prevent following volatiles ops from
   // floating up before the volatile write.
   if (is_vol) {
-    insert_mem_bar(Op_MemBarVolatile); // Use fat membar
+    // If not multiple copy atomic, we do the MemBarVolatile before the load.
+    if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
+      insert_mem_bar(Op_MemBarVolatile); // Use fat membar
+    }
+    // Remember we wrote a volatile field.
+    // For not multiple copy atomic cpu (ppc64) a barrier should be issued
+    // in constructors which have such stores. See do_exits() in parse1.cpp.
+    if (is_field) {
+      set_wrote_volatile(true);
+    }
   }
 
   // If the field is final, the rules of Java say we are in <init> or <clinit>.
--- a/hotspot/src/share/vm/prims/unsafe.cpp	Tue Jan 07 17:24:59 2014 +0100
+++ b/hotspot/src/share/vm/prims/unsafe.cpp	Thu Jan 16 14:25:51 2014 +0100
@@ -162,6 +162,9 @@
 
 #define GET_FIELD_VOLATILE(obj, offset, type_name, v) \
   oop p = JNIHandles::resolve(obj); \
+  if (support_IRIW_for_not_multiple_copy_atomic_cpu) { \
+    OrderAccess::fence(); \
+  } \
   volatile type_name v = OrderAccess::load_acquire((volatile type_name*)index_oop_from_field_offset_long(p, offset));
 
 #define SET_FIELD_VOLATILE(obj, offset, type_name, x) \
--- a/hotspot/src/share/vm/utilities/globalDefinitions.hpp	Tue Jan 07 17:24:59 2014 +0100
+++ b/hotspot/src/share/vm/utilities/globalDefinitions.hpp	Thu Jan 16 14:25:51 2014 +0100
@@ -398,6 +398,17 @@
 #define PLATFORM_NATIVE_STACK_WALKING_SUPPORTED 1
 #endif
 
+// To assure the IRIW property on processors that are not multiple copy
+// atomic, sync instructions must be issued between volatile reads to
+// assure their ordering, instead of after volatile stores.
+// (See "A Tutorial Introduction to the ARM and POWER Relaxed Memory Models"
+// by Luc Maranget, Susmit Sarkar and Peter Sewell, INRIA/Cambridge)
+#ifdef CPU_NOT_MULTIPLE_COPY_ATOMIC
+const bool support_IRIW_for_not_multiple_copy_atomic_cpu = true;
+#else
+const bool support_IRIW_for_not_multiple_copy_atomic_cpu = false;
+#endif
+
 // The byte alignment to be used by Arena::Amalloc.  See bugid 4169348.
 // Note: this value must be a power of 2
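Taken together, the flag selects between two placements of the full (StoreLoad) barrier for volatile accesses. A standalone sketch of the two schemes (an assumed analogue, not the actual C2 IR; field, volatile_load and volatile_store are illustrative, while CPU_NOT_MULTIPLE_COPY_ATOMIC is the macro defined in globalDefinitions_ppc.hpp above):

#include <atomic>

std::atomic<int> field(0);

#ifdef CPU_NOT_MULTIPLE_COPY_ATOMIC
// Non-multiple-copy-atomic CPU (PPC64): the full barrier precedes each volatile read.
int volatile_load() {
  std::atomic_thread_fence(std::memory_order_seq_cst);  // ~ MemBarVolatile / sync before the read
  return field.load(std::memory_order_acquire);         // ~ MemBarAcquire after the read
}
void volatile_store(int v) {
  field.store(v, std::memory_order_release);            // ~ MemBarRelease, no trailing full barrier
}
#else
// Multiple-copy-atomic CPU: the full barrier trails each volatile write (previous scheme).
int volatile_load() {
  return field.load(std::memory_order_acquire);         // ~ MemBarAcquire after the read
}
void volatile_store(int v) {
  field.store(v, std::memory_order_release);            // ~ MemBarRelease before the store
  std::atomic_thread_fence(std::memory_order_seq_cst);  // ~ MemBarVolatile after the store
}
#endif

Moving the full barrier to the reader side keeps volatile stores cheaper on PPC64, while the sync between a thread's volatile reads restores the IRIW guarantee required by the Java memory model.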