8179954: AArch64: C1 and C2 volatile accesses are not sequentially consistent
authoraph
Thu, 11 May 2017 13:11:42 +0100
changeset 45115 e3a622b2b7db
parent 45114 45644c5f6b8e
child 45116 87eadc18f199
child 45170 39a6c3511153
child 45236 1b8879e6d9c2
8179954: AArch64: C1 and C2 volatile accesses are not sequentially consistent Reviewed-by: roland
hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp
hotspot/src/cpu/aarch64/vm/templateTable_aarch64.cpp
--- a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp	Mon May 15 12:20:15 2017 +0200
+++ b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp	Thu May 11 13:11:42 2017 +0100
@@ -1347,6 +1347,16 @@
 
 void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result,
                                        CodeEmitInfo* info) {
+  // 8179954: We need to make sure that the code generated for
+  // volatile accesses forms a sequentially-consistent set of
+  // operations when combined with STLR and LDAR.  Without a leading
+  // membar it's possible for a simple Dekker test to fail if loads
+  // use LD;DMB but stores use STLR.  This can happen if C2 compiles
+  // the stores in one method and C1 compiles the loads in another.
+  if (! UseBarriersForVolatile) {
+    __ membar();
+  }
+
   __ volatile_load_mem_reg(address, result, info);
 }
 
--- a/hotspot/src/cpu/aarch64/vm/templateTable_aarch64.cpp	Mon May 15 12:20:15 2017 +0200
+++ b/hotspot/src/cpu/aarch64/vm/templateTable_aarch64.cpp	Thu May 11 13:11:42 2017 +0100
@@ -2389,17 +2389,31 @@
   const Register obj   = r4;
   const Register off   = r19;
   const Register flags = r0;
+  const Register raw_flags = r6;
   const Register bc    = r4; // uses same reg as obj, so don't mix them
 
   resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
   jvmti_post_field_access(cache, index, is_static, false);
-  load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
+  load_field_cp_cache_entry(obj, cache, index, off, raw_flags, is_static);
 
   if (!is_static) {
     // obj is on the stack
     pop_and_check_object(obj);
   }
 
+  // 8179954: We need to make sure that the code generated for
+  // volatile accesses forms a sequentially-consistent set of
+  // operations when combined with STLR and LDAR.  Without a leading
+  // membar it's possible for a simple Dekker test to fail if loads
+  // use LDR;DMB but stores use STLR.  This can happen if C2 compiles
+  // the stores in one method and we interpret the loads in another.
+  if (! UseBarriersForVolatile) {
+    Label notVolatile;
+    __ tbz(raw_flags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
+    __ membar(MacroAssembler::AnyAny);
+    __ bind(notVolatile);
+  }
+
   const Address field(obj, off);
 
   Label Done, notByte, notBool, notInt, notShort, notChar,
@@ -2407,7 +2421,8 @@
 
   // x86 uses a shift and mask or wings it with a shift plus assert
   // the mask is not needed. aarch64 just uses bitfield extract
-  __ ubfxw(flags, flags, ConstantPoolCacheEntry::tos_state_shift,  ConstantPoolCacheEntry::tos_state_bits);
+  __ ubfxw(flags, raw_flags, ConstantPoolCacheEntry::tos_state_shift,
+           ConstantPoolCacheEntry::tos_state_bits);
 
   assert(btos == 0, "change code, btos != 0");
   __ cbnz(flags, notByte);
@@ -2529,9 +2544,11 @@
 #endif
 
   __ bind(Done);
-  // It's really not worth bothering to check whether this field
-  // really is volatile in the slow case.
+
+  Label notVolatile;
+  __ tbz(raw_flags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
   __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
+  __ bind(notVolatile);
 }
 
 
@@ -2979,6 +2996,19 @@
   __ null_check(r0);
   const Address field(r0, r1);
 
+  // 8179954: We need to make sure that the code generated for
+  // volatile accesses forms a sequentially-consistent set of
+  // operations when combined with STLR and LDAR.  Without a leading
+  // membar it's possible for a simple Dekker test to fail if loads
+  // use LDR;DMB but stores use STLR.  This can happen if C2 compiles
+  // the stores in one method and we interpret the loads in another.
+  if (! UseBarriersForVolatile) {
+    Label notVolatile;
+    __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
+    __ membar(MacroAssembler::AnyAny);
+    __ bind(notVolatile);
+  }
+
   // access field
   switch (bytecode()) {
   case Bytecodes::_fast_agetfield:
@@ -3027,6 +3057,22 @@
   __ get_cache_and_index_at_bcp(r2, r3, 2);
   __ ldr(r1, Address(r2, in_bytes(ConstantPoolCache::base_offset() +
                                   ConstantPoolCacheEntry::f2_offset())));
+
+  // 8179954: We need to make sure that the code generated for
+  // volatile accesses forms a sequentially-consistent set of
+  // operations when combined with STLR and LDAR.  Without a leading
+  // membar it's possible for a simple Dekker test to fail if loads
+  // use LDR;DMB but stores use STLR.  This can happen if C2 compiles
+  // the stores in one method and we interpret the loads in another.
+  if (! UseBarriersForVolatile) {
+    Label notVolatile;
+    __ ldrw(r3, Address(r2, in_bytes(ConstantPoolCache::base_offset() +
+                                     ConstantPoolCacheEntry::flags_offset())));
+    __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile);
+    __ membar(MacroAssembler::AnyAny);
+    __ bind(notVolatile);
+  }
+
   // make sure exception is reported in correct bcp range (getfield is
   // next instruction)
   __ increment(rbcp);