8151775: aarch64: add support for 8.1 LSE atomic operations
author enevill
date Tue, 29 Mar 2016 10:07:54 +0000
changeset 37269 5c2c4e5bb067
parent 36844 ce97bc269a80
child 37270 a24f2c20cc24
8151775: aarch64: add support for 8.1 LSE atomic operations
Reviewed-by: aph
hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp
hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
hotspot/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp
hotspot/src/cpu/aarch64/vm/vm_version_aarch64.hpp
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp	Wed Mar 30 12:34:08 2016 +0200
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp	Tue Mar 29 10:07:54 2016 +0000
@@ -1221,6 +1221,38 @@
   INSN(caspal,  true,  true)
 #undef INSN
 
+  // 8.1 Atomic operations
+  void lse_atomic(Register Rs, Register Rt, Register Rn,
+                  enum operand_size sz, int op1, int op2, bool a, bool r) {
+    starti;
+    f(sz, 31, 30), f(0b111000, 29, 24), f(a, 23), f(r, 22), f(1, 21);
+    rf(Rs, 16), f(op1, 15), f(op2, 14, 12), f(0, 11, 10), rf(Rn, 5), zrf(Rt, 0);
+  }
+
+#define INSN(NAME, NAME_A, NAME_L, NAME_AL, op1, op2)                   \
+  void NAME(operand_size sz, Register Rs, Register Rt, Register Rn) {   \
+    lse_atomic(Rs, Rt, Rn, sz, op1, op2, false, false);                 \
+  }                                                                     \
+  void NAME_A(operand_size sz, Register Rs, Register Rt, Register Rn) { \
+    lse_atomic(Rs, Rt, Rn, sz, op1, op2, true, false);                  \
+  }                                                                     \
+  void NAME_L(operand_size sz, Register Rs, Register Rt, Register Rn) { \
+    lse_atomic(Rs, Rt, Rn, sz, op1, op2, false, true);                  \
+  }                                                                     \
+  void NAME_AL(operand_size sz, Register Rs, Register Rt, Register Rn) {\
+    lse_atomic(Rs, Rt, Rn, sz, op1, op2, true, true);                   \
+  }
+  INSN(ldadd,  ldadda,  ldaddl,  ldaddal,  0, 0b000);
+  INSN(ldbic,  ldbica,  ldbicl,  ldbical,  0, 0b001);
+  INSN(ldeor,  ldeora,  ldeorl,  ldeoral,  0, 0b010);
+  INSN(ldorr,  ldorra,  ldorrl,  ldorral,  0, 0b011);
+  INSN(ldsmax, ldsmaxa, ldsmaxl, ldsmaxal, 0, 0b100);
+  INSN(ldsmin, ldsmina, ldsminl, ldsminal, 0, 0b101);
+  INSN(ldumax, ldumaxa, ldumaxl, ldumaxal, 0, 0b110);
+  INSN(ldumin, ldumina, lduminl, lduminal, 0, 0b111);
+  INSN(swp,    swpa,    swpl,    swpal,    1, 0b000);
+#undef INSN
+
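Note: each INSN row above generates four member functions per mnemonic: plain, acquire (suffix a), release (suffix l), and acquire+release (suffix al), differing only in the a/r bits passed to lse_atomic. A hand-expanded sketch of one variant, with the bit layout from lse_atomic restated in comments (the example encoding is hand-computed for illustration, not taken from the changeset):

    // Expansion of INSN(ldadd, ldadda, ldaddl, ldaddal, 0, 0b000),
    // acquire+release variant. Field layout, per lse_atomic above:
    //   sz[31:30] 111000[29:24] A[23] R[22] 1[21] Rs[20:16]
    //   op1[15] op2[14:12] 00[11:10] Rn[9:5] Rt[4:0]
    void ldaddal(operand_size sz, Register Rs, Register Rt, Register Rn) {
      lse_atomic(Rs, Rt, Rn, sz, 0, 0b000, /*a=*/true, /*r=*/true);
    }
    // e.g. ldaddal(Assembler::word, r1, r0, r2) emits
    // LDADDAL w1, w0, [x2] == 0xb8e10040
    //   (10 111000 1 1 1 00001 0 000 00 00010 00000)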
   // Load register (literal)
 #define INSN(NAME, opc, V)                                              \
   void NAME(Register Rt, address dest) {                                \
--- a/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Wed Mar 30 12:34:08 2016 +0200
+++ b/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Tue Mar 29 10:07:54 2016 +0000
@@ -1556,54 +1556,14 @@
 }
 
 void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) {
-  if (UseLSE) {
-    __ mov(rscratch1, cmpval);
-    __ casal(Assembler::word, rscratch1, newval, addr);
-    __ cmpw(rscratch1, cmpval);
-    __ cset(rscratch1, Assembler::NE);
-  } else {
-    Label retry_load, nope;
-    // flush and load exclusive from the memory location
-    // and fail if it is not what we expect
-    __ prfm(Address(addr), PSTL1STRM);
-    __ bind(retry_load);
-    __ ldaxrw(rscratch1, addr);
-    __ cmpw(rscratch1, cmpval);
-    __ cset(rscratch1, Assembler::NE);
-    __ br(Assembler::NE, nope);
-    // if we store+flush with no intervening write rscratch1 wil be zero
-    __ stlxrw(rscratch1, newval, addr);
-    // retry so we only ever return after a load fails to compare
-    // ensures we don't return a stale value after a failed write.
-    __ cbnzw(rscratch1, retry_load);
-    __ bind(nope);
-  }
+  __ cmpxchg(addr, cmpval, newval, Assembler::word, /* acquire*/ true, /* release*/ true, rscratch1);
+  __ cset(rscratch1, Assembler::NE);
   __ membar(__ AnyAny);
 }
 
 void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) {
-  if (UseLSE) {
-    __ mov(rscratch1, cmpval);
-    __ casal(Assembler::xword, rscratch1, newval, addr);
-    __ cmp(rscratch1, cmpval);
-    __ cset(rscratch1, Assembler::NE);
-  } else {
-    Label retry_load, nope;
-    // flush and load exclusive from the memory location
-    // and fail if it is not what we expect
-    __ prfm(Address(addr), PSTL1STRM);
-    __ bind(retry_load);
-    __ ldaxr(rscratch1, addr);
-    __ cmp(rscratch1, cmpval);
-    __ cset(rscratch1, Assembler::NE);
-    __ br(Assembler::NE, nope);
-    // if we store+flush with no intervening write rscratch1 wil be zero
-    __ stlxr(rscratch1, newval, addr);
-    // retry so we only ever return after a load fails to compare
-    // ensures we don't return a stale value after a failed write.
-    __ cbnz(rscratch1, retry_load);
-    __ bind(nope);
-  }
+  __ cmpxchg(addr, cmpval, newval, Assembler::xword, /* acquire*/ true, /* release*/ true, rscratch1);
+  __ cset(rscratch1, Assembler::NE);
   __ membar(__ AnyAny);
 }
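Note: casw and casl now share a single MacroAssembler::cmpxchg helper, so the UseLSE decision is hidden from C1. The helper's definition is not among the hunks quoted here; the following is an illustrative reconstruction assuming only the signature visible at the call sites, not the changeset's actual body:

    void MacroAssembler::cmpxchg(Register addr, Register expected, Register new_val,
                                 enum operand_size size, bool acquire, bool release,
                                 Register tmp) {
      if (UseLSE) {
        mov(tmp, expected);                // CAS overwrites the compare register
        casal(size, tmp, new_val, addr);   // a full version would pick
                                           // cas/casa/casl/casal from acquire/release
        if (size == xword) cmp(tmp, expected); else cmpw(tmp, expected);
      } else {
        // LDAXR/STLXR retry loop, equivalent to the code deleted above
      }
    }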
 
@@ -3121,38 +3081,32 @@
   BasicType type = src->type();
   bool is_oop = type == T_OBJECT || type == T_ARRAY;
 
-  void (MacroAssembler::* lda)(Register Rd, Register Ra);
-  void (MacroAssembler::* add)(Register Rd, Register Rn, RegisterOrConstant increment);
-  void (MacroAssembler::* stl)(Register Rs, Register Rt, Register Rn);
+  void (MacroAssembler::* add)(Register prev, RegisterOrConstant incr, Register addr);
+  void (MacroAssembler::* xchg)(Register prev, Register newv, Register addr);
 
   switch(type) {
   case T_INT:
-    lda = &MacroAssembler::ldaxrw;
-    add = &MacroAssembler::addw;
-    stl = &MacroAssembler::stlxrw;
+    xchg = &MacroAssembler::atomic_xchgalw;
+    add = &MacroAssembler::atomic_addalw;
     break;
   case T_LONG:
-    lda = &MacroAssembler::ldaxr;
-    add = &MacroAssembler::add;
-    stl = &MacroAssembler::stlxr;
+    xchg = &MacroAssembler::atomic_xchgal;
+    add = &MacroAssembler::atomic_addal;
     break;
   case T_OBJECT:
   case T_ARRAY:
     if (UseCompressedOops) {
-      lda = &MacroAssembler::ldaxrw;
-      add = &MacroAssembler::addw;
-      stl = &MacroAssembler::stlxrw;
+      xchg = &MacroAssembler::atomic_xchgalw;
+      add = &MacroAssembler::atomic_addalw;
     } else {
-      lda = &MacroAssembler::ldaxr;
-      add = &MacroAssembler::add;
-      stl = &MacroAssembler::stlxr;
+      xchg = &MacroAssembler::atomic_xchgal;
+      add = &MacroAssembler::atomic_addal;
     }
     break;
   default:
     ShouldNotReachHere();
-    lda = &MacroAssembler::ldaxr;
-    add = &MacroAssembler::add;
-    stl = &MacroAssembler::stlxr;  // unreachable
+    xchg = &MacroAssembler::atomic_xchgal;
+    add = &MacroAssembler::atomic_addal; // unreachable
   }
 
   switch (code) {
@@ -3170,14 +3124,8 @@
         assert_different_registers(inc.as_register(), dst, addr.base(), tmp,
                                    rscratch1, rscratch2);
       }
-      Label again;
       __ lea(tmp, addr);
-      __ prfm(Address(tmp), PSTL1STRM);
-      __ bind(again);
-      (_masm->*lda)(dst, tmp);
-      (_masm->*add)(rscratch1, dst, inc);
-      (_masm->*stl)(rscratch2, rscratch1, tmp);
-      __ cbnzw(rscratch2, again);
+      (_masm->*add)(dst, inc, tmp);
       break;
     }
   case lir_xchg:
@@ -3190,13 +3138,8 @@
         obj = rscratch1;
       }
       assert_different_registers(obj, addr.base(), tmp, rscratch2, dst);
-      Label again;
       __ lea(tmp, addr);
-      __ prfm(Address(tmp), PSTL1STRM);
-      __ bind(again);
-      (_masm->*lda)(dst, tmp);
-      (_masm->*stl)(rscratch2, obj, tmp);
-      __ cbnzw(rscratch2, again);
+      (_masm->*xchg)(dst, obj, tmp);
       if (is_oop && UseCompressedOops) {
         __ decode_heap_oop(dst);
       }
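Note: C1's lir_xadd and lir_xchg no longer open-code LDXR/STLXR loops; the helper is selected once per type and invoked through a pointer-to-member, so the LSE/non-LSE split lives entirely inside the atomic_* routines. A minimal standalone illustration of the dispatch idiom (names invented for the example):

    struct Masm {
      void addal(long v)  { /* 64-bit atomic add */ }
      void addalw(long v) { /* 32-bit atomic add */ }
    };

    int main() {
      bool is64 = true;
      void (Masm::*add)(long) = is64 ? &Masm::addal : &Masm::addalw;
      Masm m;
      (m.*add)(1);   // same shape as (_masm->*add)(dst, inc, tmp) above
      return 0;
    }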
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Wed Mar 30 12:34:08 2016 +0200
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Tue Mar 29 10:07:54 2016 +0000
@@ -1637,6 +1637,11 @@
 }
 
 void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) {
+  if (UseLSE) {
+    mov(tmp, 1);
+    ldadd(Assembler::word, tmp, zr, counter_addr);
+    return;
+  }
   Label retry_load;
   prfm(Address(counter_addr), PSTL1STRM);
   bind(retry_load);
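Note: with UseLSE the counter bump is just mov + ldadd, and the zr destination discards the fetched old value. The same idea in portable C++, as an analogy only (a claim about typical ARMv8.1 compiler codegen, not about this changeset):

    #include <atomic>

    // fetch_add with the result ignored; ARMv8.1-aware compilers can lower
    // this to a single LDADD (STADD alias) with wzr as the destination,
    // matching the mov+ldadd pair emitted above.
    void atomic_incw(std::atomic<int>* counter) {
      counter->fetch_add(1, std::memory_order_relaxed);
    }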
@@ -2172,8 +2177,18 @@
     return a != b.as_register() && a != c && b.as_register() != c;
 }
 
-#define ATOMIC_OP(LDXR, OP, IOP, STXR)                                       \
-void MacroAssembler::atomic_##OP(Register prev, RegisterOrConstant incr, Register addr) { \
+#define ATOMIC_OP(NAME, LDXR, OP, IOP, AOP, STXR, sz)                   \
+void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \
+  if (UseLSE) {                                                         \
+    prev = prev->is_valid() ? prev : zr;                                \
+    if (incr.is_register()) {                                           \
+      AOP(sz, incr.as_register(), prev, addr);                          \
+    } else {                                                            \
+      mov(rscratch2, incr.as_constant());                               \
+      AOP(sz, rscratch2, prev, addr);                                   \
+    }                                                                   \
+    return;                                                             \
+  }                                                                     \
   Register result = rscratch2;                                          \
   if (prev->is_valid())                                                 \
     result = different(prev, incr, addr) ? prev : rscratch2;            \
@@ -2190,13 +2205,20 @@
   }                                                                     \
 }
 
-ATOMIC_OP(ldxr, add, sub, stxr)
-ATOMIC_OP(ldxrw, addw, subw, stxrw)
+ATOMIC_OP(add, ldxr, add, sub, ldadd, stxr, Assembler::xword)
+ATOMIC_OP(addw, ldxrw, addw, subw, ldadd, stxrw, Assembler::word)
+ATOMIC_OP(addal, ldaxr, add, sub, ldaddal, stlxr, Assembler::xword)
+ATOMIC_OP(addalw, ldaxrw, addw, subw, ldaddal, stlxrw, Assembler::word)
 
 #undef ATOMIC_OP
 
-#define ATOMIC_XCHG(OP, LDXR, STXR)                                     \
+#define ATOMIC_XCHG(OP, AOP, LDXR, STXR, sz)                            \
 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \
+  if (UseLSE) {                                                         \
+    prev = prev->is_valid() ? prev : zr;                                \
+    AOP(sz, newv, prev, addr);                                          \
+    return;                                                             \
+  }                                                                     \
   Register result = rscratch2;                                          \
   if (prev->is_valid())                                                 \
     result = different(prev, newv, addr) ? prev : rscratch2;            \
@@ -2211,8 +2233,10 @@
     mov(prev, result);                                                  \
 }
 
-ATOMIC_XCHG(xchg, ldxr, stxr)
-ATOMIC_XCHG(xchgw, ldxrw, stxrw)
+ATOMIC_XCHG(xchg, swp, ldxr, stxr, Assembler::xword)
+ATOMIC_XCHG(xchgw, swp, ldxrw, stxrw, Assembler::word)
+ATOMIC_XCHG(xchgal, swpal, ldaxr, stlxr, Assembler::xword)
+ATOMIC_XCHG(xchgalw, swpal, ldaxrw, stlxrw, Assembler::word)
 
 #undef ATOMIC_XCHG
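Note: both macros now take the LSE mnemonic (AOP) and an operand size alongside the exclusive-load/store fallback, and the new al-suffixed instantiations pair acquire+release LSE forms (ldaddal, swpal) with matching LDAXR/STLXR loops. On the LSE path an invalid prev is remapped to zr to discard the old value. A mechanical expansion of one instantiation, fallback elided:

    // ATOMIC_XCHG(xchgal, swpal, ldaxr, stlxr, Assembler::xword) expands to:
    void MacroAssembler::atomic_xchgal(Register prev, Register newv, Register addr) {
      if (UseLSE) {
        prev = prev->is_valid() ? prev : zr;   // no consumer? discard into zr
        swpal(Assembler::xword, newv, prev, addr);
        return;
      }
      // ... LDAXR/STLXR retry loop otherwise ...
    }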
 
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Wed Mar 30 12:34:08 2016 +0200
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Tue Mar 29 10:07:54 2016 +0000
@@ -957,9 +957,13 @@
 
   void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
   void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
+  void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
+  void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);
 
   void atomic_xchg(Register prev, Register newv, Register addr);
   void atomic_xchgw(Register prev, Register newv, Register addr);
+  void atomic_xchgal(Register prev, Register newv, Register addr);
+  void atomic_xchgalw(Register prev, Register newv, Register addr);
 
   void orptr(Address adr, RegisterOrConstant src) {
     ldr(rscratch2, adr);
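Note on naming: the w suffix selects the 32-bit form and the new al suffix selects acquire+release ordering; the unsuffixed forms stay relaxed. Illustrative call sites (register choices arbitrary, not from the changeset):

    __ atomic_addalw(r0, 1, r2);     // r0 = old 32-bit value, [r2] += 1, acq+rel
    __ atomic_xchgal(noreg, r1, r2); // swap r1 into [r2], old value discarded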
--- a/hotspot/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp	Wed Mar 30 12:34:08 2016 +0200
+++ b/hotspot/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp	Tue Mar 29 10:07:54 2016 +0000
@@ -1982,14 +1982,8 @@
   __ push(rscratch1);
   __ push(rscratch2);
   __ push(rscratch3);
-  Label L;
-  __ mov(rscratch2, (address) &BytecodeCounter::_counter_value);
-  __ prfm(Address(rscratch2), PSTL1STRM);
-  __ bind(L);
-  __ ldxr(rscratch1, rscratch2);
-  __ add(rscratch1, rscratch1, 1);
-  __ stxr(rscratch3, rscratch1, rscratch2);
-  __ cbnzw(rscratch3, L);
+  __ mov(rscratch3, (address) &BytecodeCounter::_counter_value);
+  __ atomic_add(noreg, 1, rscratch3);
   __ pop(rscratch3);
   __ pop(rscratch2);
   __ pop(rscratch1);
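Note: the bytecode-counter bump collapses to one atomic_add with no result register. rscratch2 is still saved around the call because ATOMIC_OP's constant-increment path stages the immediate there. Expansion sketch of the call under UseLSE, derived from the ATOMIC_OP hunk above:

    // atomic_add(noreg, 1, rscratch3): prev == noreg maps to zr,
    // the constant 1 is staged in rscratch2 (see ATOMIC_OP above).
    mov(rscratch2, 1);
    ldadd(Assembler::xword, rscratch2, zr, rscratch3);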
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.hpp	Wed Mar 30 12:34:08 2016 +0200
+++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.hpp	Tue Mar 29 10:07:54 2016 +0000
@@ -73,6 +73,7 @@
     CPU_SHA1         = (1<<5),
     CPU_SHA2         = (1<<6),
     CPU_CRC32        = (1<<7),
+    CPU_LSE          = (1<<8),
     CPU_A53MAC       = (1 << 30),
     CPU_DMB_ATOMICS  = (1 << 31),
   };
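Note: CPU_LSE is the feature bit behind UseLSE. The companion vm_version_aarch64.cpp change that sets it is not quoted in this changeset excerpt; on Linux the bit would typically come from the kernel's HWCAP_ATOMICS auxval, sketched here as an assumption rather than as the commit's code:

    #include <sys/auxv.h>

    #ifndef HWCAP_ATOMICS
    #define HWCAP_ATOMICS (1 << 8)   // Linux arm64 hwcap bit for ARMv8.1 LSE
    #endif

    // Would set the CPU_LSE bit in the VM's feature word (exact field
    // name in vm_version_aarch64 is not shown above).
    static bool detect_lse() {
      return (getauxval(AT_HWCAP) & HWCAP_ATOMICS) != 0;
    }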