8141633: Implement VarHandles/Unsafe intrinsics on AArch64
authoraph
Thu, 07 Jul 2016 15:07:13 +0100
changeset 40049 a23a3ed6c7a6
parent 40046 01e973266ee4
child 40050 a2af6fb7986e
8141633: Implement VarHandles/Unsafe intrinsics on AArch64 Reviewed-by: roland
hotspot/src/cpu/aarch64/vm/aarch64.ad
hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp
hotspot/src/cpu/aarch64/vm/cas.m4
hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad	Tue Jul 05 17:57:20 2016 -0700
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad	Thu Jul 07 15:07:13 2016 +0100
@@ -1942,11 +1942,34 @@
 
   bool is_CAS(int opcode)
   {
-    return (opcode == Op_CompareAndSwapI ||
-	    opcode == Op_CompareAndSwapL ||
-	    opcode == Op_CompareAndSwapN ||
-	    opcode == Op_CompareAndSwapP);
-  }
+    switch(opcode) {
+      // We handle these
+    case Op_CompareAndSwapI:
+    case Op_CompareAndSwapL:
+    case Op_CompareAndSwapP:
+    case Op_CompareAndSwapN:
+ // case Op_CompareAndSwapB:
+ // case Op_CompareAndSwapS:
+      return true;
+      // These are TBD
+    case Op_WeakCompareAndSwapB:
+    case Op_WeakCompareAndSwapS:
+    case Op_WeakCompareAndSwapI:
+    case Op_WeakCompareAndSwapL:
+    case Op_WeakCompareAndSwapP:
+    case Op_WeakCompareAndSwapN:
+    case Op_CompareAndExchangeB:
+    case Op_CompareAndExchangeS:
+    case Op_CompareAndExchangeI:
+    case Op_CompareAndExchangeL:
+    case Op_CompareAndExchangeP:
+    case Op_CompareAndExchangeN:
+      return false;
+    default:
+      return false;
+    }
+  }
+
 
   // leading_to_trailing
   //
@@ -4238,14 +4261,16 @@
     MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
-               Assembler::xword, /*acquire*/ false, /*release*/ true);
+               Assembler::xword, /*acquire*/ false, /*release*/ true,
+               /*weak*/ false, noreg);
   %}
 
   enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
     MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
-               Assembler::word, /*acquire*/ false, /*release*/ true);
+               Assembler::word, /*acquire*/ false, /*release*/ true,
+               /*weak*/ false, noreg);
   %}
 
 
@@ -4257,14 +4282,16 @@
     MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
-               Assembler::xword, /*acquire*/ true, /*release*/ true);
+               Assembler::xword, /*acquire*/ true, /*release*/ true,
+               /*weak*/ false, noreg);
   %}
 
   enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
     MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
-               Assembler::word, /*acquire*/ true, /*release*/ true);
+               Assembler::word, /*acquire*/ true, /*release*/ true,
+               /*weak*/ false, noreg);
   %}
 
 
@@ -5803,6 +5830,7 @@
 %{
   constraint(ALLOC_IN_RC(no_special_reg));
   match(RegL);
+  match(iRegL_R0);
   format %{ %}
   interface(REG_INTER);
 %}
@@ -5924,6 +5952,39 @@
   interface(REG_INTER);
 %}
 
+// Long 64 bit Register R0 only
+operand iRegL_R0()
+%{
+  constraint(ALLOC_IN_RC(r0_reg));
+  match(RegL);
+  match(iRegLNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Long 64 bit Register R2 only
+operand iRegL_R2()
+%{
+  constraint(ALLOC_IN_RC(r2_reg));
+  match(RegL);
+  match(iRegLNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Long 64 bit Register R3 only
+operand iRegL_R3()
+%{
+  constraint(ALLOC_IN_RC(r3_reg));
+  match(RegL);
+  match(iRegLNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 // Long 64 bit Register R11 only
 operand iRegL_R11()
 %{
@@ -5980,7 +6041,7 @@
 %}
 
 
-// Register R2 only
+// Register R4 only
 operand iRegI_R4()
 %{
   constraint(ALLOC_IN_RC(int_r4_reg));
@@ -6004,6 +6065,33 @@
   interface(REG_INTER);
 %}
 
+operand iRegN_R0()
+%{
+  constraint(ALLOC_IN_RC(r0_reg));
+  match(iRegN);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand iRegN_R2()
+%{
+  constraint(ALLOC_IN_RC(r2_reg));
+  match(iRegN);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand iRegN_R3()
+%{
+  constraint(ALLOC_IN_RC(r3_reg));
+  match(iRegN);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 // Integer 64 bit Register not Special
 operand iRegNNoSp()
 %{
@@ -9390,12 +9478,12 @@
   ins_pipe(pipe_slow);
 %}
 
+// standard CompareAndSwapX when we are using barriers
+// these have higher priority than the rules selected by a predicate
+
 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
 // can't match them
 
-// standard CompareAndSwapX when we are using barriers
-// these have higher priority than the rules selected by a predicate
-
 instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
 
   match(Set res (CompareAndSwapI mem (Binary oldval newval)));
@@ -9547,6 +9635,216 @@
 %}
 
 
+// ---------------------------------------------------------------------
+// Sundry CAS operations.  Note that release is always true,
+// regardless of the memory ordering of the CAS.  This is because we
+// need the volatile case to be sequentially consistent but there is
+// no trailing StoreLoad barrier emitted by C2.  Unfortunately we
+// can't check the type of memory ordering here, so we always emit a
+// STLXR.
+
+// This section is generated from aarch64_ad_cas.m4
+
+
+instruct compareAndExchangeB(iRegI_R0 res, indirect mem, iRegI_R2 oldval, iRegI_R3 newval, rFlagsReg cr) %{
+  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
+  ins_cost(2 * VOLATILE_REF_COST);
+  effect(KILL cr);
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
+  %}
+  ins_encode %{
+    __ uxtbw(rscratch2, $oldval$$Register);
+    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
+               Assembler::byte, /*acquire*/ false, /*release*/ true,
+               /*weak*/ false, $res$$Register);
+    __ sxtbw($res$$Register, $res$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeS(iRegI_R0 res, indirect mem, iRegI_R2 oldval, iRegI_R3 newval, rFlagsReg cr) %{
+  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
+  ins_cost(2 * VOLATILE_REF_COST);
+  effect(KILL cr);
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
+  %}
+  ins_encode %{
+    __ uxthw(rscratch2, $oldval$$Register);
+    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
+               Assembler::halfword, /*acquire*/ false, /*release*/ true,
+               /*weak*/ false, $res$$Register);
+    __ sxthw($res$$Register, $res$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeI(iRegI_R0 res, indirect mem, iRegI_R2 oldval, iRegI_R3 newval, rFlagsReg cr) %{
+  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
+  ins_cost(2 * VOLATILE_REF_COST);
+  effect(KILL cr);
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
+  %}
+  ins_encode %{
+    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
+               Assembler::word, /*acquire*/ false, /*release*/ true,
+               /*weak*/ false, $res$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeL(iRegL_R0 res, indirect mem, iRegL_R2 oldval, iRegL_R3 newval, rFlagsReg cr) %{
+  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
+  ins_cost(2 * VOLATILE_REF_COST);
+  effect(KILL cr);
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
+  %}
+  ins_encode %{
+    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
+               Assembler::xword, /*acquire*/ false, /*release*/ true,
+               /*weak*/ false, $res$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeN(iRegN_R0 res, indirect mem, iRegN_R2 oldval, iRegN_R3 newval, rFlagsReg cr) %{
+  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
+  ins_cost(2 * VOLATILE_REF_COST);
+  effect(KILL cr);
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
+  %}
+  ins_encode %{
+    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
+               Assembler::word, /*acquire*/ false, /*release*/ true,
+               /*weak*/ false, $res$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndExchangeP(iRegP_R0 res, indirect mem, iRegP_R2 oldval, iRegP_R3 newval, rFlagsReg cr) %{
+  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
+  ins_cost(2 * VOLATILE_REF_COST);
+  effect(KILL cr);
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
+  %}
+  ins_encode %{
+    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
+               Assembler::xword, /*acquire*/ false, /*release*/ true,
+               /*weak*/ false, $res$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
+  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
+  ins_cost(2 * VOLATILE_REF_COST);
+  effect(KILL cr);
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
+    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+  %}
+  ins_encode %{
+    __ uxtbw(rscratch2, $oldval$$Register);
+    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
+               Assembler::byte, /*acquire*/ false, /*release*/ true,
+               /*weak*/ true, noreg);
+    __ csetw($res$$Register, Assembler::EQ);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
+  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
+  ins_cost(2 * VOLATILE_REF_COST);
+  effect(KILL cr);
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
+    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+  %}
+  ins_encode %{
+    __ uxthw(rscratch2, $oldval$$Register);
+    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
+               Assembler::halfword, /*acquire*/ false, /*release*/ true,
+               /*weak*/ true, noreg);
+    __ csetw($res$$Register, Assembler::EQ);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
+  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
+  ins_cost(2 * VOLATILE_REF_COST);
+  effect(KILL cr);
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
+    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+  %}
+  ins_encode %{
+    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
+               Assembler::word, /*acquire*/ false, /*release*/ true,
+               /*weak*/ true, noreg);
+    __ csetw($res$$Register, Assembler::EQ);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
+  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
+  ins_cost(2 * VOLATILE_REF_COST);
+  effect(KILL cr);
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
+    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+  %}
+  ins_encode %{
+    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
+               Assembler::xword, /*acquire*/ false, /*release*/ true,
+               /*weak*/ true, noreg);
+    __ csetw($res$$Register, Assembler::EQ);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
+  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
+  ins_cost(2 * VOLATILE_REF_COST);
+  effect(KILL cr);
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
+    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+  %}
+  ins_encode %{
+    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
+               Assembler::word, /*acquire*/ false, /*release*/ true,
+               /*weak*/ true, noreg);
+    __ csetw($res$$Register, Assembler::EQ);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
+  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
+  ins_cost(2 * VOLATILE_REF_COST);
+  effect(KILL cr);
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
+    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+  %}
+  ins_encode %{
+    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
+               Assembler::xword, /*acquire*/ false, /*release*/ true,
+               /*weak*/ true, noreg);
+    __ csetw($res$$Register, Assembler::EQ);
+  %}
+  ins_pipe(pipe_slow);
+%}
+// ---------------------------------------------------------------------
+
 instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
   match(Set prev (GetAndSetI mem newv));
   format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
--- a/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Tue Jul 05 17:57:20 2016 -0700
+++ b/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Thu Jul 07 15:07:13 2016 +0100
@@ -1556,13 +1556,13 @@
 }
 
 void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) {
-  __ cmpxchg(addr, cmpval, newval, Assembler::word, /* acquire*/ true, /* release*/ true, rscratch1);
+  __ cmpxchg(addr, cmpval, newval, Assembler::word, /* acquire*/ true, /* release*/ true, /* weak*/ false, rscratch1);
   __ cset(rscratch1, Assembler::NE);
   __ membar(__ AnyAny);
 }
 
 void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) {
-  __ cmpxchg(addr, cmpval, newval, Assembler::xword, /* acquire*/ true, /* release*/ true, rscratch1);
+  __ cmpxchg(addr, cmpval, newval, Assembler::xword, /* acquire*/ true, /* release*/ true, /* weak*/ false, rscratch1);
   __ cset(rscratch1, Assembler::NE);
   __ membar(__ AnyAny);
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/src/cpu/aarch64/vm/cas.m4	Thu Jul 07 15:07:13 2016 +0100
@@ -0,0 +1,109 @@
+// Sundry CAS operations.  Note that release is always true,
+// regardless of the memory ordering of the CAS.  This is because we
+// need the volatile case to be sequentially consistent but there is
+// no trailing StoreLoad barrier emitted by C2.  Unfortunately we
+// can't check the type of memory ordering here, so we always emit a
+// STLXR.
+
+define(`CAS_INSN',
+`
+instruct compareAndExchange$1$5(iReg$2_R0 res, indirect mem, iReg$2_R2 oldval, iReg$2_R3 newval, rFlagsReg cr) %{
+  match(Set res (CompareAndExchange$1 mem (Binary oldval newval)));
+  ifelse($5,Acq,'  predicate(needs_acquiring_load_exclusive(n));
+  ins_cost(VOLATILE_REF_COST);`,'  ins_cost(2 * VOLATILE_REF_COST);`)
+  effect(KILL cr);
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval"
+  %}
+  ins_encode %{
+    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
+               Assembler::$4, /*acquire*/ ifelse($5,Acq,true,false), /*release*/ true,
+               /*weak*/ false, $res$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+define(`CAS_INSN4',
+`
+instruct compareAndExchange$1$7(iReg$2_R0 res, indirect mem, iReg$2_R2 oldval, iReg$2_R3 newval, rFlagsReg cr) %{
+  match(Set res (CompareAndExchange$1 mem (Binary oldval newval)));
+  ifelse($7,Acq,'  predicate(needs_acquiring_load_exclusive(n));
+  ins_cost(VOLATILE_REF_COST);`,'  ins_cost(2 * VOLATILE_REF_COST);`)
+  effect(KILL cr);
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval"
+  %}
+  ins_encode %{
+    __ $5(rscratch2, $oldval$$Register);
+    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
+               Assembler::$4, /*acquire*/ ifelse($5,Acq,true,false), /*release*/ true,
+               /*weak*/ false, $res$$Register);
+    __ $6($res$$Register, $res$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+CAS_INSN4(B,I,byte,byte,uxtbw,sxtbw)
+CAS_INSN4(S,I,short,halfword,uxthw,sxthw)
+CAS_INSN(I,I,int,word)
+CAS_INSN(L,L,long,xword)
+CAS_INSN(N,N,narrow oop,word)
+CAS_INSN(P,P,ptr,xword)
+dnl
+dnl CAS_INSN4(B,I,byte,byte,uxtbw,sxtbw,Acq)
+dnl CAS_INSN4(S,I,short,halfword,uxthw,sxthw,Acq)
+dnl CAS_INSN(I,I,int,word,Acq)
+dnl CAS_INSN(L,L,long,xword,Acq)
+dnl CAS_INSN(N,N,narrow oop,word,Acq)
+dnl CAS_INSN(P,P,ptr,xword,Acq)
+dnl
+define(`CAS_INSN2',
+`
+instruct weakCompareAndSwap$1$6(iRegINoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
+  match(Set res (WeakCompareAndSwap$1 mem (Binary oldval newval)));
+  ifelse($6,Acq,'  predicate(needs_acquiring_load_exclusive(n));
+  ins_cost(VOLATILE_REF_COST);`,'  ins_cost(2 * VOLATILE_REF_COST);`)
+  effect(KILL cr);
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval"
+    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+  %}
+  ins_encode %{
+    __ uxt$5(rscratch2, $oldval$$Register);
+    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
+               Assembler::$4, /*acquire*/ ifelse($6,Acq,true,false), /*release*/ true,
+               /*weak*/ true, noreg);
+    __ csetw($res$$Register, Assembler::EQ);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+define(`CAS_INSN3',
+`
+instruct weakCompareAndSwap$1$5(iRegINoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{
+  match(Set res (WeakCompareAndSwap$1 mem (Binary oldval newval)));
+  ifelse($5,Acq,'  predicate(needs_acquiring_load_exclusive(n));
+  ins_cost(VOLATILE_REF_COST);`,'  ins_cost(2 * VOLATILE_REF_COST);`)
+  effect(KILL cr);
+  format %{
+    "cmpxchg $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval"
+    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+  %}
+  ins_encode %{
+    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
+               Assembler::$4, /*acquire*/ ifelse($5,Acq,true,false), /*release*/ true,
+               /*weak*/ true, noreg);
+    __ csetw($res$$Register, Assembler::EQ);
+  %}
+  ins_pipe(pipe_slow);
+%}')dnl
+CAS_INSN2(B,I,byte,byte,bw)
+CAS_INSN2(S,I,short,halfword,hw)
+CAS_INSN3(I,I,int,word)
+CAS_INSN3(L,L,long,xword)
+CAS_INSN3(N,N,narrow oop,word)
+CAS_INSN3(P,P,ptr,xword)
+dnl CAS_INSN2(B,I,byte,byte,bw,Acq)
+dnl CAS_INSN2(S,I,short,halfword,hw,Acq)
+dnl CAS_INSN3(I,I,int,word,Acq)
+dnl CAS_INSN3(L,L,long,xword,Acq)
+dnl CAS_INSN3(N,N,narrow oop,word,Acq)
+dnl CAS_INSN3(P,P,ptr,xword,Acq)
+dnl
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Tue Jul 05 17:57:20 2016 -0700
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Thu Jul 07 15:07:13 2016 +0100
@@ -2141,30 +2141,40 @@
     b(*fail);
 }
 
-// A generic CAS; success or failure is in the EQ flag.
+// A generic CAS; success or failure is in the EQ flag.  A weak CAS
+// doesn't retry and may fail spuriously.  If the oldval is wanted,
+// Pass a register for the result, otherwise pass noreg.
+
+// Clobbers rscratch1
 void MacroAssembler::cmpxchg(Register addr, Register expected,
                              Register new_val,
                              enum operand_size size,
                              bool acquire, bool release,
-                             Register tmp) {
+                             bool weak,
+                             Register result) {
+  if (result == noreg)  result = rscratch1;
   if (UseLSE) {
-    mov(tmp, expected);
-    lse_cas(tmp, new_val, addr, size, acquire, release, /*not_pair*/ true);
-    cmp(tmp, expected);
+    mov(result, expected);
+    lse_cas(result, new_val, addr, size, acquire, release, /*not_pair*/ true);
+    cmp(result, expected);
   } else {
     BLOCK_COMMENT("cmpxchg {");
     Label retry_load, done;
     if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
       prfm(Address(addr), PSTL1STRM);
     bind(retry_load);
-    load_exclusive(tmp, addr, size, acquire);
+    load_exclusive(result, addr, size, acquire);
     if (size == xword)
-      cmp(tmp, expected);
+      cmp(result, expected);
     else
-      cmpw(tmp, expected);
+      cmpw(result, expected);
     br(Assembler::NE, done);
-    store_exclusive(tmp, new_val, addr, size, release);
-    cbnzw(tmp, retry_load);
+    store_exclusive(rscratch1, new_val, addr, size, release);
+    if (weak) {
+      cmpw(rscratch1, 0u);  // If the store fails, return NE to our caller.
+    } else {
+      cbnzw(rscratch1, retry_load);
+    }
     bind(done);
     BLOCK_COMMENT("} cmpxchg");
   }
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Tue Jul 05 17:57:20 2016 -0700
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Thu Jul 07 15:07:13 2016 +0100
@@ -995,10 +995,11 @@
   }
 
   // A generic CAS; success or failure is in the EQ flag.
+  // Clobbers rscratch1
   void cmpxchg(Register addr, Register expected, Register new_val,
                enum operand_size size,
-               bool acquire, bool release,
-               Register tmp = rscratch1);
+               bool acquire, bool release, bool weak,
+               Register result);
 
   // Calls