# HG changeset patch # User enevill # Date 1457447990 0 # Node ID 4d1e93624d6a6e68629e5960e19f1fbb05975538 # Parent b18243f4d955f964ea06f170f7b36fbda750b115 8150394: aarch64: add support for 8.1 LSE CAS instructions Reviewed-by: aph Contributed-by: ananth.jasty@caviumnetworks.com, edward.nevill@linaro.org diff -r b18243f4d955 -r 4d1e93624d6a hotspot/src/cpu/aarch64/vm/aarch64.ad --- a/hotspot/src/cpu/aarch64/vm/aarch64.ad Mon Mar 07 15:03:48 2016 -0800 +++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad Tue Mar 08 14:39:50 2016 +0000 @@ -4132,14 +4132,14 @@ MacroAssembler _masm(&cbuf); guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register, - &Assembler::ldxr, &MacroAssembler::cmp, &Assembler::stlxr); + Assembler::xword, /*acquire*/ false, /*release*/ true); %} enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{ MacroAssembler _masm(&cbuf); guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register, - &Assembler::ldxrw, &MacroAssembler::cmpw, &Assembler::stlxrw); + Assembler::word, /*acquire*/ false, /*release*/ true); %} @@ -4151,14 +4151,14 @@ MacroAssembler _masm(&cbuf); guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register, - &Assembler::ldaxr, &MacroAssembler::cmp, &Assembler::stlxr); + Assembler::xword, /*acquire*/ true, /*release*/ true); %} enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{ MacroAssembler _masm(&cbuf); guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register, - &Assembler::ldaxrw, &MacroAssembler::cmpw, &Assembler::stlxrw); + Assembler::word, /*acquire*/ true, /*release*/ true); %} @@ -4676,7 +4676,12 @@ // Compare object markOop with mark and if equal exchange scratch1 // with object markOop. - { + if (UseLSE) { + __ mov(tmp, disp_hdr); + __ casal(Assembler::xword, tmp, box, oop); + __ cmp(tmp, disp_hdr); + __ br(Assembler::EQ, cont); + } else { Label retry_load; __ bind(retry_load); __ ldaxr(tmp, oop); @@ -4726,7 +4731,11 @@ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value)); __ mov(disp_hdr, zr); - { + if (UseLSE) { + __ mov(rscratch1, disp_hdr); + __ casal(Assembler::xword, rscratch1, rthread, tmp); + __ cmp(rscratch1, disp_hdr); + } else { Label retry_load, fail; __ bind(retry_load); __ ldaxr(rscratch1, tmp); @@ -4815,7 +4824,11 @@ // see the stack address of the basicLock in the markOop of the // object. - { + if (UseLSE) { + __ mov(tmp, box); + __ casl(Assembler::xword, tmp, disp_hdr, oop); + __ cmp(tmp, box); + } else { Label retry_load; __ bind(retry_load); __ ldxr(tmp, oop); diff -r b18243f4d955 -r 4d1e93624d6a hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp --- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Mon Mar 07 15:03:48 2016 -0800 +++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Tue Mar 08 14:39:50 2016 +0000 @@ -972,7 +972,7 @@ // System void system(int op0, int op1, int CRn, int CRm, int op2, - Register rt = (Register)0b11111) + Register rt = dummy_reg) { starti; f(0b11010101000, 31, 21); @@ -1082,7 +1082,7 @@ #define INSN(NAME, opc) \ void NAME() { \ - branch_reg((Register)0b11111, opc); \ + branch_reg(dummy_reg, opc); \ } INSN(eret, 0b0100); @@ -1094,10 +1094,22 @@ enum operand_size { byte, halfword, word, xword }; void load_store_exclusive(Register Rs, Register Rt1, Register Rt2, - Register Rn, enum operand_size sz, int op, int o0) { + Register Rn, enum operand_size sz, int op, bool ordered) { starti; f(sz, 31, 30), f(0b001000, 29, 24), f(op, 23, 21); - rf(Rs, 16), f(o0, 15), rf(Rt2, 10), rf(Rn, 5), rf(Rt1, 0); + rf(Rs, 16), f(ordered, 15), rf(Rt2, 10), rf(Rn, 5), rf(Rt1, 0); + } + + void load_exclusive(Register dst, Register addr, + enum operand_size sz, bool ordered) { + load_store_exclusive(dummy_reg, dst, dummy_reg, addr, + sz, 0b010, ordered); + } + + void store_exclusive(Register status, Register new_val, Register addr, + enum operand_size sz, bool ordered) { + load_store_exclusive(status, new_val, dummy_reg, addr, + sz, 0b000, ordered); } #define INSN4(NAME, sz, op, o0) /* Four registers */ \ @@ -1109,19 +1121,19 @@ #define INSN3(NAME, sz, op, o0) /* Three registers */ \ void NAME(Register Rs, Register Rt, Register Rn) { \ guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction"); \ - load_store_exclusive(Rs, Rt, (Register)0b11111, Rn, sz, op, o0); \ + load_store_exclusive(Rs, Rt, dummy_reg, Rn, sz, op, o0); \ } #define INSN2(NAME, sz, op, o0) /* Two registers */ \ void NAME(Register Rt, Register Rn) { \ - load_store_exclusive((Register)0b11111, Rt, (Register)0b11111, \ + load_store_exclusive(dummy_reg, Rt, dummy_reg, \ Rn, sz, op, o0); \ } #define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \ void NAME(Register Rt1, Register Rt2, Register Rn) { \ guarantee(Rt1 != Rt2, "unpredictable instruction"); \ - load_store_exclusive((Register)0b11111, Rt1, Rt2, Rn, sz, op, o0); \ + load_store_exclusive(dummy_reg, Rt1, Rt2, Rn, sz, op, o0); \ } // bytes @@ -1169,6 +1181,46 @@ #undef INSN4 #undef INSN_FOO + // 8.1 Compare and swap extensions + void lse_cas(Register Rs, Register Rt, Register Rn, + enum operand_size sz, bool a, bool r, bool not_pair) { + starti; + if (! not_pair) { // Pair + assert(sz == word || sz == xword, "invalid size"); + /* The size bit is in bit 30, not 31 */ + sz = (operand_size)(sz == word ? 0b00:0b01); + } + f(sz, 31, 30), f(0b001000, 29, 24), f(1, 23), f(a, 22), f(1, 21); + rf(Rs, 16), f(r, 15), f(0b11111, 14, 10), rf(Rn, 5), rf(Rt, 0); + } + + // CAS +#define INSN(NAME, a, r) \ + void NAME(operand_size sz, Register Rs, Register Rt, Register Rn) { \ + assert(Rs != Rn && Rs != Rt, "unpredictable instruction"); \ + lse_cas(Rs, Rt, Rn, sz, a, r, true); \ + } + INSN(cas, false, false) + INSN(casa, true, false) + INSN(casl, false, true) + INSN(casal, true, true) +#undef INSN + + // CASP +#define INSN(NAME, a, r) \ + void NAME(operand_size sz, Register Rs, Register Rs1, \ + Register Rt, Register Rt1, Register Rn) { \ + assert((Rs->encoding() & 1) == 0 && (Rt->encoding() & 1) == 0 && \ + Rs->successor() == Rs1 && Rt->successor() == Rt1 && \ + Rs != Rn && Rs1 != Rn && Rs != Rt, "invalid registers"); \ + lse_cas(Rs, Rt, Rn, sz, a, r, false); \ + } + INSN(casp, false, false) + INSN(caspa, true, false) + INSN(caspl, false, true) + INSN(caspal, true, true) +#undef INSN + // Load register (literal) #define INSN(NAME, opc, V) \ void NAME(Register Rt, address dest) { \ diff -r b18243f4d955 -r 4d1e93624d6a hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp --- a/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Mon Mar 07 15:03:48 2016 -0800 +++ b/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Tue Mar 08 14:39:50 2016 +0000 @@ -1556,38 +1556,52 @@ } void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { - Label retry_load, nope; - // flush and load exclusive from the memory location - // and fail if it is not what we expect - __ bind(retry_load); - __ ldaxrw(rscratch1, addr); - __ cmpw(rscratch1, cmpval); - __ cset(rscratch1, Assembler::NE); - __ br(Assembler::NE, nope); - // if we store+flush with no intervening write rscratch1 wil be zero - __ stlxrw(rscratch1, newval, addr); - // retry so we only ever return after a load fails to compare - // ensures we don't return a stale value after a failed write. - __ cbnzw(rscratch1, retry_load); - __ bind(nope); + if (UseLSE) { + __ mov(rscratch1, cmpval); + __ casal(Assembler::word, rscratch1, newval, addr); + __ cmpw(rscratch1, cmpval); + __ cset(rscratch1, Assembler::NE); + } else { + Label retry_load, nope; + // flush and load exclusive from the memory location + // and fail if it is not what we expect + __ bind(retry_load); + __ ldaxrw(rscratch1, addr); + __ cmpw(rscratch1, cmpval); + __ cset(rscratch1, Assembler::NE); + __ br(Assembler::NE, nope); + // if we store+flush with no intervening write rscratch1 wil be zero + __ stlxrw(rscratch1, newval, addr); + // retry so we only ever return after a load fails to compare + // ensures we don't return a stale value after a failed write. + __ cbnzw(rscratch1, retry_load); + __ bind(nope); + } __ membar(__ AnyAny); } void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) { - Label retry_load, nope; - // flush and load exclusive from the memory location - // and fail if it is not what we expect - __ bind(retry_load); - __ ldaxr(rscratch1, addr); - __ cmp(rscratch1, cmpval); - __ cset(rscratch1, Assembler::NE); - __ br(Assembler::NE, nope); - // if we store+flush with no intervening write rscratch1 wil be zero - __ stlxr(rscratch1, newval, addr); - // retry so we only ever return after a load fails to compare - // ensures we don't return a stale value after a failed write. - __ cbnz(rscratch1, retry_load); - __ bind(nope); + if (UseLSE) { + __ mov(rscratch1, cmpval); + __ casal(Assembler::xword, rscratch1, newval, addr); + __ cmp(rscratch1, cmpval); + __ cset(rscratch1, Assembler::NE); + } else { + Label retry_load, nope; + // flush and load exclusive from the memory location + // and fail if it is not what we expect + __ bind(retry_load); + __ ldaxr(rscratch1, addr); + __ cmp(rscratch1, cmpval); + __ cset(rscratch1, Assembler::NE); + __ br(Assembler::NE, nope); + // if we store+flush with no intervening write rscratch1 wil be zero + __ stlxr(rscratch1, newval, addr); + // retry so we only ever return after a load fails to compare + // ensures we don't return a stale value after a failed write. + __ cbnz(rscratch1, retry_load); + __ bind(nope); + } __ membar(__ AnyAny); } diff -r b18243f4d955 -r 4d1e93624d6a hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp --- a/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp Mon Mar 07 15:03:48 2016 -0800 +++ b/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp Tue Mar 08 14:39:50 2016 +0000 @@ -103,6 +103,9 @@ \ product(bool, UseCRC32, false, \ "Use CRC32 instructions for CRC32 computation") \ + \ + product(bool, UseLSE, false, \ + "Use LSE instructions") \ // Don't attempt to use Neon on builtin sim until builtin sim supports it #define UseCRC32 false @@ -123,6 +126,8 @@ "Use Neon for CRC32 computation") \ product(bool, UseCRC32, false, \ "Use CRC32 instructions for CRC32 computation") \ + product(bool, UseLSE, false, \ + "Use LSE instructions") \ product(bool, TraceTraps, false, "Trace all traps the signal handler") #endif diff -r b18243f4d955 -r 4d1e93624d6a hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp --- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Mon Mar 07 15:03:48 2016 -0800 +++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Tue Mar 08 14:39:50 2016 +0000 @@ -2070,25 +2070,32 @@ // oldv holds comparison value // newv holds value to write in exchange // addr identifies memory word to compare against/update - // tmp returns 0/1 for success/failure - Label retry_load, nope; - - bind(retry_load); - // flush and load exclusive from the memory location - // and fail if it is not what we expect - ldaxr(tmp, addr); - cmp(tmp, oldv); - br(Assembler::NE, nope); - // if we store+flush with no intervening write tmp wil be zero - stlxr(tmp, newv, addr); - cbzw(tmp, succeed); - // retry so we only ever return after a load fails to compare - // ensures we don't return a stale value after a failed write. - b(retry_load); - // if the memory word differs we return it in oldv and signal a fail - bind(nope); - membar(AnyAny); - mov(oldv, tmp); + if (UseLSE) { + mov(tmp, oldv); + casal(Assembler::xword, oldv, newv, addr); + cmp(tmp, oldv); + br(Assembler::EQ, succeed); + membar(AnyAny); + } else { + Label retry_load, nope; + + bind(retry_load); + // flush and load exclusive from the memory location + // and fail if it is not what we expect + ldaxr(tmp, addr); + cmp(tmp, oldv); + br(Assembler::NE, nope); + // if we store+flush with no intervening write tmp wil be zero + stlxr(tmp, newv, addr); + cbzw(tmp, succeed); + // retry so we only ever return after a load fails to compare + // ensures we don't return a stale value after a failed write. + b(retry_load); + // if the memory word differs we return it in oldv and signal a fail + bind(nope); + membar(AnyAny); + mov(oldv, tmp); + } if (fail) b(*fail); } @@ -2099,28 +2106,63 @@ // newv holds value to write in exchange // addr identifies memory word to compare against/update // tmp returns 0/1 for success/failure - Label retry_load, nope; - - bind(retry_load); - // flush and load exclusive from the memory location - // and fail if it is not what we expect - ldaxrw(tmp, addr); - cmp(tmp, oldv); - br(Assembler::NE, nope); - // if we store+flush with no intervening write tmp wil be zero - stlxrw(tmp, newv, addr); - cbzw(tmp, succeed); - // retry so we only ever return after a load fails to compare - // ensures we don't return a stale value after a failed write. - b(retry_load); - // if the memory word differs we return it in oldv and signal a fail - bind(nope); - membar(AnyAny); - mov(oldv, tmp); + if (UseLSE) { + mov(tmp, oldv); + casal(Assembler::word, oldv, newv, addr); + cmp(tmp, oldv); + br(Assembler::EQ, succeed); + membar(AnyAny); + } else { + Label retry_load, nope; + + bind(retry_load); + // flush and load exclusive from the memory location + // and fail if it is not what we expect + ldaxrw(tmp, addr); + cmp(tmp, oldv); + br(Assembler::NE, nope); + // if we store+flush with no intervening write tmp wil be zero + stlxrw(tmp, newv, addr); + cbzw(tmp, succeed); + // retry so we only ever return after a load fails to compare + // ensures we don't return a stale value after a failed write. + b(retry_load); + // if the memory word differs we return it in oldv and signal a fail + bind(nope); + membar(AnyAny); + mov(oldv, tmp); + } if (fail) b(*fail); } +// A generic CAS; success or failure is in the EQ flag. +void MacroAssembler::cmpxchg(Register addr, Register expected, + Register new_val, + enum operand_size size, + bool acquire, bool release, + Register tmp) { + if (UseLSE) { + mov(tmp, expected); + lse_cas(tmp, new_val, addr, size, acquire, release, /*not_pair*/ true); + cmp(tmp, expected); + } else { + BLOCK_COMMENT("cmpxchg {"); + Label retry_load, done; + bind(retry_load); + load_exclusive(tmp, addr, size, acquire); + if (size == xword) + cmp(tmp, expected); + else + cmpw(tmp, expected); + br(Assembler::NE, done); + store_exclusive(tmp, new_val, addr, size, release); + cbnzw(tmp, retry_load); + bind(done); + BLOCK_COMMENT("} cmpxchg"); + } +} + static bool different(Register a, RegisterOrConstant b, Register c) { if (b.is_constant()) return a != c; diff -r b18243f4d955 -r 4d1e93624d6a hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp --- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Mon Mar 07 15:03:48 2016 -0800 +++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Tue Mar 08 14:39:50 2016 +0000 @@ -971,21 +971,10 @@ } // A generic CAS; success or failure is in the EQ flag. - template void cmpxchg(Register addr, Register expected, Register new_val, - T1 load_insn, - void (MacroAssembler::*cmp_insn)(Register, Register), - T2 store_insn, - Register tmp = rscratch1) { - Label retry_load, done; - bind(retry_load); - (this->*load_insn)(tmp, addr); - (this->*cmp_insn)(tmp, expected); - br(Assembler::NE, done); - (this->*store_insn)(tmp, new_val, addr); - cbnzw(tmp, retry_load); - bind(done); - } + enum operand_size size, + bool acquire, bool release, + Register tmp = rscratch1); // Calls diff -r b18243f4d955 -r 4d1e93624d6a hotspot/src/cpu/aarch64/vm/register_aarch64.hpp --- a/hotspot/src/cpu/aarch64/vm/register_aarch64.hpp Mon Mar 07 15:03:48 2016 -0800 +++ b/hotspot/src/cpu/aarch64/vm/register_aarch64.hpp Tue Mar 08 14:39:50 2016 +0000 @@ -107,6 +107,9 @@ CONSTANT_REGISTER_DECLARATION(Register, zr, (32)); CONSTANT_REGISTER_DECLARATION(Register, sp, (33)); +// Used as a filler in instructions where a register field is unused. +const Register dummy_reg = r31_sp; + // Use FloatRegister as shortcut class FloatRegisterImpl; typedef FloatRegisterImpl* FloatRegister; diff -r b18243f4d955 -r 4d1e93624d6a hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp --- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp Mon Mar 07 15:03:48 2016 -0800 +++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp Tue Mar 08 14:39:50 2016 +0000 @@ -61,6 +61,10 @@ #define HWCAP_CRC32 (1<<7) #endif +#ifndef HWCAP_ATOMICS +#define HWCAP_ATOMICS (1<<8) +#endif + int VM_Version::_cpu; int VM_Version::_model; int VM_Version::_model2; @@ -172,6 +176,7 @@ if (auxv & HWCAP_AES) strcat(buf, ", aes"); if (auxv & HWCAP_SHA1) strcat(buf, ", sha1"); if (auxv & HWCAP_SHA2) strcat(buf, ", sha256"); + if (auxv & HWCAP_ATOMICS) strcat(buf, ", lse"); _features_string = os::strdup(buf); @@ -191,6 +196,15 @@ FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); } + if (auxv & HWCAP_ATOMICS) { + if (FLAG_IS_DEFAULT(UseLSE)) + FLAG_SET_DEFAULT(UseLSE, true); + } else { + if (UseLSE) { + warning("UseLSE specified, but not supported on this CPU"); + } + } + if (auxv & HWCAP_AES) { UseAES = UseAES || FLAG_IS_DEFAULT(UseAES); UseAESIntrinsics =