# HG changeset patch # User enevill # Date 1439901622 0 # Node ID 13b0caf18153c3c3996bc9d1bbc9b5231d0dd528 # Parent bbbc2f6d236776a7d0dc5ed667ae6caec087be5a 8133352: aarch64: generates constrained unpredictable instructions Summary: Fix generation of unpredictable STXR Rs, Rt, [Rn] with Rs == Rt Reviewed-by: kvn, aph, adinn diff -r bbbc2f6d2367 -r 13b0caf18153 hotspot/src/cpu/aarch64/vm/assembler_aarch64.cpp --- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.cpp Wed Aug 19 11:59:02 2015 +0000 +++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.cpp Tue Aug 18 12:40:22 2015 +0000 @@ -268,7 +268,7 @@ __ ldar(r21, r28); // ldar x21, [x28] // LoadStoreExclusiveOp - __ stxrw(r24, r24, r7); // stxr w24, w24, [x7] + __ stxrw(r21, r24, r7); // stxr w21, w24, [x7] __ stlxrw(r21, r26, r28); // stlxr w21, w26, [x28] __ ldxrw(r21, r6); // ldxr w21, [x6] __ ldaxrw(r15, r30); // ldaxr w15, [x30] @@ -299,7 +299,7 @@ // LoadStoreExclusiveOp __ ldxpw(r25, r4, r22); // ldxp w25, w4, [x22] - __ ldaxpw(r14, r14, r15); // ldaxp w14, w14, [x15] + __ ldaxpw(r13, r14, r15); // ldaxp w13, w14, [x15] __ stxpw(r20, r26, r8, r10); // stxp w20, w26, w8, [x10] __ stlxpw(r23, r18, r18, r18); // stlxp w23, w18, w18, [x18] @@ -773,7 +773,7 @@ 260: c85fffbb ldaxr x27, [x29] 264: c89fffa0 stlr x0, [x29] 268: c8dfff95 ldar x21, [x28] - 26c: 88187cf8 stxr w24, w24, [x7] + 26c: 88157cf8 stxr w21, w24, [x7] 270: 8815ff9a stlxr w21, w26, [x28] 274: 885f7cd5 ldxr w21, [x6] 278: 885fffcf ldaxr w15, [x30] @@ -796,7 +796,7 @@ 2bc: c82870bb stxp w8, x27, x28, [x5] 2c0: c825b8c8 stlxp w5, x8, x14, [x6] 2c4: 887f12d9 ldxp w25, w4, [x22] - 2c8: 887fb9ee ldaxp w14, w14, [x15] + 2c8: 887fb9ed ldaxp w13, w14, [x15] 2cc: 8834215a stxp w20, w26, w8, [x10] 2d0: 8837ca52 stlxp w23, w18, w18, [x18] 2d4: f806317e str x30, [x11,#99] @@ -1085,13 +1085,13 @@ 0xd444c320, 0xd503201f, 0xd69f03e0, 0xd6bf03e0, 0xd5033fdf, 0xd5033f9f, 0xd5033abf, 0xd61f0040, 0xd63f00a0, 0xc8147c55, 0xc805fcfd, 0xc85f7e05, - 0xc85fffbb, 0xc89fffa0, 0xc8dfff95, 0x88187cf8, + 0xc85fffbb, 0xc89fffa0, 0xc8dfff95, 0x88157cf8, 0x8815ff9a, 0x885f7cd5, 0x885fffcf, 0x889ffc73, 0x88dffc56, 0x48127c0f, 0x480bff85, 0x485f7cdd, 0x485ffcf2, 0x489fff99, 0x48dffe62, 0x080a7c3e, 0x0814fed5, 0x085f7c59, 0x085ffcb8, 0x089ffc70, 0x08dfffb6, 0xc87f0a68, 0xc87fcdc7, 0xc82870bb, - 0xc825b8c8, 0x887f12d9, 0x887fb9ee, 0x8834215a, + 0xc825b8c8, 0x887f12d9, 0x887fb9ed, 0x8834215a, 0x8837ca52, 0xf806317e, 0xb81b3337, 0x39000dc2, 0x78005149, 0xf84391f4, 0xb85b220c, 0x385fd356, 0x785d127e, 0x389f4149, 0x79801e3c, 0x79c014a3, diff -r bbbc2f6d2367 -r 13b0caf18153 hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp --- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Wed Aug 19 11:59:02 2015 +0000 +++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Tue Aug 18 12:40:22 2015 +0000 @@ -1106,13 +1106,13 @@ #define INSN4(NAME, sz, op, o0) /* Four registers */ \ void NAME(Register Rs, Register Rt1, Register Rt2, Register Rn) { \ - assert(Rs != Rn, "unpredictable instruction"); \ + guarantee(Rs != Rn && Rs != Rt1 && Rs != Rt2, "unpredictable instruction"); \ load_store_exclusive(Rs, Rt1, Rt2, Rn, sz, op, o0); \ } #define INSN3(NAME, sz, op, o0) /* Three registers */ \ void NAME(Register Rs, Register Rt, Register Rn) { \ - assert(Rs != Rn, "unpredictable instruction"); \ + guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction"); \ load_store_exclusive(Rs, Rt, (Register)0b11111, Rn, sz, op, o0); \ } @@ -1124,6 +1124,7 @@ #define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \ void NAME(Register Rt1, Register Rt2, Register Rn) { \ + guarantee(Rt1 != Rt2, "unpredictable instruction"); \ load_store_exclusive((Register)0b11111, Rt1, Rt2, Rn, sz, op, o0); \ } diff -r bbbc2f6d2367 -r 13b0caf18153 hotspot/src/cpu/aarch64/vm/interp_masm_aarch64.cpp --- a/hotspot/src/cpu/aarch64/vm/interp_masm_aarch64.cpp Wed Aug 19 11:59:02 2015 +0000 +++ b/hotspot/src/cpu/aarch64/vm/interp_masm_aarch64.cpp Tue Aug 18 12:40:22 2015 +0000 @@ -611,6 +611,7 @@ Label done; const Register swap_reg = r0; + const Register tmp = c_rarg2; const Register obj_reg = c_rarg3; // Will contain the oop const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); @@ -624,7 +625,7 @@ ldr(obj_reg, Address(lock_reg, obj_offset)); if (UseBiasedLocking) { - biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch2, false, done, &slow_case); + biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); } // Load (object->mark() | 1) into swap_reg @@ -643,7 +644,7 @@ cmpxchgptr(swap_reg, lock_reg, obj_reg, rscratch1, fast, &fail); bind(fast); atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), - rscratch2, rscratch1); + rscratch2, rscratch1, tmp); b(done); bind(fail); } else { @@ -671,7 +672,7 @@ if (PrintBiasedLockingStatistics) { br(Assembler::NE, slow_case); atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), - rscratch2, rscratch1); + rscratch2, rscratch1, tmp); } br(Assembler::EQ, done); diff -r bbbc2f6d2367 -r 13b0caf18153 hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp --- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Wed Aug 19 11:59:02 2015 +0000 +++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Tue Aug 18 12:40:22 2015 +0000 @@ -399,11 +399,7 @@ if (PrintBiasedLockingStatistics && counters == NULL) counters = BiasedLocking::counters(); - bool need_tmp_reg = false; - if (tmp_reg == noreg) { - tmp_reg = rscratch2; - } - assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, rscratch1); + assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, rscratch1, rscratch2, noreg); assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); @@ -433,7 +429,7 @@ if (counters != NULL) { Label around; cbnz(tmp_reg, around); - atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, rscratch1); + atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, rscratch1, rscratch2); b(done); bind(around); } else { @@ -486,7 +482,7 @@ bind(here); if (counters != NULL) { atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), - tmp_reg, rscratch1); + tmp_reg, rscratch1, rscratch2); } } b(done); @@ -512,7 +508,7 @@ bind(here); if (counters != NULL) { atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), - tmp_reg, rscratch1); + tmp_reg, rscratch1, rscratch2); } } b(done); @@ -540,7 +536,7 @@ // removing the bias bit from the object's header. if (counters != NULL) { atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, - rscratch1); + rscratch1, rscratch2); } bind(nope); } @@ -1641,15 +1637,15 @@ return Address(Rd); } -void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { +void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) { Label retry_load; bind(retry_load); // flush and load exclusive from the memory location ldxrw(tmp, counter_addr); addw(tmp, tmp, 1); // if we store+flush with no intervening write tmp wil be zero - stxrw(tmp, tmp, counter_addr); - cbnzw(tmp, retry_load); + stxrw(tmp2, tmp, counter_addr); + cbnzw(tmp2, retry_load); } @@ -2022,6 +2018,14 @@ } } +void MacroAssembler::subw(Register Rd, Register Rn, RegisterOrConstant decrement) { + if (decrement.is_register()) { + subw(Rd, Rn, decrement.as_register()); + } else { + subw(Rd, Rn, decrement.as_constant()); + } +} + void MacroAssembler::reinit_heapbase() { if (UseCompressedOops) { @@ -2111,7 +2115,7 @@ return a != b.as_register() && a != c && b.as_register() != c; } -#define ATOMIC_OP(LDXR, OP, STXR) \ +#define ATOMIC_OP(LDXR, OP, IOP, STXR) \ void MacroAssembler::atomic_##OP(Register prev, RegisterOrConstant incr, Register addr) { \ Register result = rscratch2; \ if (prev->is_valid()) \ @@ -2121,14 +2125,15 @@ bind(retry_load); \ LDXR(result, addr); \ OP(rscratch1, result, incr); \ - STXR(rscratch1, rscratch1, addr); \ - cbnzw(rscratch1, retry_load); \ - if (prev->is_valid() && prev != result) \ - mov(prev, result); \ + STXR(rscratch2, rscratch1, addr); \ + cbnzw(rscratch2, retry_load); \ + if (prev->is_valid() && prev != result) { \ + IOP(prev, rscratch1, incr); \ + } \ } -ATOMIC_OP(ldxr, add, stxr) -ATOMIC_OP(ldxrw, addw, stxrw) +ATOMIC_OP(ldxr, add, sub, stxr) +ATOMIC_OP(ldxrw, addw, subw, stxrw) #undef ATOMIC_OP diff -r bbbc2f6d2367 -r 13b0caf18153 hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp --- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Wed Aug 19 11:59:02 2015 +0000 +++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Tue Aug 18 12:40:22 2015 +0000 @@ -107,9 +107,7 @@ // Biased locking support // lock_reg and obj_reg must be loaded up with the appropriate values. // swap_reg is killed. - // tmp_reg is optional. If it is supplied (i.e., != noreg) it will - // be killed; if not supplied, push/pop will be used internally to - // allocate a temporary (inefficient, avoid if possible). + // tmp_reg must be supplied and must not be rscratch1 or rscratch2 // Optional slow case is for implementations (interpreter and C1) which branch to // slow case directly. Leaves condition codes set for C2's Fast_Lock node. // Returns offset of first potentially-faulting instruction for null @@ -126,10 +124,10 @@ // Helper functions for statistics gathering. // Unconditional atomic increment. - void atomic_incw(Register counter_addr, Register tmp); - void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { + void atomic_incw(Register counter_addr, Register tmp, Register tmp2); + void atomic_incw(Address counter_addr, Register tmp1, Register tmp2, Register tmp3) { lea(tmp1, counter_addr); - atomic_incw(tmp1, tmp2); + atomic_incw(tmp1, tmp2, tmp3); } // Load Effective Address void lea(Register r, const Address &a) { @@ -1057,6 +1055,7 @@ void add(Register Rd, Register Rn, RegisterOrConstant increment); void addw(Register Rd, Register Rn, RegisterOrConstant increment); void sub(Register Rd, Register Rn, RegisterOrConstant decrement); + void subw(Register Rd, Register Rn, RegisterOrConstant decrement); void adrp(Register reg1, const Address &dest, unsigned long &byte_offset); diff -r bbbc2f6d2367 -r 13b0caf18153 hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp --- a/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Wed Aug 19 11:59:02 2015 +0000 +++ b/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Tue Aug 18 12:40:22 2015 +0000 @@ -1774,6 +1774,7 @@ const Register obj_reg = r19; // Will contain the oop const Register lock_reg = r13; // Address of compiler lock object (BasicLock) const Register old_hdr = r13; // value of old header at unlock time + const Register tmp = c_rarg3; Label slow_path_lock; Label lock_done; @@ -1795,7 +1796,7 @@ __ ldr(obj_reg, Address(oop_handle_reg, 0)); if (UseBiasedLocking) { - __ biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch2, false, lock_done, &slow_path_lock); + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); } // Load (object->mark() | 1) into swap_reg %r0 diff -r bbbc2f6d2367 -r 13b0caf18153 hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp --- a/hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp Wed Aug 19 11:59:02 2015 +0000 +++ b/hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp Tue Aug 18 12:40:22 2015 +0000 @@ -1913,15 +1913,18 @@ } void TemplateInterpreterGenerator::count_bytecode() { + Register rscratch3 = r0; __ push(rscratch1); __ push(rscratch2); + __ push(rscratch3); Label L; __ mov(rscratch2, (address) &BytecodeCounter::_counter_value); __ bind(L); __ ldxr(rscratch1, rscratch2); __ add(rscratch1, rscratch1, 1); - __ stxr(rscratch1, rscratch1, rscratch2); - __ cbnzw(rscratch1, L); + __ stxr(rscratch3, rscratch1, rscratch2); + __ cbnzw(rscratch3, L); + __ pop(rscratch3); __ pop(rscratch2); __ pop(rscratch1); }