8133352: aarch64: generates constrained unpredictable instructions
Summary: Fix generation of unpredictable STXR Rs, Rt, [Rn] with Rs == Rt
Reviewed-by: kvn, aph, adinn
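
The root cause: the ARMv8 architecture manual makes a store-exclusive
CONSTRAINED UNPREDICTABLE when its status register Rs is the same as the
data register Rt (or the base register Rn), and likewise makes a
load-exclusive pair unpredictable when Rt1 == Rt2. Several HotSpot LL/SC
loops reused the data register for the status. A minimal standalone sketch
of the corrected loop shape (plain GCC/Clang inline assembly for AArch64,
not HotSpot code), with the status kept in its own register:

    #include <cstdint>

    // Atomically increment *counter with an LDXR/STXR retry loop.
    uint32_t atomic_incw(uint32_t* counter) {
      uint32_t value;   // Rt: the data register
      uint32_t status;  // Rs: must not alias Rt or the base register
      asm volatile(
          "0:\n"
          "  ldxr %w[val], %[mem]\n"          // load-exclusive the old value
          "  add  %w[val], %w[val], #1\n"     // compute the new value
          "  stxr %w[st], %w[val], %[mem]\n"  // status lands in its own register
          "  cbnz %w[st], 0b\n"               // non-zero: lost exclusivity, retry
          : [val] "=&r"(value), [st] "=&r"(status), [mem] "+Q"(*counter)
          :
          : "memory");
      return value;  // the incremented value
    }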
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.cpp Wed Aug 19 11:59:02 2015 +0000
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.cpp Tue Aug 18 12:40:22 2015 +0000
@@ -268,7 +268,7 @@
__ ldar(r21, r28); // ldar x21, [x28]
// LoadStoreExclusiveOp
- __ stxrw(r24, r24, r7); // stxr w24, w24, [x7]
+ __ stxrw(r21, r24, r7); // stxr w21, w24, [x7]
__ stlxrw(r21, r26, r28); // stlxr w21, w26, [x28]
__ ldxrw(r21, r6); // ldxr w21, [x6]
__ ldaxrw(r15, r30); // ldaxr w15, [x30]
@@ -299,7 +299,7 @@
// LoadStoreExclusiveOp
__ ldxpw(r25, r4, r22); // ldxp w25, w4, [x22]
- __ ldaxpw(r14, r14, r15); // ldaxp w14, w14, [x15]
+ __ ldaxpw(r13, r14, r15); // ldaxp w13, w14, [x15]
__ stxpw(r20, r26, r8, r10); // stxp w20, w26, w8, [x10]
__ stlxpw(r23, r18, r18, r18); // stlxp w23, w18, w18, [x18]
@@ -773,7 +773,7 @@
260: c85fffbb ldaxr x27, [x29]
264: c89fffa0 stlr x0, [x29]
268: c8dfff95 ldar x21, [x28]
- 26c: 88187cf8 stxr w24, w24, [x7]
+ 26c: 88157cf8 stxr w21, w24, [x7]
270: 8815ff9a stlxr w21, w26, [x28]
274: 885f7cd5 ldxr w21, [x6]
278: 885fffcf ldaxr w15, [x30]
@@ -796,7 +796,7 @@
2bc: c82870bb stxp w8, x27, x28, [x5]
2c0: c825b8c8 stlxp w5, x8, x14, [x6]
2c4: 887f12d9 ldxp w25, w4, [x22]
- 2c8: 887fb9ee ldaxp w14, w14, [x15]
+ 2c8: 887fb9ed ldaxp w13, w14, [x15]
2cc: 8834215a stxp w20, w26, w8, [x10]
2d0: 8837ca52 stlxp w23, w18, w18, [x18]
2d4: f806317e str x30, [x11,#99]
@@ -1085,13 +1085,13 @@
0xd444c320, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
0xd5033fdf, 0xd5033f9f, 0xd5033abf, 0xd61f0040,
0xd63f00a0, 0xc8147c55, 0xc805fcfd, 0xc85f7e05,
- 0xc85fffbb, 0xc89fffa0, 0xc8dfff95, 0x88187cf8,
+ 0xc85fffbb, 0xc89fffa0, 0xc8dfff95, 0x88157cf8,
0x8815ff9a, 0x885f7cd5, 0x885fffcf, 0x889ffc73,
0x88dffc56, 0x48127c0f, 0x480bff85, 0x485f7cdd,
0x485ffcf2, 0x489fff99, 0x48dffe62, 0x080a7c3e,
0x0814fed5, 0x085f7c59, 0x085ffcb8, 0x089ffc70,
0x08dfffb6, 0xc87f0a68, 0xc87fcdc7, 0xc82870bb,
- 0xc825b8c8, 0x887f12d9, 0x887fb9ee, 0x8834215a,
+ 0xc825b8c8, 0x887f12d9, 0x887fb9ed, 0x8834215a,
0x8837ca52, 0xf806317e, 0xb81b3337, 0x39000dc2,
0x78005149, 0xf84391f4, 0xb85b220c, 0x385fd356,
0x785d127e, 0x389f4149, 0x79801e3c, 0x79c014a3,
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Wed Aug 19 11:59:02 2015 +0000
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp Tue Aug 18 12:40:22 2015 +0000
@@ -1106,13 +1106,13 @@
#define INSN4(NAME, sz, op, o0) /* Four registers */ \
void NAME(Register Rs, Register Rt1, Register Rt2, Register Rn) { \
- assert(Rs != Rn, "unpredictable instruction"); \
+ guarantee(Rs != Rn && Rs != Rt1 && Rs != Rt2, "unpredictable instruction"); \
load_store_exclusive(Rs, Rt1, Rt2, Rn, sz, op, o0); \
}
#define INSN3(NAME, sz, op, o0) /* Three registers */ \
void NAME(Register Rs, Register Rt, Register Rn) { \
- assert(Rs != Rn, "unpredictable instruction"); \
+ guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction"); \
load_store_exclusive(Rs, Rt, (Register)0b11111, Rn, sz, op, o0); \
}
@@ -1124,6 +1124,7 @@
#define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \
void NAME(Register Rt1, Register Rt2, Register Rn) { \
+ guarantee(Rt1 != Rt2, "unpredictable instruction"); \
load_store_exclusive((Register)0b11111, Rt1, Rt2, Rn, sz, op, o0); \
}
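
Two things change above. The check is strengthened from Rs != Rn to also
exclude the data register(s), matching the architectural constraint, and
assert becomes guarantee, which, unlike assert, is not compiled out of
product builds, so a bad operand combination now stops the VM everywhere.
As a rough sketch of what the INSN3 expansion for stxrw now looks like
(sz, op and o0 stand in for the macro arguments, not the real encodings):

    void Assembler::stxrw(Register Rs, Register Rt, Register Rn) {
      guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction");
      load_store_exclusive(Rs, Rt, (Register)0b11111, Rn, sz, op, o0);
    }

With this in place the old test instruction __ stxrw(r24, r24, r7) fails
the guarantee instead of silently emitting stxr w24, w24, [x7], which is
why the generated-test expectations in assembler_aarch64.cpp change too.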
--- a/hotspot/src/cpu/aarch64/vm/interp_masm_aarch64.cpp Wed Aug 19 11:59:02 2015 +0000
+++ b/hotspot/src/cpu/aarch64/vm/interp_masm_aarch64.cpp Tue Aug 18 12:40:22 2015 +0000
@@ -611,6 +611,7 @@
Label done;
const Register swap_reg = r0;
+ const Register tmp = c_rarg2;
const Register obj_reg = c_rarg3; // Will contain the oop
const int obj_offset = BasicObjectLock::obj_offset_in_bytes();
@@ -624,7 +625,7 @@
ldr(obj_reg, Address(lock_reg, obj_offset));
if (UseBiasedLocking) {
- biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch2, false, done, &slow_case);
+ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case);
}
// Load (object->mark() | 1) into swap_reg
@@ -643,7 +644,7 @@
cmpxchgptr(swap_reg, lock_reg, obj_reg, rscratch1, fast, &fail);
bind(fast);
atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()),
- rscratch2, rscratch1);
+ rscratch2, rscratch1, tmp);
b(done);
bind(fail);
} else {
@@ -671,7 +672,7 @@
if (PrintBiasedLockingStatistics) {
br(Assembler::NE, slow_case);
atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()),
- rscratch2, rscratch1);
+ rscratch2, rscratch1, tmp);
}
br(Assembler::EQ, done);
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Wed Aug 19 11:59:02 2015 +0000
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Tue Aug 18 12:40:22 2015 +0000
@@ -399,11 +399,7 @@
if (PrintBiasedLockingStatistics && counters == NULL)
counters = BiasedLocking::counters();
- bool need_tmp_reg = false;
- if (tmp_reg == noreg) {
- tmp_reg = rscratch2;
- }
- assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, rscratch1);
+ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, rscratch1, rscratch2, noreg); // noreg here asserts that tmp_reg was supplied
assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes());
@@ -433,7 +429,7 @@
if (counters != NULL) {
Label around;
cbnz(tmp_reg, around);
- atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, rscratch1);
+ atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, rscratch1, rscratch2);
b(done);
bind(around);
} else {
@@ -486,7 +482,7 @@
bind(here);
if (counters != NULL) {
atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()),
- tmp_reg, rscratch1);
+ tmp_reg, rscratch1, rscratch2);
}
}
b(done);
@@ -512,7 +508,7 @@
bind(here);
if (counters != NULL) {
atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()),
- tmp_reg, rscratch1);
+ tmp_reg, rscratch1, rscratch2);
}
}
b(done);
@@ -540,7 +536,7 @@
// removing the bias bit from the object's header.
if (counters != NULL) {
atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg,
- rscratch1);
+ rscratch1, rscratch2);
}
bind(nope);
}
@@ -1641,15 +1637,15 @@
return Address(Rd);
}
-void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) {
+void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) {
Label retry_load;
bind(retry_load);
// flush and load exclusive from the memory location
ldxrw(tmp, counter_addr);
addw(tmp, tmp, 1);
// if we store+flush with no intervening write the status register will be zero
- stxrw(tmp, tmp, counter_addr);
- cbnzw(tmp, retry_load);
+ stxrw(tmp2, tmp, counter_addr);
+ cbnzw(tmp2, retry_load);
}
@@ -2022,6 +2018,14 @@
}
}
+void MacroAssembler::subw(Register Rd, Register Rn, RegisterOrConstant decrement) {
+ if (decrement.is_register()) {
+ subw(Rd, Rn, decrement.as_register());
+ } else {
+ subw(Rd, Rn, decrement.as_constant());
+ }
+}
+
void MacroAssembler::reinit_heapbase()
{
if (UseCompressedOops) {
@@ -2111,7 +2115,7 @@
return a != b.as_register() && a != c && b.as_register() != c;
}
-#define ATOMIC_OP(LDXR, OP, STXR) \
+#define ATOMIC_OP(LDXR, OP, IOP, STXR) \
void MacroAssembler::atomic_##OP(Register prev, RegisterOrConstant incr, Register addr) { \
Register result = rscratch2; \
if (prev->is_valid()) \
@@ -2121,14 +2125,15 @@
bind(retry_load); \
LDXR(result, addr); \
OP(rscratch1, result, incr); \
- STXR(rscratch1, rscratch1, addr); \
- cbnzw(rscratch1, retry_load); \
- if (prev->is_valid() && prev != result) \
- mov(prev, result); \
+ STXR(rscratch2, rscratch1, addr); \
+ cbnzw(rscratch2, retry_load); \
+ if (prev->is_valid() && prev != result) { \
+ IOP(prev, rscratch1, incr); \
+ } \
}
-ATOMIC_OP(ldxr, add, stxr)
-ATOMIC_OP(ldxrw, addw, stxrw)
+ATOMIC_OP(ldxr, add, sub, stxr)
+ATOMIC_OP(ldxrw, addw, subw, stxrw)
#undef ATOMIC_OP
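
The ATOMIC_OP rewrite is the subtle part. The old loop put the STXR status
into rscratch1, the very register being stored (Rs == Rt, the unpredictable
form). The status now goes to rscratch2, but rscratch2 is also result, the
register that held the loaded previous value, so the trailing
mov(prev, result) can no longer supply the return value. Instead the
previous value is recomputed from the new one with the inverse operation
IOP (sub for add, subw for addw): prev = (old OP incr) IOP incr. That is
also why the RegisterOrConstant subw overload is added above. A standalone
sketch of the recovery trick (again GCC/Clang inline assembly, not HotSpot
code):

    #include <cstdint>

    // Atomic add returning the previous value. The old value is not kept
    // live across the store-exclusive; it is reconstructed afterwards by
    // applying the inverse operation (subtraction) to the new value.
    uint64_t atomic_add_return_prev(uint64_t* addr, uint64_t incr) {
      uint64_t newval;
      uint32_t status;
      asm volatile(
          "0:\n"
          "  ldxr %[nv], %[mem]\n"          // nv = old value (exclusive)
          "  add  %[nv], %[nv], %[inc]\n"   // nv = old + incr; old is dead
          "  stxr %w[st], %[nv], %[mem]\n"  // status in a distinct register
          "  cbnz %w[st], 0b\n"
          : [nv] "=&r"(newval), [st] "=&r"(status), [mem] "+Q"(*addr)
          : [inc] "r"(incr)
          : "memory");
      return newval - incr;  // the IOP step: recover the previous value
    }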
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Wed Aug 19 11:59:02 2015 +0000
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Tue Aug 18 12:40:22 2015 +0000
@@ -107,9 +107,7 @@
// Biased locking support
// lock_reg and obj_reg must be loaded up with the appropriate values.
// swap_reg is killed.
- // tmp_reg is optional. If it is supplied (i.e., != noreg) it will
- // be killed; if not supplied, push/pop will be used internally to
- // allocate a temporary (inefficient, avoid if possible).
+ // tmp_reg must be supplied and must not be rscratch1 or rscratch2
// Optional slow case is for implementations (interpreter and C1) which branch to
// slow case directly. Leaves condition codes set for C2's Fast_Lock node.
// Returns offset of first potentially-faulting instruction for null
@@ -126,10 +124,10 @@
// Helper functions for statistics gathering.
// Unconditional atomic increment.
- void atomic_incw(Register counter_addr, Register tmp);
- void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) {
+ void atomic_incw(Register counter_addr, Register tmp, Register tmp2);
+ void atomic_incw(Address counter_addr, Register tmp1, Register tmp2, Register tmp3) {
lea(tmp1, counter_addr);
- atomic_incw(tmp1, tmp2);
+ atomic_incw(tmp1, tmp2, tmp3);
}
// Load Effective Address
void lea(Register r, const Address &a) {
@@ -1057,6 +1055,7 @@
void add(Register Rd, Register Rn, RegisterOrConstant increment);
void addw(Register Rd, Register Rn, RegisterOrConstant increment);
void sub(Register Rd, Register Rn, RegisterOrConstant decrement);
+ void subw(Register Rd, Register Rn, RegisterOrConstant decrement);
void adrp(Register reg1, const Address &dest, unsigned long &byte_offset);
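
Call sites of the Address form of atomic_incw must now pass three distinct
temporaries; the third receives the store-exclusive status inside the loop.
A hypothetical call (counter_addr and the register choices are purely
illustrative) mirrors the interpreter sites changed above:

    // tmp1 takes the counter address, tmp2 the loaded/incremented value,
    // tmp3 the STXR status; all three must be different registers.
    __ atomic_incw(Address((address)counter_addr), rscratch2, rscratch1, tmp);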
--- a/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Wed Aug 19 11:59:02 2015 +0000
+++ b/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Tue Aug 18 12:40:22 2015 +0000
@@ -1774,6 +1774,7 @@
const Register obj_reg = r19; // Will contain the oop
const Register lock_reg = r13; // Address of compiler lock object (BasicLock)
const Register old_hdr = r13; // value of old header at unlock time
+ const Register tmp = c_rarg3;
Label slow_path_lock;
Label lock_done;
@@ -1795,7 +1796,7 @@
__ ldr(obj_reg, Address(oop_handle_reg, 0));
if (UseBiasedLocking) {
- __ biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch2, false, lock_done, &slow_path_lock);
+ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock);
}
// Load (object->mark() | 1) into swap_reg %r0
--- a/hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp Wed Aug 19 11:59:02 2015 +0000
+++ b/hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp Tue Aug 18 12:40:22 2015 +0000
@@ -1913,15 +1913,18 @@
}
void TemplateInterpreterGenerator::count_bytecode() {
+ Register rscratch3 = r0;
__ push(rscratch1);
__ push(rscratch2);
+ __ push(rscratch3);
Label L;
__ mov(rscratch2, (address) &BytecodeCounter::_counter_value);
__ bind(L);
__ ldxr(rscratch1, rscratch2);
__ add(rscratch1, rscratch1, 1);
- __ stxr(rscratch1, rscratch1, rscratch2);
- __ cbnzw(rscratch1, L);
+ __ stxr(rscratch3, rscratch1, rscratch2);
+ __ cbnzw(rscratch3, L);
+ __ pop(rscratch3);
__ pop(rscratch2);
__ pop(rscratch1);
}