--- a/src/hotspot/cpu/aarch64/aarch64.ad Thu Sep 06 16:15:32 2018 -0700
+++ b/src/hotspot/cpu/aarch64/aarch64.ad Thu Sep 06 18:06:24 2018 -0700
@@ -3378,26 +3378,18 @@
// Load markOop from object into displaced_header.
__ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
- // Always do locking in runtime.
- if (EmitSync & 0x01) {
- __ cmp(oop, zr);
- return;
- }
-
if (UseBiasedLocking && !UseOptoBiasInlining) {
__ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
}
// Handle existing monitor
- if ((EmitSync & 0x02) == 0) {
- // we can use AArch64's bit test and branch here but
- // markoopDesc does not define a bit index just the bit value
- // so assert in case the bit pos changes
-# define __monitor_value_log2 1
- assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
- __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
-# undef __monitor_value_log2
- }
+ // We can use AArch64's bit test and branch here, but
+ // markOopDesc does not define a bit index, just the bit value,
+ // so assert in case the bit position changes.
+# define __monitor_value_log2 1
+ assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
+ __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
+# undef __monitor_value_log2
// Set displaced_header to be (markOop of object | UNLOCK_VALUE).
__ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);
@@ -3455,63 +3447,62 @@
__ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
// Handle existing monitor.
- if ((EmitSync & 0x02) == 0) {
- __ b(cont);
-
- __ bind(object_has_monitor);
- // The object's monitor m is unlocked iff m->owner == NULL,
- // otherwise m->owner may contain a thread or a stack address.
- //
- // Try to CAS m->owner from NULL to current thread.
- __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
- __ mov(disp_hdr, zr);
-
- if (UseLSE) {
- __ mov(rscratch1, disp_hdr);
- __ casal(Assembler::xword, rscratch1, rthread, tmp);
- __ cmp(rscratch1, disp_hdr);
- } else {
- Label retry_load, fail;
- if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
- __ prfm(Address(tmp), PSTL1STRM);
- __ bind(retry_load);
- __ ldaxr(rscratch1, tmp);
- __ cmp(disp_hdr, rscratch1);
- __ br(Assembler::NE, fail);
- // use stlxr to ensure update is immediately visible
- __ stlxr(rscratch1, rthread, tmp);
- __ cbnzw(rscratch1, retry_load);
- __ bind(fail);
+ __ b(cont);
+
+ __ bind(object_has_monitor);
+ // The object's monitor m is unlocked iff m->owner == NULL,
+ // otherwise m->owner may contain a thread or a stack address.
+ //
+ // Try to CAS m->owner from NULL to current thread.
+ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
+ __ mov(disp_hdr, zr);
+
+ if (UseLSE) {
+ __ mov(rscratch1, disp_hdr);
+ __ casal(Assembler::xword, rscratch1, rthread, tmp);
+ __ cmp(rscratch1, disp_hdr);
+ } else {
+ Label retry_load, fail;
+ if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH)) {
+ __ prfm(Address(tmp), PSTL1STRM);
}
-
- // Label next;
- // __ cmpxchgptr(/*oldv=*/disp_hdr,
- // /*newv=*/rthread,
- // /*addr=*/tmp,
- // /*tmp=*/rscratch1,
- // /*succeed*/next,
- // /*fail*/NULL);
- // __ bind(next);
-
- // store a non-null value into the box.
- __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));
-
- // PPC port checks the following invariants
- // #ifdef ASSERT
- // bne(flag, cont);
- // We have acquired the monitor, check some invariants.
- // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
- // Invariant 1: _recursions should be 0.
- // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
- // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
- // "monitor->_recursions should be 0", -1);
- // Invariant 2: OwnerIsThread shouldn't be 0.
- // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
- //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
- // "monitor->OwnerIsThread shouldn't be 0", -1);
- // #endif
+ __ bind(retry_load);
+ __ ldaxr(rscratch1, tmp);
+ __ cmp(disp_hdr, rscratch1);
+ __ br(Assembler::NE, fail);
+ // use stlxr to ensure update is immediately visible
+ __ stlxr(rscratch1, rthread, tmp);
+ __ cbnzw(rscratch1, retry_load);
+ __ bind(fail);
}
+ // Label next;
+ // __ cmpxchgptr(/*oldv=*/disp_hdr,
+ // /*newv=*/rthread,
+ // /*addr=*/tmp,
+ // /*tmp=*/rscratch1,
+ // /*succeed*/next,
+ // /*fail*/NULL);
+ // __ bind(next);
+
+ // store a non-null value into the box.
+ __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+
+ // PPC port checks the following invariants
+ // #ifdef ASSERT
+ // bne(flag, cont);
+ // We have acquired the monitor, check some invariants.
+ // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
+ // Invariant 1: _recursions should be 0.
+ // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
+ // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
+ // "monitor->_recursions should be 0", -1);
+ // Invariant 2: OwnerIsThread shouldn't be 0.
+ // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
+ //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
+ // "monitor->OwnerIsThread shouldn't be 0", -1);
+ // #endif
+
__ bind(cont);
// flag == EQ indicates success
// flag == NE indicates failure
@@ -3533,12 +3524,6 @@
assert_different_registers(oop, box, tmp, disp_hdr);
- // Always do locking in runtime.
- if (EmitSync & 0x01) {
- __ cmp(oop, zr); // Oop can't be 0 here => always false.
- return;
- }
-
if (UseBiasedLocking && !UseOptoBiasInlining) {
__ biased_locking_exit(oop, tmp, cont);
}
@@ -3552,10 +3537,8 @@
// Handle existing monitor.
- if ((EmitSync & 0x02) == 0) {
- __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
- __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
- }
+ __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
+ __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
// Check if it is still a light weight lock, this is is true if we
// see the stack address of the basicLock in the markOop of the
@@ -3590,27 +3573,25 @@
__ bind(cas_failed);
// Handle existing monitor.
- if ((EmitSync & 0x02) == 0) {
- __ b(cont);
-
- __ bind(object_has_monitor);
- __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
- __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
- __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
- __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
- __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
- __ cmp(rscratch1, zr);
- __ br(Assembler::NE, cont);
-
- __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
- __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
- __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
- __ cmp(rscratch1, zr);
- __ cbnz(rscratch1, cont);
- // need a release store here
- __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
- __ stlr(rscratch1, tmp); // rscratch1 is zero
- }
+ __ b(cont);
+
+ __ bind(object_has_monitor);
+ __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
+ __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
+ __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
+ __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
+ __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
+ __ cmp(rscratch1, zr);
+ __ br(Assembler::NE, cont);
+
+ __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
+ __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
+ __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
+ __ cmp(rscratch1, zr);
+ __ cbnz(rscratch1, cont);
+ // need a release store here
+ __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
+ __ stlr(rscratch1, tmp); // rscratch1 is zero
__ bind(cont);
// flag == EQ indicates success
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp Thu Sep 06 16:15:32 2018 -0700
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp Thu Sep 06 18:06:24 2018 -0700
@@ -2848,12 +2848,6 @@
ld(displaced_header, oopDesc::mark_offset_in_bytes(), oop);
- // Always do locking in runtime.
- if (EmitSync & 0x01) {
- cmpdi(flag, oop, 0); // Oop can't be 0 here => always false.
- return;
- }
-
if (try_bias) {
biased_locking_enter(flag, oop, displaced_header, temp, current_header, cont);
}
@@ -2867,11 +2861,9 @@
#endif // INCLUDE_RTM_OPT
// Handle existing monitor.
- if ((EmitSync & 0x02) == 0) {
- // The object has an existing monitor iff (mark & monitor_value) != 0.
- andi_(temp, displaced_header, markOopDesc::monitor_value);
- bne(CCR0, object_has_monitor);
- }
+ // The object has an existing monitor iff (mark & monitor_value) != 0.
+ andi_(temp, displaced_header, markOopDesc::monitor_value);
+ bne(CCR0, object_has_monitor);
// Set displaced_header to be (markOop of object | UNLOCK_VALUE).
ori(displaced_header, displaced_header, markOopDesc::unlocked_value);
@@ -2914,48 +2906,46 @@
std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), box);
// Handle existing monitor.
- if ((EmitSync & 0x02) == 0) {
- b(cont);
-
- bind(object_has_monitor);
- // The object's monitor m is unlocked iff m->owner == NULL,
- // otherwise m->owner may contain a thread or a stack address.
+ b(cont);
+
+ bind(object_has_monitor);
+ // The object's monitor m is unlocked iff m->owner == NULL,
+ // otherwise m->owner may contain a thread or a stack address.
#if INCLUDE_RTM_OPT
- // Use the same RTM locking code in 32- and 64-bit VM.
- if (use_rtm) {
- rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
- rtm_counters, method_data, profile_rtm, cont);
- } else {
+ // Use the same RTM locking code in 32- and 64-bit VM.
+ if (use_rtm) {
+ rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
+ rtm_counters, method_data, profile_rtm, cont);
+ } else {
#endif // INCLUDE_RTM_OPT
- // Try to CAS m->owner from NULL to current thread.
- addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value);
- cmpxchgd(/*flag=*/flag,
- /*current_value=*/current_header,
- /*compare_value=*/(intptr_t)0,
- /*exchange_value=*/R16_thread,
- /*where=*/temp,
- MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
- MacroAssembler::cmpxchgx_hint_acquire_lock());
-
- // Store a non-null value into the box.
- std(box, BasicLock::displaced_header_offset_in_bytes(), box);
-
-# ifdef ASSERT
- bne(flag, cont);
- // We have acquired the monitor, check some invariants.
- addi(/*monitor=*/temp, temp, -ObjectMonitor::owner_offset_in_bytes());
- // Invariant 1: _recursions should be 0.
- //assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
- asm_assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), temp,
+ // Try to CAS m->owner from NULL to current thread.
+ addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value);
+ cmpxchgd(/*flag=*/flag,
+ /*current_value=*/current_header,
+ /*compare_value=*/(intptr_t)0,
+ /*exchange_value=*/R16_thread,
+ /*where=*/temp,
+ MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
+ MacroAssembler::cmpxchgx_hint_acquire_lock());
+
+ // Store a non-null value into the box.
+ std(box, BasicLock::displaced_header_offset_in_bytes(), box);
+
+# ifdef ASSERT
+ bne(flag, cont);
+ // We have acquired the monitor, check some invariants.
+ addi(/*monitor=*/temp, temp, -ObjectMonitor::owner_offset_in_bytes());
+ // Invariant 1: _recursions should be 0.
+ //assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
+ asm_assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), temp,
"monitor->_recursions should be 0", -1);
-# endif
+# endif
#if INCLUDE_RTM_OPT
- } // use_rtm()
+ } // use_rtm()
#endif
- }
bind(cont);
// flag == EQ indicates success
@@ -2970,12 +2960,6 @@
Label cont;
Label object_has_monitor;
- // Always do locking in runtime.
- if (EmitSync & 0x01) {
- cmpdi(flag, oop, 0); // Oop can't be 0 here => always false.
- return;
- }
-
if (try_bias) {
biased_locking_exit(flag, oop, current_header, cont);
}
@@ -3002,13 +2986,11 @@
beq(flag, cont);
// Handle existing monitor.
- if ((EmitSync & 0x02) == 0) {
- // The object has an existing monitor iff (mark & monitor_value) != 0.
- RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done
- ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
- andi_(R0, current_header, markOopDesc::monitor_value);
- bne(CCR0, object_has_monitor);
- }
+ // The object has an existing monitor iff (mark & monitor_value) != 0.
+ RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done
+ ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
+ andi_(R0, current_header, markOopDesc::monitor_value);
+ bne(CCR0, object_has_monitor);
// Check if it is still a light weight lock, this is is true if we see
// the stack address of the basicLock in the markOop of the object.
@@ -3026,40 +3008,38 @@
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
// Handle existing monitor.
- if ((EmitSync & 0x02) == 0) {
- b(cont);
-
- bind(object_has_monitor);
- addi(current_header, current_header, -markOopDesc::monitor_value); // monitor
- ld(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
+ b(cont);
+
+ bind(object_has_monitor);
+ addi(current_header, current_header, -markOopDesc::monitor_value); // monitor
+ ld(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
// It's inflated.
#if INCLUDE_RTM_OPT
- if (use_rtm) {
- Label L_regular_inflated_unlock;
- // Clean monitor_value bit to get valid pointer
- cmpdi(flag, temp, 0);
- bne(flag, L_regular_inflated_unlock);
- tend_();
- b(cont);
- bind(L_regular_inflated_unlock);
- }
+ if (use_rtm) {
+ Label L_regular_inflated_unlock;
+ // Clean monitor_value bit to get valid pointer
+ cmpdi(flag, temp, 0);
+ bne(flag, L_regular_inflated_unlock);
+ tend_();
+ b(cont);
+ bind(L_regular_inflated_unlock);
+ }
#endif
- ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header);
- xorr(temp, R16_thread, temp); // Will be 0 if we are the owner.
- orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions.
- cmpdi(flag, temp, 0);
- bne(flag, cont);
-
- ld(temp, ObjectMonitor::EntryList_offset_in_bytes(), current_header);
- ld(displaced_header, ObjectMonitor::cxq_offset_in_bytes(), current_header);
- orr(temp, temp, displaced_header); // Will be 0 if both are 0.
- cmpdi(flag, temp, 0);
- bne(flag, cont);
- release();
- std(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
- }
+ ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header);
+ xorr(temp, R16_thread, temp); // Will be 0 if we are the owner.
+ orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions.
+ cmpdi(flag, temp, 0);
+ bne(flag, cont);
+
+ ld(temp, ObjectMonitor::EntryList_offset_in_bytes(), current_header);
+ ld(displaced_header, ObjectMonitor::cxq_offset_in_bytes(), current_header);
+ orr(temp, temp, displaced_header); // Will be 0 if both are 0.
+ cmpdi(flag, temp, 0);
+ bne(flag, cont);
+ release();
+ std(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
bind(cont);
// flag == EQ indicates success
--- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp Thu Sep 06 16:15:32 2018 -0700
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp Thu Sep 06 18:06:24 2018 -0700
@@ -3374,13 +3374,11 @@
}
// Handle existing monitor.
- if ((EmitSync & 0x01) == 0) {
- // The object has an existing monitor iff (mark & monitor_value) != 0.
- guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
- z_lr(temp, displacedHeader);
- z_nill(temp, markOopDesc::monitor_value);
- z_brne(object_has_monitor);
- }
+ // The object has an existing monitor iff (mark & monitor_value) != 0.
+ guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
+ z_lr(temp, displacedHeader);
+ z_nill(temp, markOopDesc::monitor_value);
+ z_brne(object_has_monitor);
// Set mark to markOop | markOopDesc::unlocked_value.
z_oill(displacedHeader, markOopDesc::unlocked_value);
@@ -3411,28 +3409,26 @@
z_bru(done);
- if ((EmitSync & 0x01) == 0) {
- Register zero = temp;
- Register monitor_tagged = displacedHeader; // Tagged with markOopDesc::monitor_value.
- bind(object_has_monitor);
- // The object's monitor m is unlocked iff m->owner == NULL,
- // otherwise m->owner may contain a thread or a stack address.
- //
- // Try to CAS m->owner from NULL to current thread.
- z_lghi(zero, 0);
- // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ.
- z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor_tagged);
- // Store a non-null value into the box.
- z_stg(box, BasicLock::displaced_header_offset_in_bytes(), box);
+ Register zero = temp;
+ Register monitor_tagged = displacedHeader; // Tagged with markOopDesc::monitor_value.
+ bind(object_has_monitor);
+ // The object's monitor m is unlocked iff m->owner == NULL,
+ // otherwise m->owner may contain a thread or a stack address.
+ //
+ // Try to CAS m->owner from NULL to current thread.
+ z_lghi(zero, 0);
+ // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ.
+ z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor_tagged);
+ // Store a non-null value into the box.
+ z_stg(box, BasicLock::displaced_header_offset_in_bytes(), box);
#ifdef ASSERT
- z_brne(done);
- // We've acquired the monitor, check some invariants.
- // Invariant 1: _recursions should be 0.
- asm_assert_mem8_is_zero(OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions), monitor_tagged,
- "monitor->_recursions should be 0", -1);
- z_ltgr(zero, zero); // Set CR=EQ.
+ z_brne(done);
+ // We've acquired the monitor, check some invariants.
+ // Invariant 1: _recursions should be 0.
+ asm_assert_mem8_is_zero(OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions), monitor_tagged,
+ "monitor->_recursions should be 0", -1);
+ z_ltgr(zero, zero); // Set CR=EQ.
#endif
- }
bind(done);
BLOCK_COMMENT("} compiler_fast_lock_object");
@@ -3461,13 +3457,11 @@
z_bre(done);
// Handle existing monitor.
- if ((EmitSync & 0x02) == 0) {
- // The object has an existing monitor iff (mark & monitor_value) != 0.
- z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop);
- guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
- z_nill(currentHeader, markOopDesc::monitor_value);
- z_brne(object_has_monitor);
- }
+ // The object has an existing monitor iff (mark & monitor_value) != 0.
+ z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop);
+ guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
+ z_nill(currentHeader, markOopDesc::monitor_value);
+ z_brne(object_has_monitor);
// Check if it is still a light weight lock, this is true if we see
// the stack address of the basicLock in the markOop of the object
@@ -3477,20 +3471,18 @@
z_bru(done); // Csg sets CR as desired.
// Handle existing monitor.
- if ((EmitSync & 0x02) == 0) {
- bind(object_has_monitor);
- z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop); // CurrentHeader is tagged with monitor_value set.
- load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
- z_brne(done);
- load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
- z_brne(done);
- load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
- z_brne(done);
- load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
- z_brne(done);
- z_release();
- z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader);
- }
+ bind(object_has_monitor);
+ z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop); // CurrentHeader is tagged with monitor_value set.
+ load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
+ z_brne(done);
+ load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+ z_brne(done);
+ load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
+ z_brne(done);
+ load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
+ z_brne(done);
+ z_release();
+ z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader);
bind(done);
--- a/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp Thu Sep 06 16:15:32 2018 -0700
+++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp Thu Sep 06 18:06:24 2018 -0700
@@ -2648,195 +2648,92 @@
inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch);
}
- if (EmitSync & 1) {
- mov(3, Rscratch);
- st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
- cmp(SP, G0);
- return ;
- }
-
- if (EmitSync & 2) {
-
- // Fetch object's markword
- ld_ptr(mark_addr, Rmark);
-
- if (try_bias) {
- biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
- }
-
- // Save Rbox in Rscratch to be used for the cas operation
- mov(Rbox, Rscratch);
-
- // set Rmark to markOop | markOopDesc::unlocked_value
- or3(Rmark, markOopDesc::unlocked_value, Rmark);
-
- // Initialize the box. (Must happen before we update the object mark!)
- st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
-
- // compare object markOop with Rmark and if equal exchange Rscratch with object markOop
- assert(mark_addr.disp() == 0, "cas must take a zero displacement");
- cas_ptr(mark_addr.base(), Rmark, Rscratch);
-
- // if compare/exchange succeeded we found an unlocked object and we now have locked it
- // hence we are done
- cmp(Rmark, Rscratch);
- sub(Rscratch, STACK_BIAS, Rscratch);
- brx(Assembler::equal, false, Assembler::pt, done);
- delayed()->sub(Rscratch, SP, Rscratch); //pull next instruction into delay slot
-
- // we did not find an unlocked object so see if this is a recursive case
- // sub(Rscratch, SP, Rscratch);
- assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
- andcc(Rscratch, 0xfffff003, Rscratch);
- st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
- bind (done);
- return ;
- }
-
Label Egress ;
- if (EmitSync & 256) {
- Label IsInflated ;
-
- ld_ptr(mark_addr, Rmark); // fetch obj->mark
- // Triage: biased, stack-locked, neutral, inflated
- if (try_bias) {
- biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
- // Invariant: if control reaches this point in the emitted stream
- // then Rmark has not been modified.
- }
-
- // Store mark into displaced mark field in the on-stack basic-lock "box"
- // Critically, this must happen before the CAS
- // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
- st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
- andcc(Rmark, 2, G0);
- brx(Assembler::notZero, false, Assembler::pn, IsInflated);
- delayed()->
-
- // Try stack-lock acquisition.
- // Beware: the 1st instruction is in a delay slot
- mov(Rbox, Rscratch);
- or3(Rmark, markOopDesc::unlocked_value, Rmark);
- assert(mark_addr.disp() == 0, "cas must take a zero displacement");
- cas_ptr(mark_addr.base(), Rmark, Rscratch);
- cmp(Rmark, Rscratch);
- brx(Assembler::equal, false, Assembler::pt, done);
- delayed()->sub(Rscratch, SP, Rscratch);
-
- // Stack-lock attempt failed - check for recursive stack-lock.
- // See the comments below about how we might remove this case.
- sub(Rscratch, STACK_BIAS, Rscratch);
- assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
- andcc(Rscratch, 0xfffff003, Rscratch);
- br(Assembler::always, false, Assembler::pt, done);
- delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
-
- bind(IsInflated);
- if (EmitSync & 64) {
- // If m->owner != null goto IsLocked
- // Pessimistic form: Test-and-CAS vs CAS
- // The optimistic form avoids RTS->RTO cache line upgrades.
- ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rscratch);
- andcc(Rscratch, Rscratch, G0);
- brx(Assembler::notZero, false, Assembler::pn, done);
- delayed()->nop();
- // m->owner == null : it's unlocked.
- }
-
- // Try to CAS m->owner from null to Self
- // Invariant: if we acquire the lock then _recursions should be 0.
- add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
- mov(G2_thread, Rscratch);
- cas_ptr(Rmark, G0, Rscratch);
- cmp(Rscratch, G0);
- // Intentional fall-through into done
- } else {
- // Aggressively avoid the Store-before-CAS penalty
- // Defer the store into box->dhw until after the CAS
- Label IsInflated, Recursive ;
+ // Aggressively avoid the Store-before-CAS penalty
+ // Defer the store into box->dhw until after the CAS
+ Label IsInflated, Recursive ;
// Anticipate CAS -- Avoid RTS->RTO upgrade
// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
- ld_ptr(mark_addr, Rmark); // fetch obj->mark
- // Triage: biased, stack-locked, neutral, inflated
-
- if (try_bias) {
- biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
- // Invariant: if control reaches this point in the emitted stream
- // then Rmark has not been modified.
- }
- andcc(Rmark, 2, G0);
- brx(Assembler::notZero, false, Assembler::pn, IsInflated);
- delayed()-> // Beware - dangling delay-slot
-
- // Try stack-lock acquisition.
- // Transiently install BUSY (0) encoding in the mark word.
- // if the CAS of 0 into the mark was successful then we execute:
- // ST box->dhw = mark -- save fetched mark in on-stack basiclock box
- // ST obj->mark = box -- overwrite transient 0 value
- // This presumes TSO, of course.
-
- mov(0, Rscratch);
- or3(Rmark, markOopDesc::unlocked_value, Rmark);
- assert(mark_addr.disp() == 0, "cas must take a zero displacement");
- cas_ptr(mark_addr.base(), Rmark, Rscratch);
+ ld_ptr(mark_addr, Rmark); // fetch obj->mark
+ // Triage: biased, stack-locked, neutral, inflated
+
+ if (try_bias) {
+ biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
+ // Invariant: if control reaches this point in the emitted stream
+ // then Rmark has not been modified.
+ }
+ andcc(Rmark, 2, G0);
+ brx(Assembler::notZero, false, Assembler::pn, IsInflated);
+ delayed()-> // Beware - dangling delay-slot
+
+ // Try stack-lock acquisition.
+ // Transiently install BUSY (0) encoding in the mark word.
+ // if the CAS of 0 into the mark was successful then we execute:
+ // ST box->dhw = mark -- save fetched mark in on-stack basiclock box
+ // ST obj->mark = box -- overwrite transient 0 value
+ // This presumes TSO, of course.
+
+ mov(0, Rscratch);
+ or3(Rmark, markOopDesc::unlocked_value, Rmark);
+ assert(mark_addr.disp() == 0, "cas must take a zero displacement");
+ cas_ptr(mark_addr.base(), Rmark, Rscratch);
// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
- cmp(Rscratch, Rmark);
- brx(Assembler::notZero, false, Assembler::pn, Recursive);
- delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
- if (counters != NULL) {
- cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
- }
- ba(done);
- delayed()->st_ptr(Rbox, mark_addr);
-
- bind(Recursive);
- // Stack-lock attempt failed - check for recursive stack-lock.
- // Tests show that we can remove the recursive case with no impact
- // on refworkload 0.83. If we need to reduce the size of the code
- // emitted by compiler_lock_object() the recursive case is perfect
- // candidate.
- //
- // A more extreme idea is to always inflate on stack-lock recursion.
- // This lets us eliminate the recursive checks in compiler_lock_object
- // and compiler_unlock_object and the (box->dhw == 0) encoding.
- // A brief experiment - requiring changes to synchronizer.cpp, interpreter,
- // and showed a performance *increase*. In the same experiment I eliminated
- // the fast-path stack-lock code from the interpreter and always passed
- // control to the "slow" operators in synchronizer.cpp.
-
- // RScratch contains the fetched obj->mark value from the failed CAS.
- sub(Rscratch, STACK_BIAS, Rscratch);
- sub(Rscratch, SP, Rscratch);
- assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
- andcc(Rscratch, 0xfffff003, Rscratch);
- if (counters != NULL) {
- // Accounting needs the Rscratch register
- st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
- cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
- ba_short(done);
- } else {
- ba(done);
- delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
- }
-
- bind (IsInflated);
-
- // Try to CAS m->owner from null to Self
- // Invariant: if we acquire the lock then _recursions should be 0.
- add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
- mov(G2_thread, Rscratch);
- cas_ptr(Rmark, G0, Rscratch);
- andcc(Rscratch, Rscratch, G0); // set ICCs for done: icc.zf iff success
- // set icc.zf : 1=success 0=failure
- // ST box->displaced_header = NonZero.
- // Any non-zero value suffices:
- // markOopDesc::unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
- st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
- // Intentional fall-through into done
+ cmp(Rscratch, Rmark);
+ brx(Assembler::notZero, false, Assembler::pn, Recursive);
+ delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
+ if (counters != NULL) {
+ cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
}
+ ba(done);
+ delayed()->st_ptr(Rbox, mark_addr);
+
+ bind(Recursive);
+ // Stack-lock attempt failed - check for recursive stack-lock.
+ // Tests show that we can remove the recursive case with no impact
+ // on refworkload 0.83. If we need to reduce the size of the code
+ // emitted by compiler_lock_object() the recursive case is a perfect
+ // candidate.
+ //
+ // A more extreme idea is to always inflate on stack-lock recursion.
+ // This lets us eliminate the recursive checks in compiler_lock_object
+ // and compiler_unlock_object and the (box->dhw == 0) encoding.
+ // A brief experiment - requiring changes to synchronizer.cpp and the
+ // interpreter - showed a performance *increase*. In the same experiment I eliminated
+ // the fast-path stack-lock code from the interpreter and always passed
+ // control to the "slow" operators in synchronizer.cpp.
+
+ // Rscratch contains the fetched obj->mark value from the failed CAS.
+ sub(Rscratch, STACK_BIAS, Rscratch);
+ sub(Rscratch, SP, Rscratch);
+ assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
+ andcc(Rscratch, 0xfffff003, Rscratch);
+ if (counters != NULL) {
+ // Accounting needs the Rscratch register
+ st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
+ cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
+ ba_short(done);
+ } else {
+ ba(done);
+ delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
+ }
+
+ bind (IsInflated);
+
+ // Try to CAS m->owner from null to Self
+ // Invariant: if we acquire the lock then _recursions should be 0.
+ add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
+ mov(G2_thread, Rscratch);
+ cas_ptr(Rmark, G0, Rscratch);
+ andcc(Rscratch, Rscratch, G0); // set ICCs for done: icc.zf iff success
+ // set icc.zf : 1=success 0=failure
+ // ST box->displaced_header = NonZero.
+ // Any non-zero value suffices:
+ // markOopDesc::unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
+ st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
+ // Intentional fall-through into done
bind (done);
}
@@ -2848,30 +2745,6 @@
Label done ;
- if (EmitSync & 4) {
- cmp(SP, G0);
- return ;
- }
-
- if (EmitSync & 8) {
- if (try_bias) {
- biased_locking_exit(mark_addr, Rscratch, done);
- }
-
- // Test first if it is a fast recursive unlock
- ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
- br_null_short(Rmark, Assembler::pt, done);
-
- // Check if it is still a light weight lock, this is is true if we see
- // the stack address of the basicLock in the markOop of the object
- assert(mark_addr.disp() == 0, "cas must take a zero displacement");
- cas_ptr(mark_addr.base(), Rbox, Rmark);
- ba(done);
- delayed()->cmp(Rbox, Rmark);
- bind(done);
- return ;
- }
-
// Beware ... If the aggregate size of the code emitted by CLO and CUO is
// is too large performance rolls abruptly off a cliff.
// This could be related to inlining policies, code cache management, or
@@ -2902,105 +2775,39 @@
// close the resultant (and rare) race by having contended threads in
// monitorenter periodically poll _owner.
- if (EmitSync & 1024) {
- // Emit code to check that _owner == Self
- // We could fold the _owner test into subsequent code more efficiently
- // than using a stand-alone check, but since _owner checking is off by
- // default we don't bother. We also might consider predicating the
- // _owner==Self check on Xcheck:jni or running on a debug build.
- ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), Rscratch);
- orcc(Rscratch, G0, G0);
- brx(Assembler::notZero, false, Assembler::pn, done);
- delayed()->nop();
- }
-
- if (EmitSync & 512) {
- // classic lock release code absent 1-0 locking
- // m->Owner = null;
- // membar #storeload
- // if (m->cxq|m->EntryList) == null goto Success
- // if (m->succ != null) goto Success
- // if CAS (&m->Owner,0,Self) != 0 goto Success
- // goto SlowPath
- ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox);
- orcc(Rbox, G0, G0);
- brx(Assembler::notZero, false, Assembler::pn, done);
- delayed()->nop();
- st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
- if (os::is_MP()) { membar(StoreLoad); }
- ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch);
- ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox);
- orcc(Rbox, Rscratch, G0);
- brx(Assembler::zero, false, Assembler::pt, done);
- delayed()->
- ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
- andcc(Rscratch, Rscratch, G0);
- brx(Assembler::notZero, false, Assembler::pt, done);
- delayed()->andcc(G0, G0, G0);
- add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
- mov(G2_thread, Rscratch);
- cas_ptr(Rmark, G0, Rscratch);
- cmp(Rscratch, G0);
- // invert icc.zf and goto done
- brx(Assembler::notZero, false, Assembler::pt, done);
- delayed()->cmp(G0, G0);
- br(Assembler::always, false, Assembler::pt, done);
- delayed()->cmp(G0, 1);
- } else {
- // 1-0 form : avoids CAS and MEMBAR in the common case
- // Do not bother to ratify that m->Owner == Self.
- ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox);
- orcc(Rbox, G0, G0);
- brx(Assembler::notZero, false, Assembler::pn, done);
- delayed()->
- ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch);
- ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox);
- orcc(Rbox, Rscratch, G0);
- if (EmitSync & 16384) {
- // As an optional optimization, if (EntryList|cxq) != null and _succ is null then
- // we should transfer control directly to the slow-path.
- // This test makes the reacquire operation below very infrequent.
- // The logic is equivalent to :
- // if (cxq|EntryList) == null : Owner=null; goto Success
- // if succ == null : goto SlowPath
- // Owner=null; membar #storeload
- // if succ != null : goto Success
- // if CAS(&Owner,null,Self) != null goto Success
- // goto SlowPath
- brx(Assembler::zero, true, Assembler::pt, done);
- delayed()->
- st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
- ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
- andcc(Rscratch, Rscratch, G0) ;
- brx(Assembler::zero, false, Assembler::pt, done);
- delayed()->orcc(G0, 1, G0);
- st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
- } else {
- brx(Assembler::zero, false, Assembler::pt, done);
- delayed()->
- st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
- }
- if (os::is_MP()) { membar(StoreLoad); }
- // Check that _succ is (or remains) non-zero
- ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
- andcc(Rscratch, Rscratch, G0);
- brx(Assembler::notZero, false, Assembler::pt, done);
- delayed()->andcc(G0, G0, G0);
- add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
- mov(G2_thread, Rscratch);
- cas_ptr(Rmark, G0, Rscratch);
- cmp(Rscratch, G0);
- // invert icc.zf and goto done
- // A slightly better v8+/v9 idiom would be the following:
- // movrnz Rscratch,1,Rscratch
- // ba done
- // xorcc Rscratch,1,G0
- // In v8+ mode the idiom would be valid IFF Rscratch was a G or O register
- brx(Assembler::notZero, false, Assembler::pt, done);
- delayed()->cmp(G0, G0);
- br(Assembler::always, false, Assembler::pt, done);
- delayed()->cmp(G0, 1);
- }
+ // 1-0 form : avoids CAS and MEMBAR in the common case
+ // Do not bother to ratify that m->Owner == Self.
+ ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox);
+ orcc(Rbox, G0, G0);
+ brx(Assembler::notZero, false, Assembler::pn, done);
+ delayed()->
+ ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch);
+ ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox);
+ orcc(Rbox, Rscratch, G0);
+ brx(Assembler::zero, false, Assembler::pt, done);
+ delayed()->
+ st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+
+ if (os::is_MP()) { membar(StoreLoad); }
+ // Check that _succ is (or remains) non-zero
+ ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
+ andcc(Rscratch, Rscratch, G0);
+ brx(Assembler::notZero, false, Assembler::pt, done);
+ delayed()->andcc(G0, G0, G0);
+ add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
+ mov(G2_thread, Rscratch);
+ cas_ptr(Rmark, G0, Rscratch);
+ cmp(Rscratch, G0);
+ // invert icc.zf and goto done
+ // A slightly better v8+/v9 idiom would be the following:
+ // movrnz Rscratch,1,Rscratch
+ // ba done
+ // xorcc Rscratch,1,G0
+ // In v8+ mode the idiom would be valid IFF Rscratch was a G or O register
+ brx(Assembler::notZero, false, Assembler::pt, done);
+ delayed()->cmp(G0, G0);
+ br(Assembler::always, false, Assembler::pt, done);
+ delayed()->cmp(G0, 1);
bind (LStacked);
// Consider: we could replace the expensive CAS in the exit
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp Thu Sep 06 16:15:32 2018 -0700
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp Thu Sep 06 18:06:24 2018 -0700
@@ -1721,227 +1721,160 @@
if (counters != NULL) {
atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
}
- if (EmitSync & 1) {
- // set box->dhw = markOopDesc::unused_mark()
- // Force all sync thru slow-path: slow_enter() and slow_exit()
- movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
- cmpptr (rsp, (int32_t)NULL_WORD);
- } else {
- // Possible cases that we'll encounter in fast_lock
- // ------------------------------------------------
- // * Inflated
- // -- unlocked
- // -- Locked
- // = by self
- // = by other
- // * biased
- // -- by Self
- // -- by other
- // * neutral
- // * stack-locked
- // -- by self
- // = sp-proximity test hits
- // = sp-proximity test generates false-negative
- // -- by other
- //
-
- Label IsInflated, DONE_LABEL;
-
- // it's stack-locked, biased or neutral
- // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
- // order to reduce the number of conditional branches in the most common cases.
- // Beware -- there's a subtle invariant that fetch of the markword
- // at [FETCH], below, will never observe a biased encoding (*101b).
- // If this invariant is not held we risk exclusion (safety) failure.
- if (UseBiasedLocking && !UseOptoBiasInlining) {
- biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
- }
+
+ // Possible cases that we'll encounter in fast_lock
+ // ------------------------------------------------
+ // * Inflated
+ // -- unlocked
+ // -- Locked
+ // = by self
+ // = by other
+ // * biased
+ // -- by Self
+ // -- by other
+ // * neutral
+ // * stack-locked
+ // -- by self
+ // = sp-proximity test hits
+ // = sp-proximity test generates false-negative
+ // -- by other
+ //
+
+ Label IsInflated, DONE_LABEL;
+
+ // it's stack-locked, biased or neutral
+ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
+ // order to reduce the number of conditional branches in the most common cases.
+ // Beware -- there's a subtle invariant that fetch of the markword
+ // at [FETCH], below, will never observe a biased encoding (*101b).
+ // If this invariant is not held we risk exclusion (safety) failure.
+ if (UseBiasedLocking && !UseOptoBiasInlining) {
+ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
+ }
#if INCLUDE_RTM_OPT
- if (UseRTMForStackLocks && use_rtm) {
- rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
- stack_rtm_counters, method_data, profile_rtm,
- DONE_LABEL, IsInflated);
- }
+ if (UseRTMForStackLocks && use_rtm) {
+ rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
+ stack_rtm_counters, method_data, profile_rtm,
+ DONE_LABEL, IsInflated);
+ }
#endif // INCLUDE_RTM_OPT
- movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH]
- testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
- jccb(Assembler::notZero, IsInflated);
-
- // Attempt stack-locking ...
- orptr (tmpReg, markOopDesc::unlocked_value);
- movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
- if (os::is_MP()) {
- lock();
- }
- cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg
- if (counters != NULL) {
- cond_inc32(Assembler::equal,
- ExternalAddress((address)counters->fast_path_entry_count_addr()));
- }
- jcc(Assembler::equal, DONE_LABEL); // Success
-
- // Recursive locking.
- // The object is stack-locked: markword contains stack pointer to BasicLock.
- // Locked by current thread if difference with current SP is less than one page.
- subptr(tmpReg, rsp);
- // Next instruction set ZFlag == 1 (Success) if difference is less then one page.
- andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
- movptr(Address(boxReg, 0), tmpReg);
- if (counters != NULL) {
- cond_inc32(Assembler::equal,
- ExternalAddress((address)counters->fast_path_entry_count_addr()));
- }
- jmp(DONE_LABEL);
-
- bind(IsInflated);
- // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markOopDesc::monitor_value
+ movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH]
+ testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
+ jccb(Assembler::notZero, IsInflated);
+
+ // Attempt stack-locking ...
+ orptr (tmpReg, markOopDesc::unlocked_value);
+ movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
+ if (os::is_MP()) {
+ lock();
+ }
+ cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg
+ if (counters != NULL) {
+ cond_inc32(Assembler::equal,
+ ExternalAddress((address)counters->fast_path_entry_count_addr()));
+ }
+ jcc(Assembler::equal, DONE_LABEL); // Success
+
+ // Recursive locking.
+ // The object is stack-locked: markword contains stack pointer to BasicLock.
+ // Locked by current thread if difference with current SP is less than one page.
+ subptr(tmpReg, rsp);
+ // Next instruction sets ZFlag == 1 (Success) if difference is less than one page.
+ andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
+ movptr(Address(boxReg, 0), tmpReg);
+ if (counters != NULL) {
+ cond_inc32(Assembler::equal,
+ ExternalAddress((address)counters->fast_path_entry_count_addr()));
+ }
+ jmp(DONE_LABEL);
+
+ bind(IsInflated);
+ // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markOopDesc::monitor_value
#if INCLUDE_RTM_OPT
- // Use the same RTM locking code in 32- and 64-bit VM.
- if (use_rtm) {
- rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
- rtm_counters, method_data, profile_rtm, DONE_LABEL);
- } else {
+ // Use the same RTM locking code in 32- and 64-bit VM.
+ if (use_rtm) {
+ rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
+ rtm_counters, method_data, profile_rtm, DONE_LABEL);
+ } else {
#endif // INCLUDE_RTM_OPT
#ifndef _LP64
- // The object is inflated.
-
- // boxReg refers to the on-stack BasicLock in the current frame.
- // We'd like to write:
- // set box->_displaced_header = markOopDesc::unused_mark(). Any non-0 value suffices.
- // This is convenient but results a ST-before-CAS penalty. The following CAS suffers
- // additional latency as we have another ST in the store buffer that must drain.
-
- if (EmitSync & 8192) {
- movptr(Address(boxReg, 0), 3); // results in ST-before-CAS penalty
- get_thread (scrReg);
- movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
- movptr(tmpReg, NULL_WORD); // consider: xor vs mov
- if (os::is_MP()) {
- lock();
- }
- cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
- } else
- if ((EmitSync & 128) == 0) { // avoid ST-before-CAS
- // register juggle because we need tmpReg for cmpxchgptr below
- movptr(scrReg, boxReg);
- movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
-
- // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
- if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
- // prefetchw [eax + Offset(_owner)-2]
- prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
- }
-
- if ((EmitSync & 64) == 0) {
- // Optimistic form: consider XORL tmpReg,tmpReg
- movptr(tmpReg, NULL_WORD);
- } else {
- // Can suffer RTS->RTO upgrades on shared or cold $ lines
- // Test-And-CAS instead of CAS
- movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); // rax, = m->_owner
- testptr(tmpReg, tmpReg); // Locked ?
- jccb (Assembler::notZero, DONE_LABEL);
- }
-
- // Appears unlocked - try to swing _owner from null to non-null.
- // Ideally, I'd manifest "Self" with get_thread and then attempt
- // to CAS the register containing Self into m->Owner.
- // But we don't have enough registers, so instead we can either try to CAS
- // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
- // we later store "Self" into m->Owner. Transiently storing a stack address
- // (rsp or the address of the box) into m->owner is harmless.
- // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
- if (os::is_MP()) {
- lock();
- }
- cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
- movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3
- // If we weren't able to swing _owner from NULL to the BasicLock
- // then take the slow path.
- jccb (Assembler::notZero, DONE_LABEL);
- // update _owner from BasicLock to thread
- get_thread (scrReg); // beware: clobbers ICCs
- movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
- xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success
-
- // If the CAS fails we can either retry or pass control to the slow-path.
- // We use the latter tactic.
- // Pass the CAS result in the icc.ZFlag into DONE_LABEL
- // If the CAS was successful ...
- // Self has acquired the lock
- // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
- // Intentional fall-through into DONE_LABEL ...
- } else {
- movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark())); // results in ST-before-CAS penalty
- movptr(boxReg, tmpReg);
-
- // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
- if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
- // prefetchw [eax + Offset(_owner)-2]
- prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
- }
-
- if ((EmitSync & 64) == 0) {
- // Optimistic form
- xorptr (tmpReg, tmpReg);
- } else {
- // Can suffer RTS->RTO upgrades on shared or cold $ lines
- movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); // rax, = m->_owner
- testptr(tmpReg, tmpReg); // Locked ?
- jccb (Assembler::notZero, DONE_LABEL);
- }
-
- // Appears unlocked - try to swing _owner from null to non-null.
- // Use either "Self" (in scr) or rsp as thread identity in _owner.
- // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
- get_thread (scrReg);
- if (os::is_MP()) {
- lock();
- }
- cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-
- // If the CAS fails we can either retry or pass control to the slow-path.
- // We use the latter tactic.
- // Pass the CAS result in the icc.ZFlag into DONE_LABEL
- // If the CAS was successful ...
- // Self has acquired the lock
- // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
- // Intentional fall-through into DONE_LABEL ...
- }
+ // The object is inflated.
+
+ // boxReg refers to the on-stack BasicLock in the current frame.
+ // We'd like to write:
+ // set box->_displaced_header = markOopDesc::unused_mark(). Any non-0 value suffices.
+ // This is convenient but results in a ST-before-CAS penalty. The following CAS suffers
+ // additional latency as we have another ST in the store buffer that must drain.
+
+ // avoid ST-before-CAS
+ // register juggle because we need tmpReg for cmpxchgptr below
+ movptr(scrReg, boxReg);
+ movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
+
+ // Optimistic form: consider XORL tmpReg,tmpReg
+ movptr(tmpReg, NULL_WORD);
+
+ // Appears unlocked - try to swing _owner from null to non-null.
+ // Ideally, I'd manifest "Self" with get_thread and then attempt
+ // to CAS the register containing Self into m->Owner.
+ // But we don't have enough registers, so instead we can either try to CAS
+ // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
+ // we later store "Self" into m->Owner. Transiently storing a stack address
+ // (rsp or the address of the box) into m->owner is harmless.
+ // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
+ if (os::is_MP()) {
+ lock();
+ }
+ cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+ movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3
+ // If we weren't able to swing _owner from NULL to the BasicLock
+ // then take the slow path.
+ jccb (Assembler::notZero, DONE_LABEL);
+ // update _owner from BasicLock to thread
+ get_thread (scrReg); // beware: clobbers ICCs
+ movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
+ xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success
+
+ // If the CAS fails we can either retry or pass control to the slow-path.
+ // We use the latter tactic.
+ // Pass the CAS result in the icc.ZFlag into DONE_LABEL
+ // If the CAS was successful ...
+ // Self has acquired the lock
+ // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
+ // Intentional fall-through into DONE_LABEL ...
#else // _LP64
- // It's inflated
- movq(scrReg, tmpReg);
- xorq(tmpReg, tmpReg);
-
- if (os::is_MP()) {
- lock();
- }
- cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
- // Unconditionally set box->_displaced_header = markOopDesc::unused_mark().
- // Without cast to int32_t movptr will destroy r10 which is typically obj.
- movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
- // Intentional fall-through into DONE_LABEL ...
- // Propagate ICC.ZF from CAS above into DONE_LABEL.
+ // It's inflated
+ movq(scrReg, tmpReg);
+ xorq(tmpReg, tmpReg);
+
+ if (os::is_MP()) {
+ lock();
+ }
+ cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+ // Unconditionally set box->_displaced_header = markOopDesc::unused_mark().
+ // Without cast to int32_t movptr will destroy r10 which is typically obj.
+ movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
+ // Intentional fall-through into DONE_LABEL ...
+ // Propagate ICC.ZF from CAS above into DONE_LABEL.
#endif // _LP64
#if INCLUDE_RTM_OPT
- } // use_rtm()
+ } // use_rtm()
#endif
- // DONE_LABEL is a hot target - we'd really like to place it at the
- // start of cache line by padding with NOPs.
- // See the AMD and Intel software optimization manuals for the
- // most efficient "long" NOP encodings.
- // Unfortunately none of our alignment mechanisms suffice.
- bind(DONE_LABEL);
-
- // At DONE_LABEL the icc ZFlag is set as follows ...
- // Fast_Unlock uses the same protocol.
- // ZFlag == 1 -> Success
- // ZFlag == 0 -> Failure - force control through the slow-path
- }
+ // DONE_LABEL is a hot target - we'd really like to place it at the
+ // start of cache line by padding with NOPs.
+ // See the AMD and Intel software optimization manuals for the
+ // most efficient "long" NOP encodings.
+ // Unfortunately none of our alignment mechanisms suffice.
+ bind(DONE_LABEL);
+
+ // At DONE_LABEL the icc ZFlag is set as follows ...
+ // Fast_Unlock uses the same protocol.
+ // ZFlag == 1 -> Success
+ // ZFlag == 0 -> Failure - force control through the slow-path
}
// obj: object to unlock
@@ -1980,293 +1913,179 @@
assert(boxReg == rax, "");
assert_different_registers(objReg, boxReg, tmpReg);
- if (EmitSync & 4) {
- // Disable - inhibit all inlining. Force control through the slow-path
- cmpptr (rsp, 0);
- } else {
- Label DONE_LABEL, Stacked, CheckSucc;
-
- // Critically, the biased locking test must have precedence over
- // and appear before the (box->dhw == 0) recursive stack-lock test.
- if (UseBiasedLocking && !UseOptoBiasInlining) {
- biased_locking_exit(objReg, tmpReg, DONE_LABEL);
- }
+ Label DONE_LABEL, Stacked, CheckSucc;
+
+ // Critically, the biased locking test must have precedence over
+ // and appear before the (box->dhw == 0) recursive stack-lock test.
+ if (UseBiasedLocking && !UseOptoBiasInlining) {
+ biased_locking_exit(objReg, tmpReg, DONE_LABEL);
+ }
#if INCLUDE_RTM_OPT
- if (UseRTMForStackLocks && use_rtm) {
- assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
- Label L_regular_unlock;
- movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
- andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
- cmpptr(tmpReg, markOopDesc::unlocked_value); // bits = 001 unlocked
- jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock
- xend(); // otherwise end...
- jmp(DONE_LABEL); // ... and we're done
- bind(L_regular_unlock);
- }
+ if (UseRTMForStackLocks && use_rtm) {
+ assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
+ Label L_regular_unlock;
+ movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
+ andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
+ cmpptr(tmpReg, markOopDesc::unlocked_value); // bits = 001 unlocked
+ jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock
+ xend(); // otherwise end...
+ jmp(DONE_LABEL); // ... and we're done
+ bind(L_regular_unlock);
+ }
#endif
- cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
- jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
- movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
- testptr(tmpReg, markOopDesc::monitor_value); // Inflated?
- jccb (Assembler::zero, Stacked);
-
- // It's inflated.
+ cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
+ jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
+ movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
+ testptr(tmpReg, markOopDesc::monitor_value); // Inflated?
+ jccb (Assembler::zero, Stacked);
+
+ // It's inflated.
#if INCLUDE_RTM_OPT
- if (use_rtm) {
- Label L_regular_inflated_unlock;
- int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
- movptr(boxReg, Address(tmpReg, owner_offset));
- testptr(boxReg, boxReg);
- jccb(Assembler::notZero, L_regular_inflated_unlock);
- xend();
- jmpb(DONE_LABEL);
- bind(L_regular_inflated_unlock);
- }
+ if (use_rtm) {
+ Label L_regular_inflated_unlock;
+ int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
+ movptr(boxReg, Address(tmpReg, owner_offset));
+ testptr(boxReg, boxReg);
+ jccb(Assembler::notZero, L_regular_inflated_unlock);
+ xend();
+ jmpb(DONE_LABEL);
+ bind(L_regular_inflated_unlock);
+ }
#endif
- // Despite our balanced locking property we still check that m->_owner == Self
- // as java routines or native JNI code called by this thread might
- // have released the lock.
- // Refer to the comments in synchronizer.cpp for how we might encode extra
- // state in _succ so we can avoid fetching EntryList|cxq.
- //
- // I'd like to add more cases in fast_lock() and fast_unlock() --
- // such as recursive enter and exit -- but we have to be wary of
- // I$ bloat, T$ effects and BP$ effects.
- //
- // If there's no contention try a 1-0 exit. That is, exit without
- // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
- // we detect and recover from the race that the 1-0 exit admits.
- //
- // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
- // before it STs null into _owner, releasing the lock. Updates
- // to data protected by the critical section must be visible before
- // we drop the lock (and thus before any other thread could acquire
- // the lock and observe the fields protected by the lock).
- // IA32's memory-model is SPO, so STs are ordered with respect to
- // each other and there's no need for an explicit barrier (fence).
- // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
+ // Despite our balanced locking property we still check that m->_owner == Self
+ // as java routines or native JNI code called by this thread might
+ // have released the lock.
+ // Refer to the comments in synchronizer.cpp for how we might encode extra
+ // state in _succ so we can avoid fetching EntryList|cxq.
+ //
+ // I'd like to add more cases in fast_lock() and fast_unlock() --
+ // such as recursive enter and exit -- but we have to be wary of
+ // I$ bloat, T$ effects and BP$ effects.
+ //
+ // If there's no contention try a 1-0 exit. That is, exit without
+ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
+ // we detect and recover from the race that the 1-0 exit admits.
+ //
+ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
+ // before it STs null into _owner, releasing the lock. Updates
+ // to data protected by the critical section must be visible before
+ // we drop the lock (and thus before any other thread could acquire
+ // the lock and observe the fields protected by the lock).
+ // IA32's memory-model is SPO, so STs are ordered with respect to
+ // each other and there's no need for an explicit barrier (fence).
+ // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
#ifndef _LP64
- get_thread (boxReg);
- if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
- // prefetchw [ebx + Offset(_owner)-2]
- prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
- }
-
- // Note that we could employ various encoding schemes to reduce
- // the number of loads below (currently 4) to just 2 or 3.
- // Refer to the comments in synchronizer.cpp.
- // In practice the chain of fetches doesn't seem to impact performance, however.
- xorptr(boxReg, boxReg);
- if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
- // Attempt to reduce branch density - AMD's branch predictor.
- orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
- orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
- orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
- jccb (Assembler::notZero, DONE_LABEL);
- movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
- jmpb (DONE_LABEL);
- } else {
- orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
- jccb (Assembler::notZero, DONE_LABEL);
- movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
- orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
- jccb (Assembler::notZero, CheckSucc);
- movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
- jmpb (DONE_LABEL);
- }
-
- // The Following code fragment (EmitSync & 65536) improves the performance of
- // contended applications and contended synchronization microbenchmarks.
- // Unfortunately the emission of the code - even though not executed - causes regressions
- // in scimark and jetstream, evidently because of $ effects. Replacing the code
- // with an equal number of never-executed NOPs results in the same regression.
- // We leave it off by default.
-
- if ((EmitSync & 65536) != 0) {
- Label LSuccess, LGoSlowPath ;
-
- bind (CheckSucc);
-
- // Optional pre-test ... it's safe to elide this
- cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
- jccb(Assembler::zero, LGoSlowPath);
-
- // We have a classic Dekker-style idiom:
- // ST m->_owner = 0 ; MEMBAR; LD m->_succ
- // There are a number of ways to implement the barrier:
- // (1) lock:andl &m->_owner, 0
- // is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
- // LOCK: ANDL [ebx+Offset(_Owner)-2], 0
- // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
- // (2) If supported, an explicit MFENCE is appealing.
- // In older IA32 processors MFENCE is slower than lock:add or xchg
- // particularly if the write-buffer is full as might be the case if
- // if stores closely precede the fence or fence-equivalent instruction.
- // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
- // as the situation has changed with Nehalem and Shanghai.
- // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
- // The $lines underlying the top-of-stack should be in M-state.
- // The locked add instruction is serializing, of course.
- // (4) Use xchg, which is serializing
- // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
- // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
- // The integer condition codes will tell us if succ was 0.
- // Since _succ and _owner should reside in the same $line and
- // we just stored into _owner, it's likely that the $line
- // remains in M-state for the lock:orl.
- //
- // We currently use (3), although it's likely that switching to (2)
- // is correct for the future.
-
- movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
- if (os::is_MP()) {
- lock(); addptr(Address(rsp, 0), 0);
- }
- // Ratify _succ remains non-null
- cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), 0);
- jccb (Assembler::notZero, LSuccess);
-
- xorptr(boxReg, boxReg); // box is really EAX
- if (os::is_MP()) { lock(); }
- cmpxchgptr(rsp, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
- // There's no successor so we tried to regrab the lock with the
- // placeholder value. If that didn't work, then another thread
- // grabbed the lock so we're done (and exit was a success).
- jccb (Assembler::notEqual, LSuccess);
- // Since we're low on registers we installed rsp as a placeholding in _owner.
- // Now install Self over rsp. This is safe as we're transitioning from
- // non-null to non=null
- get_thread (boxReg);
- movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), boxReg);
- // Intentional fall-through into LGoSlowPath ...
-
- bind (LGoSlowPath);
- orptr(boxReg, 1); // set ICC.ZF=0 to indicate failure
- jmpb (DONE_LABEL);
-
- bind (LSuccess);
- xorptr(boxReg, boxReg); // set ICC.ZF=1 to indicate success
- jmpb (DONE_LABEL);
- }
-
- bind (Stacked);
- // It's not inflated and it's not recursively stack-locked and it's not biased.
- // It must be stack-locked.
- // Try to reset the header to displaced header.
- // The "box" value on the stack is stable, so we can reload
- // and be assured we observe the same value as above.
- movptr(tmpReg, Address(boxReg, 0));
- if (os::is_MP()) {
- lock();
- }
- cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
- // Intention fall-thru into DONE_LABEL
-
- // DONE_LABEL is a hot target - we'd really like to place it at the
- // start of cache line by padding with NOPs.
- // See the AMD and Intel software optimization manuals for the
- // most efficient "long" NOP encodings.
- // Unfortunately none of our alignment mechanisms suffice.
- if ((EmitSync & 65536) == 0) {
- bind (CheckSucc);
- }
+ get_thread (boxReg);
+
+ // Note that we could employ various encoding schemes to reduce
+ // the number of loads below (currently 4) to just 2 or 3.
+ // Refer to the comments in synchronizer.cpp.
+ // In practice the chain of fetches doesn't seem to impact performance, however.
+ xorptr(boxReg, boxReg);
+ orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
+ jccb (Assembler::notZero, DONE_LABEL);
+ movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
+ orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
+ jccb (Assembler::notZero, CheckSucc);
+ movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
+ jmpb (DONE_LABEL);
+
+ bind (Stacked);
+ // It's not inflated and it's not recursively stack-locked and it's not biased.
+ // It must be stack-locked.
+ // Try to reset the header to displaced header.
+ // The "box" value on the stack is stable, so we can reload
+ // and be assured we observe the same value as above.
+ movptr(tmpReg, Address(boxReg, 0));
+ if (os::is_MP()) {
+ lock();
+ }
+ cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
+ // Intentional fall-through into DONE_LABEL
+
+ // DONE_LABEL is a hot target - we'd really like to place it at the
+ // start of cache line by padding with NOPs.
+ // See the AMD and Intel software optimization manuals for the
+ // most efficient "long" NOP encodings.
+ // Unfortunately none of our alignment mechanisms suffice.
+ bind (CheckSucc);
#else // _LP64
- // It's inflated
- if (EmitSync & 1024) {
- // Emit code to check that _owner == Self
- // We could fold the _owner test into subsequent code more efficiently
- // than using a stand-alone check, but since _owner checking is off by
- // default we don't bother. We also might consider predicating the
- // _owner==Self check on Xcheck:jni or running on a debug build.
- movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
- xorptr(boxReg, r15_thread);
- } else {
- xorptr(boxReg, boxReg);
- }
- orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
- jccb (Assembler::notZero, DONE_LABEL);
- movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
- orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
- jccb (Assembler::notZero, CheckSucc);
- movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
- jmpb (DONE_LABEL);
-
- if ((EmitSync & 65536) == 0) {
- // Try to avoid passing control into the slow_path ...
- Label LSuccess, LGoSlowPath ;
- bind (CheckSucc);
-
- // The following optional optimization can be elided if necessary
- // Effectively: if (succ == null) goto SlowPath
- // The code reduces the window for a race, however,
- // and thus benefits performance.
- cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
- jccb (Assembler::zero, LGoSlowPath);
-
- xorptr(boxReg, boxReg);
- if ((EmitSync & 16) && os::is_MP()) {
- xchgptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
- } else {
- movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
- if (os::is_MP()) {
- // Memory barrier/fence
- // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
- // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
- // This is faster on Nehalem and AMD Shanghai/Barcelona.
- // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
- // We might also restructure (ST Owner=0;barrier;LD _Succ) to
- // (mov box,0; xchgq box, &m->Owner; LD _succ) .
- lock(); addl(Address(rsp, 0), 0);
- }
- }
- cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
- jccb (Assembler::notZero, LSuccess);
-
- // Rare inopportune interleaving - race.
- // The successor vanished in the small window above.
- // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
- // We need to ensure progress and succession.
- // Try to reacquire the lock.
- // If that fails then the new owner is responsible for succession and this
- // thread needs to take no further action and can exit via the fast path (success).
- // If the re-acquire succeeds then pass control into the slow path.
- // As implemented, this latter mode is horrible because we generated more
- // coherence traffic on the lock *and* artifically extended the critical section
- // length while by virtue of passing control into the slow path.
-
- // box is really RAX -- the following CMPXCHG depends on that binding
- // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
- if (os::is_MP()) { lock(); }
- cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
- // There's no successor so we tried to regrab the lock.
- // If that didn't work, then another thread grabbed the
- // lock so we're done (and exit was a success).
- jccb (Assembler::notEqual, LSuccess);
- // Intentional fall-through into slow-path
-
- bind (LGoSlowPath);
- orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
- jmpb (DONE_LABEL);
-
- bind (LSuccess);
- testl (boxReg, 0); // set ICC.ZF=1 to indicate success
- jmpb (DONE_LABEL);
- }
-
- bind (Stacked);
- movptr(tmpReg, Address (boxReg, 0)); // re-fetch
- if (os::is_MP()) { lock(); }
- cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
-
- if (EmitSync & 65536) {
- bind (CheckSucc);
- }
+ // It's inflated
+ xorptr(boxReg, boxReg);
+ orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
+ jccb (Assembler::notZero, DONE_LABEL);
+ movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
+ orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
+ jccb (Assembler::notZero, CheckSucc);
+ movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
+ jmpb (DONE_LABEL);
+
+ // Try to avoid passing control into the slow_path ...
+ Label LSuccess, LGoSlowPath ;
+ bind (CheckSucc);
+
+ // The following optional optimization can be elided if necessary
+ // Effectively: if (succ == null) goto SlowPath
+ // The code reduces the window for a race, however,
+ // and thus benefits performance.
+ cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
+ jccb (Assembler::zero, LGoSlowPath);
+
+ xorptr(boxReg, boxReg);
+ movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
+ if (os::is_MP()) {
+ // Memory barrier/fence
+ // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
+ // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
+ // This is faster on Nehalem and AMD Shanghai/Barcelona.
+ // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
+ // We might also restructure (ST Owner=0;barrier;LD _Succ) to
+ // (mov box,0; xchgq box, &m->Owner; LD _succ) .
+ lock(); addl(Address(rsp, 0), 0);
+ }
+ cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
+ jccb (Assembler::notZero, LSuccess);
+
+ // Rare inopportune interleaving - race.
+ // The successor vanished in the small window above.
+ // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
+ // We need to ensure progress and succession.
+ // Try to reacquire the lock.
+ // If that fails then the new owner is responsible for succession and this
+ // thread needs to take no further action and can exit via the fast path (success).
+ // If the re-acquire succeeds then pass control into the slow path.
+ // As implemented, this latter mode is horrible because we generate more
+ // coherence traffic on the lock *and* artificially extend the critical section
+ // length by virtue of passing control into the slow path.
+
+ // box is really RAX -- the following CMPXCHG depends on that binding
+ // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
+ if (os::is_MP()) { lock(); }
+ cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+ // There's no successor so we tried to regrab the lock.
+ // If that didn't work, then another thread grabbed the
+ // lock so we're done (and exit was a success).
+ jccb (Assembler::notEqual, LSuccess);
+ // Intentional fall-through into slow-path
+
+ bind (LGoSlowPath);
+ orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
+ jmpb (DONE_LABEL);
+
+ bind (LSuccess);
+ testl (boxReg, 0); // set ICC.ZF=1 to indicate success
+ jmpb (DONE_LABEL);
+
+ bind (Stacked);
+ movptr(tmpReg, Address (boxReg, 0)); // re-fetch
+ if (os::is_MP()) { lock(); }
+ cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
+
#endif
- bind(DONE_LABEL);
- }
+ bind(DONE_LABEL);
}
#endif // COMPILER2
--- a/src/hotspot/share/runtime/arguments.cpp Thu Sep 06 16:15:32 2018 -0700
+++ b/src/hotspot/share/runtime/arguments.cpp Thu Sep 06 18:06:24 2018 -0700
@@ -574,6 +574,7 @@
{ "PrintSafepointStatisticsCount", JDK_Version::jdk(11), JDK_Version::jdk(12), JDK_Version::jdk(13) },
{ "TransmitErrorReport", JDK_Version::undefined(), JDK_Version::jdk(12), JDK_Version::jdk(13) },
{ "ErrorReportServer", JDK_Version::undefined(), JDK_Version::jdk(12), JDK_Version::jdk(13) },
+ { "EmitSync", JDK_Version::undefined(), JDK_Version::jdk(12), JDK_Version::jdk(13) },
#ifdef TEST_VERIFY_SPECIAL_JVM_FLAGS
{ "dep > obs", JDK_Version::jdk(9), JDK_Version::jdk(8), JDK_Version::undefined() },
@@ -3977,7 +3978,7 @@
}
}
#ifdef COMPILER2
- if (!UseBiasedLocking || EmitSync != 0) {
+ if (!UseBiasedLocking) {
UseOptoBiasInlining = false;
}
#endif
--- a/src/hotspot/share/runtime/globals.hpp Thu Sep 06 16:15:32 2018 -0700
+++ b/src/hotspot/share/runtime/globals.hpp Thu Sep 06 18:06:24 2018 -0700
@@ -830,10 +830,6 @@
experimental(ccstr, SyncKnobs, NULL, \
"(Unstable) Various monitor synchronization tunables") \
\
- experimental(intx, EmitSync, 0, \
- "(Unsafe, Unstable) " \
- "Control emission of inline sync fast-path code") \
- \
product(intx, MonitorBound, 0, "Bound Monitor population") \
range(0, max_jint) \
\