8210381: Obsolete EmitSync
author mikael
Thu, 06 Sep 2018 18:06:24 -0700
changeset 51663 a65d8a6fa424
parent 51662 fe4349d27282
child 51664 a8bdd9c24d37
8210381: Obsolete EmitSync
Reviewed-by: kvn, dcubed, mdoerr, mbaesken, shade
src/hotspot/cpu/aarch64/aarch64.ad
src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
src/hotspot/cpu/s390/macroAssembler_s390.cpp
src/hotspot/cpu/sparc/macroAssembler_sparc.cpp
src/hotspot/cpu/x86/macroAssembler_x86.cpp
src/hotspot/share/runtime/arguments.cpp
src/hotspot/share/runtime/globals.hpp
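
EmitSync was an experimental bitmask flag: individual bits selected alternative (and mostly stale) synchronization fast paths, as the EmitSync & ... branches deleted below show. This changeset removes every such branch, keeping only the default code, and retires the flag through the usual obsolete-flag machinery in arguments.cpp (that hunk is not shown on this page). A minimal standalone model of that mechanism follows; the struct shape, the JDK version numbers, and the warning text are assumptions for illustration, not the exact entry this changeset adds:

    #include <cstdio>
    #include <cstring>

    // Loose model of HotSpot's special_jvm_flags table in arguments.cpp.
    // Versions and message are illustrative assumptions.
    struct SpecialFlag {
      const char* name;
      int deprecated_in;  // warn, but keep honoring the value
      int obsolete_in;    // warn and ignore the value
      int expired_in;     // treat as an unknown option
    };

    static const SpecialFlag special_jvm_flags[] = {
      { "EmitSync", /*deprecated_in*/ 0, /*obsolete_in*/ 12, /*expired_in*/ 13 },
    };

    static void warn_if_obsolete(const char* name, int jdk) {
      for (const SpecialFlag& f : special_jvm_flags) {
        if (strcmp(f.name, name) == 0 && jdk >= f.obsolete_in) {
          printf("Ignoring option %s; support was removed in %d\n",
                 name, f.obsolete_in);
        }
      }
    }

    int main() { warn_if_obsolete("EmitSync", 12); }

An obsolete flag is still accepted on the command line but its value is ignored, so scripts passing -XX:EmitSync=... keep working for one more release before the flag expires.
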
--- a/src/hotspot/cpu/aarch64/aarch64.ad	Thu Sep 06 16:15:32 2018 -0700
+++ b/src/hotspot/cpu/aarch64/aarch64.ad	Thu Sep 06 18:06:24 2018 -0700
@@ -3378,26 +3378,18 @@
     // Load markOop from object into displaced_header.
     __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
 
-    // Always do locking in runtime.
-    if (EmitSync & 0x01) {
-      __ cmp(oop, zr);
-      return;
-    }
-
     if (UseBiasedLocking && !UseOptoBiasInlining) {
       __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
     }
 
     // Handle existing monitor
-    if ((EmitSync & 0x02) == 0) {
-      // we can use AArch64's bit test and branch here but
-      // markoopDesc does not define a bit index just the bit value
-      // so assert in case the bit pos changes
-#     define __monitor_value_log2 1
-      assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
-      __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
-#     undef __monitor_value_log2
-    }
+    // we can use AArch64's bit test and branch here but
+    // markoopDesc does not define a bit index just the bit value
+    // so assert in case the bit pos changes
+#   define __monitor_value_log2 1
+    assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
+    __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
+#   undef __monitor_value_log2
 
     // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
     __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);
@@ -3455,63 +3447,62 @@
     __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
 
     // Handle existing monitor.
-    if ((EmitSync & 0x02) == 0) {
-      __ b(cont);
-
-      __ bind(object_has_monitor);
-      // The object's monitor m is unlocked iff m->owner == NULL,
-      // otherwise m->owner may contain a thread or a stack address.
-      //
-      // Try to CAS m->owner from NULL to current thread.
-      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
-      __ mov(disp_hdr, zr);
-
-      if (UseLSE) {
-        __ mov(rscratch1, disp_hdr);
-        __ casal(Assembler::xword, rscratch1, rthread, tmp);
-        __ cmp(rscratch1, disp_hdr);
-      } else {
-        Label retry_load, fail;
-        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
-          __ prfm(Address(tmp), PSTL1STRM);
-        __ bind(retry_load);
-        __ ldaxr(rscratch1, tmp);
-        __ cmp(disp_hdr, rscratch1);
-        __ br(Assembler::NE, fail);
-        // use stlxr to ensure update is immediately visible
-        __ stlxr(rscratch1, rthread, tmp);
-        __ cbnzw(rscratch1, retry_load);
-        __ bind(fail);
+    __ b(cont);
+
+    __ bind(object_has_monitor);
+    // The object's monitor m is unlocked iff m->owner == NULL,
+    // otherwise m->owner may contain a thread or a stack address.
+    //
+    // Try to CAS m->owner from NULL to current thread.
+    __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
+    __ mov(disp_hdr, zr);
+
+    if (UseLSE) {
+      __ mov(rscratch1, disp_hdr);
+      __ casal(Assembler::xword, rscratch1, rthread, tmp);
+      __ cmp(rscratch1, disp_hdr);
+    } else {
+      Label retry_load, fail;
+      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH)) {
+        __ prfm(Address(tmp), PSTL1STRM);
       }
-
-      // Label next;
-      // __ cmpxchgptr(/*oldv=*/disp_hdr,
-      //               /*newv=*/rthread,
-      //               /*addr=*/tmp,
-      //               /*tmp=*/rscratch1,
-      //               /*succeed*/next,
-      //               /*fail*/NULL);
-      // __ bind(next);
-
-      // store a non-null value into the box.
-      __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));
-
-      // PPC port checks the following invariants
-      // #ifdef ASSERT
-      // bne(flag, cont);
-      // We have acquired the monitor, check some invariants.
-      // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
-      // Invariant 1: _recursions should be 0.
-      // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
-      // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
-      //                        "monitor->_recursions should be 0", -1);
-      // Invariant 2: OwnerIsThread shouldn't be 0.
-      // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
-      //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
-      //                           "monitor->OwnerIsThread shouldn't be 0", -1);
-      // #endif
+      __ bind(retry_load);
+      __ ldaxr(rscratch1, tmp);
+      __ cmp(disp_hdr, rscratch1);
+      __ br(Assembler::NE, fail);
+      // use stlxr to ensure update is immediately visible
+      __ stlxr(rscratch1, rthread, tmp);
+      __ cbnzw(rscratch1, retry_load);
+      __ bind(fail);
     }
 
+    // Label next;
+    // __ cmpxchgptr(/*oldv=*/disp_hdr,
+    //               /*newv=*/rthread,
+    //               /*addr=*/tmp,
+    //               /*tmp=*/rscratch1,
+    //               /*succeed*/next,
+    //               /*fail*/NULL);
+    // __ bind(next);
+
+    // store a non-null value into the box.
+    __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+
+    // PPC port checks the following invariants
+    // #ifdef ASSERT
+    // bne(flag, cont);
+    // We have acquired the monitor, check some invariants.
+    // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
+    // Invariant 1: _recursions should be 0.
+    // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
+    // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
+    //                        "monitor->_recursions should be 0", -1);
+    // Invariant 2: OwnerIsThread shouldn't be 0.
+    // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
+    //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
+    //                           "monitor->OwnerIsThread shouldn't be 0", -1);
+    // #endif
+
     __ bind(cont);
     // flag == EQ indicates success
     // flag == NE indicates failure
@@ -3533,12 +3524,6 @@
 
     assert_different_registers(oop, box, tmp, disp_hdr);
 
-    // Always do locking in runtime.
-    if (EmitSync & 0x01) {
-      __ cmp(oop, zr); // Oop can't be 0 here => always false.
-      return;
-    }
-
     if (UseBiasedLocking && !UseOptoBiasInlining) {
       __ biased_locking_exit(oop, tmp, cont);
     }
@@ -3552,10 +3537,8 @@
 
 
     // Handle existing monitor.
-    if ((EmitSync & 0x02) == 0) {
-      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
-      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
-    }
+    __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
+    __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
 
     // Check if it is still a light weight lock, this is true if we
     // see the stack address of the basicLock in the markOop of the
@@ -3590,27 +3573,25 @@
     __ bind(cas_failed);
 
     // Handle existing monitor.
-    if ((EmitSync & 0x02) == 0) {
-      __ b(cont);
-
-      __ bind(object_has_monitor);
-      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
-      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
-      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
-      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
-      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
-      __ cmp(rscratch1, zr);
-      __ br(Assembler::NE, cont);
-
-      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
-      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
-      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
-      __ cmp(rscratch1, zr);
-      __ cbnz(rscratch1, cont);
-      // need a release store here
-      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
-      __ stlr(rscratch1, tmp); // rscratch1 is zero
-    }
+    __ b(cont);
+
+    __ bind(object_has_monitor);
+    __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
+    __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
+    __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
+    __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
+    __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
+    __ cmp(rscratch1, zr);
+    __ br(Assembler::NE, cont);
+
+    __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
+    __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
+    __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
+    __ cmp(rscratch1, zr);
+    __ cbnz(rscratch1, cont);
+    // need a release store here
+    __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
+    __ stlr(rscratch1, tmp); // rscratch1 is zero
 
     __ bind(cont);
     // flag == EQ indicates success
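
With the EmitSync guards gone, the aarch64 fast path always handles the inflated case: enter tries to CAS m->owner from NULL to the current thread (casal under UseLSE, an ldaxr/stlxr retry loop otherwise), and exit release-stores NULL once recursions, cxq, and EntryList are all clear. A rough standalone model of the enter protocol using std::atomic; it sketches the logic only, not the emitted instructions:

    #include <atomic>
    #include <cassert>

    struct MonitorModel {
      std::atomic<void*> owner{nullptr};  // NULL means unlocked
    };

    // Enter fast path: succeed iff owner swings from NULL to self.
    // casal (LSE) or the ldaxr/stlxr loop provide the acquire/release ordering.
    bool try_enter(MonitorModel& m, void* self) {
      void* expected = nullptr;
      return m.owner.compare_exchange_strong(expected, self,
                                             std::memory_order_acq_rel);
    }

    int main() {
      MonitorModel m;
      int t1, t2;
      assert(try_enter(m, &t1));   // uncontended: CAS wins, flag == EQ
      assert(!try_enter(m, &t2));  // already owned: flag == NE, go to runtime
    }

The boolean result here corresponds to the flag == EQ / flag == NE protocol the generated code hands to its caller.
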
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp	Thu Sep 06 16:15:32 2018 -0700
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp	Thu Sep 06 18:06:24 2018 -0700
@@ -2848,12 +2848,6 @@
   ld(displaced_header, oopDesc::mark_offset_in_bytes(), oop);
 
 
-  // Always do locking in runtime.
-  if (EmitSync & 0x01) {
-    cmpdi(flag, oop, 0); // Oop can't be 0 here => always false.
-    return;
-  }
-
   if (try_bias) {
     biased_locking_enter(flag, oop, displaced_header, temp, current_header, cont);
   }
@@ -2867,11 +2861,9 @@
 #endif // INCLUDE_RTM_OPT
 
   // Handle existing monitor.
-  if ((EmitSync & 0x02) == 0) {
-    // The object has an existing monitor iff (mark & monitor_value) != 0.
-    andi_(temp, displaced_header, markOopDesc::monitor_value);
-    bne(CCR0, object_has_monitor);
-  }
+  // The object has an existing monitor iff (mark & monitor_value) != 0.
+  andi_(temp, displaced_header, markOopDesc::monitor_value);
+  bne(CCR0, object_has_monitor);
 
   // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
   ori(displaced_header, displaced_header, markOopDesc::unlocked_value);
@@ -2914,48 +2906,46 @@
   std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), box);
 
   // Handle existing monitor.
-  if ((EmitSync & 0x02) == 0) {
-    b(cont);
-
-    bind(object_has_monitor);
-    // The object's monitor m is unlocked iff m->owner == NULL,
-    // otherwise m->owner may contain a thread or a stack address.
+  b(cont);
+
+  bind(object_has_monitor);
+  // The object's monitor m is unlocked iff m->owner == NULL,
+  // otherwise m->owner may contain a thread or a stack address.
 
 #if INCLUDE_RTM_OPT
-    // Use the same RTM locking code in 32- and 64-bit VM.
-    if (use_rtm) {
-      rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
-                           rtm_counters, method_data, profile_rtm, cont);
-    } else {
+  // Use the same RTM locking code in 32- and 64-bit VM.
+  if (use_rtm) {
+    rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
+                         rtm_counters, method_data, profile_rtm, cont);
+  } else {
 #endif // INCLUDE_RTM_OPT
 
-    // Try to CAS m->owner from NULL to current thread.
-    addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value);
-    cmpxchgd(/*flag=*/flag,
-             /*current_value=*/current_header,
-             /*compare_value=*/(intptr_t)0,
-             /*exchange_value=*/R16_thread,
-             /*where=*/temp,
-             MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
-             MacroAssembler::cmpxchgx_hint_acquire_lock());
-
-    // Store a non-null value into the box.
-    std(box, BasicLock::displaced_header_offset_in_bytes(), box);
-
-#   ifdef ASSERT
-    bne(flag, cont);
-    // We have acquired the monitor, check some invariants.
-    addi(/*monitor=*/temp, temp, -ObjectMonitor::owner_offset_in_bytes());
-    // Invariant 1: _recursions should be 0.
-    //assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
-    asm_assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), temp,
+  // Try to CAS m->owner from NULL to current thread.
+  addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value);
+  cmpxchgd(/*flag=*/flag,
+           /*current_value=*/current_header,
+           /*compare_value=*/(intptr_t)0,
+           /*exchange_value=*/R16_thread,
+           /*where=*/temp,
+           MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
+           MacroAssembler::cmpxchgx_hint_acquire_lock());
+
+  // Store a non-null value into the box.
+  std(box, BasicLock::displaced_header_offset_in_bytes(), box);
+
+# ifdef ASSERT
+  bne(flag, cont);
+  // We have acquired the monitor, check some invariants.
+  addi(/*monitor=*/temp, temp, -ObjectMonitor::owner_offset_in_bytes());
+  // Invariant 1: _recursions should be 0.
+  //assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
+  asm_assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), temp,
                             "monitor->_recursions should be 0", -1);
-#   endif
+# endif
 
 #if INCLUDE_RTM_OPT
-    } // use_rtm()
+  } // use_rtm()
 #endif
-  }
 
   bind(cont);
   // flag == EQ indicates success
@@ -2970,12 +2960,6 @@
   Label cont;
   Label object_has_monitor;
 
-  // Always do locking in runtime.
-  if (EmitSync & 0x01) {
-    cmpdi(flag, oop, 0); // Oop can't be 0 here => always false.
-    return;
-  }
-
   if (try_bias) {
     biased_locking_exit(flag, oop, current_header, cont);
   }
@@ -3002,13 +2986,11 @@
   beq(flag, cont);
 
   // Handle existing monitor.
-  if ((EmitSync & 0x02) == 0) {
-    // The object has an existing monitor iff (mark & monitor_value) != 0.
-    RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done
-    ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
-    andi_(R0, current_header, markOopDesc::monitor_value);
-    bne(CCR0, object_has_monitor);
-  }
+  // The object has an existing monitor iff (mark & monitor_value) != 0.
+  RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done
+  ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
+  andi_(R0, current_header, markOopDesc::monitor_value);
+  bne(CCR0, object_has_monitor);
 
   // Check if it is still a light weight lock, this is true if we see
   // the stack address of the basicLock in the markOop of the object.
@@ -3026,40 +3008,38 @@
   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
 
   // Handle existing monitor.
-  if ((EmitSync & 0x02) == 0) {
-    b(cont);
-
-    bind(object_has_monitor);
-    addi(current_header, current_header, -markOopDesc::monitor_value); // monitor
-    ld(temp,             ObjectMonitor::owner_offset_in_bytes(), current_header);
+  b(cont);
+
+  bind(object_has_monitor);
+  addi(current_header, current_header, -markOopDesc::monitor_value); // monitor
+  ld(temp,             ObjectMonitor::owner_offset_in_bytes(), current_header);
 
     // It's inflated.
 #if INCLUDE_RTM_OPT
-    if (use_rtm) {
-      Label L_regular_inflated_unlock;
-      // Clean monitor_value bit to get valid pointer
-      cmpdi(flag, temp, 0);
-      bne(flag, L_regular_inflated_unlock);
-      tend_();
-      b(cont);
-      bind(L_regular_inflated_unlock);
-    }
+  if (use_rtm) {
+    Label L_regular_inflated_unlock;
+    // Clean monitor_value bit to get valid pointer
+    cmpdi(flag, temp, 0);
+    bne(flag, L_regular_inflated_unlock);
+    tend_();
+    b(cont);
+    bind(L_regular_inflated_unlock);
+  }
 #endif
 
-    ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header);
-    xorr(temp, R16_thread, temp);      // Will be 0 if we are the owner.
-    orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions.
-    cmpdi(flag, temp, 0);
-    bne(flag, cont);
-
-    ld(temp,             ObjectMonitor::EntryList_offset_in_bytes(), current_header);
-    ld(displaced_header, ObjectMonitor::cxq_offset_in_bytes(), current_header);
-    orr(temp, temp, displaced_header); // Will be 0 if both are 0.
-    cmpdi(flag, temp, 0);
-    bne(flag, cont);
-    release();
-    std(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
-  }
+  ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header);
+  xorr(temp, R16_thread, temp);      // Will be 0 if we are the owner.
+  orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions.
+  cmpdi(flag, temp, 0);
+  bne(flag, cont);
+
+  ld(temp,             ObjectMonitor::EntryList_offset_in_bytes(), current_header);
+  ld(displaced_header, ObjectMonitor::cxq_offset_in_bytes(), current_header);
+  orr(temp, temp, displaced_header); // Will be 0 if both are 0.
+  cmpdi(flag, temp, 0);
+  bne(flag, cont);
+  release();
+  std(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
 
   bind(cont);
   // flag == EQ indicates success
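
A small detail in the unlock hunk above: RTM_OPT_ONLY( if (...) ) wraps only the condition, so the following ld is made conditional in RTM builds (where rtm_stack_locking may already have loaded the header) and unconditional otherwise. A self-contained sketch of the assumed macro shape; the real definition lives with the RTM support code, and INCLUDE_RTM_OPT normally comes from the build system:

    #include <cstdio>

    // Assumed shape of the guard macro: expands its argument only in
    // RTM-enabled builds, compiles it away entirely otherwise.
    #define INCLUDE_RTM_OPT 1

    #if INCLUDE_RTM_OPT
    #define RTM_OPT_ONLY(CODE) CODE
    #else
    #define RTM_OPT_ONLY(CODE)
    #endif

    int main() {
      bool already_loaded = false;
      // In non-RTM builds the guard vanishes and the load always runs.
      RTM_OPT_ONLY(if (!already_loaded))
        puts("ld(current_header, mark_offset_in_bytes, oop)");
    }
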
--- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp	Thu Sep 06 16:15:32 2018 -0700
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp	Thu Sep 06 18:06:24 2018 -0700
@@ -3374,13 +3374,11 @@
   }
 
   // Handle existing monitor.
-  if ((EmitSync & 0x01) == 0) {
-    // The object has an existing monitor iff (mark & monitor_value) != 0.
-    guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
-    z_lr(temp, displacedHeader);
-    z_nill(temp, markOopDesc::monitor_value);
-    z_brne(object_has_monitor);
-  }
+  // The object has an existing monitor iff (mark & monitor_value) != 0.
+  guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
+  z_lr(temp, displacedHeader);
+  z_nill(temp, markOopDesc::monitor_value);
+  z_brne(object_has_monitor);
 
   // Set mark to markOop | markOopDesc::unlocked_value.
   z_oill(displacedHeader, markOopDesc::unlocked_value);
@@ -3411,28 +3409,26 @@
 
   z_bru(done);
 
-  if ((EmitSync & 0x01) == 0) {
-    Register zero = temp;
-    Register monitor_tagged = displacedHeader; // Tagged with markOopDesc::monitor_value.
-    bind(object_has_monitor);
-    // The object's monitor m is unlocked iff m->owner == NULL,
-    // otherwise m->owner may contain a thread or a stack address.
-    //
-    // Try to CAS m->owner from NULL to current thread.
-    z_lghi(zero, 0);
-    // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ.
-    z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor_tagged);
-    // Store a non-null value into the box.
-    z_stg(box, BasicLock::displaced_header_offset_in_bytes(), box);
+  Register zero = temp;
+  Register monitor_tagged = displacedHeader; // Tagged with markOopDesc::monitor_value.
+  bind(object_has_monitor);
+  // The object's monitor m is unlocked iff m->owner == NULL,
+  // otherwise m->owner may contain a thread or a stack address.
+  //
+  // Try to CAS m->owner from NULL to current thread.
+  z_lghi(zero, 0);
+  // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ.
+  z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor_tagged);
+  // Store a non-null value into the box.
+  z_stg(box, BasicLock::displaced_header_offset_in_bytes(), box);
 #ifdef ASSERT
-      z_brne(done);
-      // We've acquired the monitor, check some invariants.
-      // Invariant 1: _recursions should be 0.
-      asm_assert_mem8_is_zero(OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions), monitor_tagged,
-                              "monitor->_recursions should be 0", -1);
-      z_ltgr(zero, zero); // Set CR=EQ.
+  z_brne(done);
+  // We've acquired the monitor, check some invariants.
+  // Invariant 1: _recursions should be 0.
+  asm_assert_mem8_is_zero(OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions), monitor_tagged,
+                          "monitor->_recursions should be 0", -1);
+  z_ltgr(zero, zero); // Set CR=EQ.
 #endif
-  }
   bind(done);
 
   BLOCK_COMMENT("} compiler_fast_lock_object");
@@ -3461,13 +3457,11 @@
   z_bre(done);
 
   // Handle existing monitor.
-  if ((EmitSync & 0x02) == 0) {
-    // The object has an existing monitor iff (mark & monitor_value) != 0.
-    z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop);
-    guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
-    z_nill(currentHeader, markOopDesc::monitor_value);
-    z_brne(object_has_monitor);
-  }
+  // The object has an existing monitor iff (mark & monitor_value) != 0.
+  z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop);
+  guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word");
+  z_nill(currentHeader, markOopDesc::monitor_value);
+  z_brne(object_has_monitor);
 
   // Check if it is still a light weight lock, this is true if we see
   // the stack address of the basicLock in the markOop of the object
@@ -3477,20 +3471,18 @@
   z_bru(done); // Csg sets CR as desired.
 
   // Handle existing monitor.
-  if ((EmitSync & 0x02) == 0) {
-    bind(object_has_monitor);
-    z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop);    // CurrentHeader is tagged with monitor_value set.
-    load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
-    z_brne(done);
-    load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-    z_brne(done);
-    load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
-    z_brne(done);
-    load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
-    z_brne(done);
-    z_release();
-    z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader);
-  }
+  bind(object_has_monitor);
+  z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop);    // CurrentHeader is tagged with monitor_value set.
+  load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
+  z_brne(done);
+  load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+  z_brne(done);
+  load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
+  z_brne(done);
+  load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
+  z_brne(done);
+  z_release();
+  z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader);
 
   bind(done);
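
The OM_OFFSET_NO_MONITOR_VALUE_TAG(field) addressing used in the s390 hunks above (and in the sparc and x86 hunks below) exploits the fact that the mark word stores the ObjectMonitor pointer tagged with monitor_value (binary 10, per the aarch64 assert earlier): subtracting the tag from the field offset lets the code address a field directly through the still-tagged pointer. A standalone model of the arithmetic; the struct layout and macro shape are illustrative:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Illustrative stand-ins for ObjectMonitor and markOopDesc::monitor_value.
    struct ObjectMonitorModel { void* owner; intptr_t recursions; };
    const intptr_t monitor_value = 2;  // tag bits kept in the mark word

    // Field offset as seen from a mark word that still carries the tag.
    #define OM_OFFSET_NO_MONITOR_VALUE_TAG(f) \
      ((intptr_t)offsetof(ObjectMonitorModel, f) - monitor_value)

    int main() {
      ObjectMonitorModel m = { nullptr, 0 };
      intptr_t tagged = (intptr_t)&m + monitor_value;  // as stored in the mark
      void** owner_addr =
          (void**)(tagged + OM_OFFSET_NO_MONITOR_VALUE_TAG(owner));
      printf("%d\n", owner_addr == &m.owner);          // prints 1
    }
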
 
--- a/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp	Thu Sep 06 16:15:32 2018 -0700
+++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp	Thu Sep 06 18:06:24 2018 -0700
@@ -2648,195 +2648,92 @@
      inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch);
    }
 
-   if (EmitSync & 1) {
-     mov(3, Rscratch);
-     st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
-     cmp(SP, G0);
-     return ;
-   }
-
-   if (EmitSync & 2) {
-
-     // Fetch object's markword
-     ld_ptr(mark_addr, Rmark);
-
-     if (try_bias) {
-        biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
-     }
-
-     // Save Rbox in Rscratch to be used for the cas operation
-     mov(Rbox, Rscratch);
-
-     // set Rmark to markOop | markOopDesc::unlocked_value
-     or3(Rmark, markOopDesc::unlocked_value, Rmark);
-
-     // Initialize the box.  (Must happen before we update the object mark!)
-     st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
-
-     // compare object markOop with Rmark and if equal exchange Rscratch with object markOop
-     assert(mark_addr.disp() == 0, "cas must take a zero displacement");
-     cas_ptr(mark_addr.base(), Rmark, Rscratch);
-
-     // if compare/exchange succeeded we found an unlocked object and we now have locked it
-     // hence we are done
-     cmp(Rmark, Rscratch);
-     sub(Rscratch, STACK_BIAS, Rscratch);
-     brx(Assembler::equal, false, Assembler::pt, done);
-     delayed()->sub(Rscratch, SP, Rscratch);  //pull next instruction into delay slot
-
-     // we did not find an unlocked object so see if this is a recursive case
-     // sub(Rscratch, SP, Rscratch);
-     assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
-     andcc(Rscratch, 0xfffff003, Rscratch);
-     st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
-     bind (done);
-     return ;
-   }
-
    Label Egress ;
 
-   if (EmitSync & 256) {
-      Label IsInflated ;
-
-      ld_ptr(mark_addr, Rmark);           // fetch obj->mark
-      // Triage: biased, stack-locked, neutral, inflated
-      if (try_bias) {
-        biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
-        // Invariant: if control reaches this point in the emitted stream
-        // then Rmark has not been modified.
-      }
-
-      // Store mark into displaced mark field in the on-stack basic-lock "box"
-      // Critically, this must happen before the CAS
-      // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
-      st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
-      andcc(Rmark, 2, G0);
-      brx(Assembler::notZero, false, Assembler::pn, IsInflated);
-      delayed()->
-
-      // Try stack-lock acquisition.
-      // Beware: the 1st instruction is in a delay slot
-      mov(Rbox,  Rscratch);
-      or3(Rmark, markOopDesc::unlocked_value, Rmark);
-      assert(mark_addr.disp() == 0, "cas must take a zero displacement");
-      cas_ptr(mark_addr.base(), Rmark, Rscratch);
-      cmp(Rmark, Rscratch);
-      brx(Assembler::equal, false, Assembler::pt, done);
-      delayed()->sub(Rscratch, SP, Rscratch);
-
-      // Stack-lock attempt failed - check for recursive stack-lock.
-      // See the comments below about how we might remove this case.
-      sub(Rscratch, STACK_BIAS, Rscratch);
-      assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
-      andcc(Rscratch, 0xfffff003, Rscratch);
-      br(Assembler::always, false, Assembler::pt, done);
-      delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
-
-      bind(IsInflated);
-      if (EmitSync & 64) {
-         // If m->owner != null goto IsLocked
-         // Pessimistic form: Test-and-CAS vs CAS
-         // The optimistic form avoids RTS->RTO cache line upgrades.
-         ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rscratch);
-         andcc(Rscratch, Rscratch, G0);
-         brx(Assembler::notZero, false, Assembler::pn, done);
-         delayed()->nop();
-         // m->owner == null : it's unlocked.
-      }
-
-      // Try to CAS m->owner from null to Self
-      // Invariant: if we acquire the lock then _recursions should be 0.
-      add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
-      mov(G2_thread, Rscratch);
-      cas_ptr(Rmark, G0, Rscratch);
-      cmp(Rscratch, G0);
-      // Intentional fall-through into done
-   } else {
-      // Aggressively avoid the Store-before-CAS penalty
-      // Defer the store into box->dhw until after the CAS
-      Label IsInflated, Recursive ;
+   // Aggressively avoid the Store-before-CAS penalty
+   // Defer the store into box->dhw until after the CAS
+   Label IsInflated, Recursive ;
 
 // Anticipate CAS -- Avoid RTS->RTO upgrade
 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
 
-      ld_ptr(mark_addr, Rmark);           // fetch obj->mark
-      // Triage: biased, stack-locked, neutral, inflated
-
-      if (try_bias) {
-        biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
-        // Invariant: if control reaches this point in the emitted stream
-        // then Rmark has not been modified.
-      }
-      andcc(Rmark, 2, G0);
-      brx(Assembler::notZero, false, Assembler::pn, IsInflated);
-      delayed()->                         // Beware - dangling delay-slot
-
-      // Try stack-lock acquisition.
-      // Transiently install BUSY (0) encoding in the mark word.
-      // if the CAS of 0 into the mark was successful then we execute:
-      //   ST box->dhw  = mark   -- save fetched mark in on-stack basiclock box
-      //   ST obj->mark = box    -- overwrite transient 0 value
-      // This presumes TSO, of course.
-
-      mov(0, Rscratch);
-      or3(Rmark, markOopDesc::unlocked_value, Rmark);
-      assert(mark_addr.disp() == 0, "cas must take a zero displacement");
-      cas_ptr(mark_addr.base(), Rmark, Rscratch);
+   ld_ptr(mark_addr, Rmark);           // fetch obj->mark
+   // Triage: biased, stack-locked, neutral, inflated
+
+   if (try_bias) {
+     biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
+     // Invariant: if control reaches this point in the emitted stream
+     // then Rmark has not been modified.
+   }
+   andcc(Rmark, 2, G0);
+   brx(Assembler::notZero, false, Assembler::pn, IsInflated);
+   delayed()->                         // Beware - dangling delay-slot
+
+   // Try stack-lock acquisition.
+   // Transiently install BUSY (0) encoding in the mark word.
+   // if the CAS of 0 into the mark was successful then we execute:
+   //   ST box->dhw  = mark   -- save fetched mark in on-stack basiclock box
+   //   ST obj->mark = box    -- overwrite transient 0 value
+   // This presumes TSO, of course.
+
+   mov(0, Rscratch);
+   or3(Rmark, markOopDesc::unlocked_value, Rmark);
+   assert(mark_addr.disp() == 0, "cas must take a zero displacement");
+   cas_ptr(mark_addr.base(), Rmark, Rscratch);
 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
-      cmp(Rscratch, Rmark);
-      brx(Assembler::notZero, false, Assembler::pn, Recursive);
-      delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
-      if (counters != NULL) {
-        cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
-      }
-      ba(done);
-      delayed()->st_ptr(Rbox, mark_addr);
-
-      bind(Recursive);
-      // Stack-lock attempt failed - check for recursive stack-lock.
-      // Tests show that we can remove the recursive case with no impact
-      // on refworkload 0.83.  If we need to reduce the size of the code
-      // emitted by compiler_lock_object() the recursive case is perfect
-      // candidate.
-      //
-      // A more extreme idea is to always inflate on stack-lock recursion.
-      // This lets us eliminate the recursive checks in compiler_lock_object
-      // and compiler_unlock_object and the (box->dhw == 0) encoding.
-      // A brief experiment - requiring changes to synchronizer.cpp, interpreter,
-      // and showed a performance *increase*.  In the same experiment I eliminated
-      // the fast-path stack-lock code from the interpreter and always passed
-      // control to the "slow" operators in synchronizer.cpp.
-
-      // RScratch contains the fetched obj->mark value from the failed CAS.
-      sub(Rscratch, STACK_BIAS, Rscratch);
-      sub(Rscratch, SP, Rscratch);
-      assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
-      andcc(Rscratch, 0xfffff003, Rscratch);
-      if (counters != NULL) {
-        // Accounting needs the Rscratch register
-        st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
-        cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
-        ba_short(done);
-      } else {
-        ba(done);
-        delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
-      }
-
-      bind   (IsInflated);
-
-      // Try to CAS m->owner from null to Self
-      // Invariant: if we acquire the lock then _recursions should be 0.
-      add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
-      mov(G2_thread, Rscratch);
-      cas_ptr(Rmark, G0, Rscratch);
-      andcc(Rscratch, Rscratch, G0);             // set ICCs for done: icc.zf iff success
-      // set icc.zf : 1=success 0=failure
-      // ST box->displaced_header = NonZero.
-      // Any non-zero value suffices:
-      //    markOopDesc::unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
-      st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
-      // Intentional fall-through into done
+   cmp(Rscratch, Rmark);
+   brx(Assembler::notZero, false, Assembler::pn, Recursive);
+   delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
+   if (counters != NULL) {
+     cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
    }
+   ba(done);
+   delayed()->st_ptr(Rbox, mark_addr);
+
+   bind(Recursive);
+   // Stack-lock attempt failed - check for recursive stack-lock.
+   // Tests show that we can remove the recursive case with no impact
+   // on refworkload 0.83.  If we need to reduce the size of the code
+   // emitted by compiler_lock_object(), the recursive case is a perfect
+   // candidate.
+   //
+   // A more extreme idea is to always inflate on stack-lock recursion.
+   // This lets us eliminate the recursive checks in compiler_lock_object
+   // and compiler_unlock_object and the (box->dhw == 0) encoding.
+   // A brief experiment - requiring changes to synchronizer.cpp and the
+   // interpreter - showed a performance *increase*.  In the same experiment I eliminated
+   // the fast-path stack-lock code from the interpreter and always passed
+   // control to the "slow" operators in synchronizer.cpp.
+
+   // RScratch contains the fetched obj->mark value from the failed CAS.
+   sub(Rscratch, STACK_BIAS, Rscratch);
+   sub(Rscratch, SP, Rscratch);
+   assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
+   andcc(Rscratch, 0xfffff003, Rscratch);
+   if (counters != NULL) {
+     // Accounting needs the Rscratch register
+     st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
+     cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
+     ba_short(done);
+   } else {
+     ba(done);
+     delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
+   }
+
+   bind   (IsInflated);
+
+   // Try to CAS m->owner from null to Self
+   // Invariant: if we acquire the lock then _recursions should be 0.
+   add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
+   mov(G2_thread, Rscratch);
+   cas_ptr(Rmark, G0, Rscratch);
+   andcc(Rscratch, Rscratch, G0);             // set ICCs for done: icc.zf iff success
+   // set icc.zf : 1=success 0=failure
+   // ST box->displaced_header = NonZero.
+   // Any non-zero value suffices:
+   //    markOopDesc::unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
+   st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
+   // Intentional fall-through into done
 
    bind   (done);
 }
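
The stack-lock attempt retained above transiently CASes the BUSY encoding (0) over an unlocked mark and then, relying on TSO, stores the displaced header into the box and the box address back into the mark. A loose standalone model of that sequence; the types and field names are illustrative, and unlocked_value mirrors markOopDesc::unlocked_value:

    #include <atomic>
    #include <cstdint>

    struct ObjModel { std::atomic<intptr_t> mark{0}; };
    struct BoxModel { intptr_t dhw{0}; };              // displaced header word
    const intptr_t unlocked_value = 1;

    bool try_stack_lock(ObjModel& o, BoxModel& box) {
      intptr_t mark = o.mark.load() | unlocked_value;  // expect: unlocked mark
      intptr_t expected = mark;
      if (!o.mark.compare_exchange_strong(expected, 0)) // install BUSY (0)
        return false;                                   // locked or inflated
      box.dhw = mark;                 // save the fetched mark in the box
      o.mark.store((intptr_t)&box);   // overwrite the transient 0 (TSO order)
      return true;
    }

    int main() {
      ObjModel obj; BoxModel box;
      obj.mark.store(unlocked_value);                   // neutral object
      return try_stack_lock(obj, box) ? 0 : 1;
    }
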
@@ -2848,30 +2745,6 @@
 
    Label done ;
 
-   if (EmitSync & 4) {
-     cmp(SP, G0);
-     return ;
-   }
-
-   if (EmitSync & 8) {
-     if (try_bias) {
-        biased_locking_exit(mark_addr, Rscratch, done);
-     }
-
-     // Test first if it is a fast recursive unlock
-     ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
-     br_null_short(Rmark, Assembler::pt, done);
-
-     // Check if it is still a light weight lock, this is is true if we see
-     // the stack address of the basicLock in the markOop of the object
-     assert(mark_addr.disp() == 0, "cas must take a zero displacement");
-     cas_ptr(mark_addr.base(), Rbox, Rmark);
-     ba(done);
-     delayed()->cmp(Rbox, Rmark);
-     bind(done);
-     return ;
-   }
-
   // Beware ... If the aggregate size of the code emitted by CLO and CUO
   // is too large, performance rolls abruptly off a cliff.
    // This could be related to inlining policies, code cache management, or
@@ -2902,105 +2775,39 @@
    // close the resultant (and rare) race by having contended threads in
    // monitorenter periodically poll _owner.
 
-   if (EmitSync & 1024) {
-     // Emit code to check that _owner == Self
-     // We could fold the _owner test into subsequent code more efficiently
-     // than using a stand-alone check, but since _owner checking is off by
-     // default we don't bother. We also might consider predicating the
-     // _owner==Self check on Xcheck:jni or running on a debug build.
-     ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), Rscratch);
-     orcc(Rscratch, G0, G0);
-     brx(Assembler::notZero, false, Assembler::pn, done);
-     delayed()->nop();
-   }
-
-   if (EmitSync & 512) {
-     // classic lock release code absent 1-0 locking
-     //   m->Owner = null;
-     //   membar #storeload
-     //   if (m->cxq|m->EntryList) == null goto Success
-     //   if (m->succ != null) goto Success
-     //   if CAS (&m->Owner,0,Self) != 0 goto Success
-     //   goto SlowPath
-     ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox);
-     orcc(Rbox, G0, G0);
-     brx(Assembler::notZero, false, Assembler::pn, done);
-     delayed()->nop();
-     st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-     if (os::is_MP()) { membar(StoreLoad); }
-     ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch);
-     ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox);
-     orcc(Rbox, Rscratch, G0);
-     brx(Assembler::zero, false, Assembler::pt, done);
-     delayed()->
-     ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
-     andcc(Rscratch, Rscratch, G0);
-     brx(Assembler::notZero, false, Assembler::pt, done);
-     delayed()->andcc(G0, G0, G0);
-     add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
-     mov(G2_thread, Rscratch);
-     cas_ptr(Rmark, G0, Rscratch);
-     cmp(Rscratch, G0);
-     // invert icc.zf and goto done
-     brx(Assembler::notZero, false, Assembler::pt, done);
-     delayed()->cmp(G0, G0);
-     br(Assembler::always, false, Assembler::pt, done);
-     delayed()->cmp(G0, 1);
-   } else {
-     // 1-0 form : avoids CAS and MEMBAR in the common case
-     // Do not bother to ratify that m->Owner == Self.
-     ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox);
-     orcc(Rbox, G0, G0);
-     brx(Assembler::notZero, false, Assembler::pn, done);
-     delayed()->
-     ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch);
-     ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox);
-     orcc(Rbox, Rscratch, G0);
-     if (EmitSync & 16384) {
-       // As an optional optimization, if (EntryList|cxq) != null and _succ is null then
-       // we should transfer control directly to the slow-path.
-       // This test makes the reacquire operation below very infrequent.
-       // The logic is equivalent to :
-       //   if (cxq|EntryList) == null : Owner=null; goto Success
-       //   if succ == null : goto SlowPath
-       //   Owner=null; membar #storeload
-       //   if succ != null : goto Success
-       //   if CAS(&Owner,null,Self) != null goto Success
-       //   goto SlowPath
-       brx(Assembler::zero, true, Assembler::pt, done);
-       delayed()->
-       st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-       ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
-       andcc(Rscratch, Rscratch, G0) ;
-       brx(Assembler::zero, false, Assembler::pt, done);
-       delayed()->orcc(G0, 1, G0);
-       st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-     } else {
-       brx(Assembler::zero, false, Assembler::pt, done);
-       delayed()->
-       st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-     }
-     if (os::is_MP()) { membar(StoreLoad); }
-     // Check that _succ is (or remains) non-zero
-     ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
-     andcc(Rscratch, Rscratch, G0);
-     brx(Assembler::notZero, false, Assembler::pt, done);
-     delayed()->andcc(G0, G0, G0);
-     add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
-     mov(G2_thread, Rscratch);
-     cas_ptr(Rmark, G0, Rscratch);
-     cmp(Rscratch, G0);
-     // invert icc.zf and goto done
-     // A slightly better v8+/v9 idiom would be the following:
-     //   movrnz Rscratch,1,Rscratch
-     //   ba done
-     //   xorcc Rscratch,1,G0
-     // In v8+ mode the idiom would be valid IFF Rscratch was a G or O register
-     brx(Assembler::notZero, false, Assembler::pt, done);
-     delayed()->cmp(G0, G0);
-     br(Assembler::always, false, Assembler::pt, done);
-     delayed()->cmp(G0, 1);
-   }
+   // 1-0 form : avoids CAS and MEMBAR in the common case
+   // Do not bother to ratify that m->Owner == Self.
+   ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox);
+   orcc(Rbox, G0, G0);
+   brx(Assembler::notZero, false, Assembler::pn, done);
+   delayed()->
+   ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch);
+   ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox);
+   orcc(Rbox, Rscratch, G0);
+   brx(Assembler::zero, false, Assembler::pt, done);
+   delayed()->
+   st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+
+   if (os::is_MP()) { membar(StoreLoad); }
+   // Check that _succ is (or remains) non-zero
+   ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
+   andcc(Rscratch, Rscratch, G0);
+   brx(Assembler::notZero, false, Assembler::pt, done);
+   delayed()->andcc(G0, G0, G0);
+   add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
+   mov(G2_thread, Rscratch);
+   cas_ptr(Rmark, G0, Rscratch);
+   cmp(Rscratch, G0);
+   // invert icc.zf and goto done
+   // A slightly better v8+/v9 idiom would be the following:
+   //   movrnz Rscratch,1,Rscratch
+   //   ba done
+   //   xorcc Rscratch,1,G0
+   // In v8+ mode the idiom would be valid IFF Rscratch was a G or O register
+   brx(Assembler::notZero, false, Assembler::pt, done);
+   delayed()->cmp(G0, G0);
+   br(Assembler::always, false, Assembler::pt, done);
+   delayed()->cmp(G0, 1);
 
    bind   (LStacked);
    // Consider: we could replace the expensive CAS in the exit
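
The retained sparc exit path is the 1-0 form: drop _owner with a plain store and pay for the membar #StoreLoad plus a reacquiring CAS only when waiters might otherwise be stranded. A loose standalone model of that control flow, where returning true means the fast path completed the unlock and false routes to the runtime (which handles succession):

    #include <atomic>
    #include <cstdint>

    struct MonitorModel {
      std::atomic<void*>    owner{nullptr};
      std::atomic<intptr_t> recursions{0};
      std::atomic<void*>    EntryList{nullptr}, cxq{nullptr}, succ{nullptr};
    };

    // true: fast path finished the unlock; false: go to the runtime slow path.
    bool fast_exit(MonitorModel& m, void* self) {
      if (m.recursions.load() != 0) return false;        // recursive: slow path
      bool waiters = m.EntryList.load() != nullptr || m.cxq.load() != nullptr;
      m.owner.store(nullptr, std::memory_order_release); // drop the lock
      if (!waiters) return true;                         // nobody queued
      std::atomic_thread_fence(std::memory_order_seq_cst); // membar #StoreLoad
      if (m.succ.load() != nullptr) return true;         // a successor will retry
      void* expected = nullptr;                          // try to reacquire
      if (!m.owner.compare_exchange_strong(expected, self))
        return true;   // another thread owns it now and inherits succession
      return false;    // reacquired: runtime must exit again and wake a successor
    }

    int main() {
      MonitorModel m;
      int self;
      m.owner.store(&self);
      return fast_exit(m, &self) ? 0 : 1;  // uncontended: fast path succeeds
    }
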
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Thu Sep 06 16:15:32 2018 -0700
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Thu Sep 06 18:06:24 2018 -0700
@@ -1721,227 +1721,160 @@
   if (counters != NULL) {
     atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
   }
-  if (EmitSync & 1) {
-      // set box->dhw = markOopDesc::unused_mark()
-      // Force all sync thru slow-path: slow_enter() and slow_exit()
-      movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
-      cmpptr (rsp, (int32_t)NULL_WORD);
-  } else {
-    // Possible cases that we'll encounter in fast_lock
-    // ------------------------------------------------
-    // * Inflated
-    //    -- unlocked
-    //    -- Locked
-    //       = by self
-    //       = by other
-    // * biased
-    //    -- by Self
-    //    -- by other
-    // * neutral
-    // * stack-locked
-    //    -- by self
-    //       = sp-proximity test hits
-    //       = sp-proximity test generates false-negative
-    //    -- by other
-    //
-
-    Label IsInflated, DONE_LABEL;
-
-    // it's stack-locked, biased or neutral
-    // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
-    // order to reduce the number of conditional branches in the most common cases.
-    // Beware -- there's a subtle invariant that fetch of the markword
-    // at [FETCH], below, will never observe a biased encoding (*101b).
-    // If this invariant is not held we risk exclusion (safety) failure.
-    if (UseBiasedLocking && !UseOptoBiasInlining) {
-      biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
-    }
+
+  // Possible cases that we'll encounter in fast_lock
+  // ------------------------------------------------
+  // * Inflated
+  //    -- unlocked
+  //    -- Locked
+  //       = by self
+  //       = by other
+  // * biased
+  //    -- by Self
+  //    -- by other
+  // * neutral
+  // * stack-locked
+  //    -- by self
+  //       = sp-proximity test hits
+  //       = sp-proximity test generates false-negative
+  //    -- by other
+  //
+
+  Label IsInflated, DONE_LABEL;
+
+  // it's stack-locked, biased or neutral
+  // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
+  // order to reduce the number of conditional branches in the most common cases.
+  // Beware -- there's a subtle invariant that fetch of the markword
+  // at [FETCH], below, will never observe a biased encoding (*101b).
+  // If this invariant is not held we risk exclusion (safety) failure.
+  if (UseBiasedLocking && !UseOptoBiasInlining) {
+    biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
+  }
 
 #if INCLUDE_RTM_OPT
-    if (UseRTMForStackLocks && use_rtm) {
-      rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
-                        stack_rtm_counters, method_data, profile_rtm,
-                        DONE_LABEL, IsInflated);
-    }
+  if (UseRTMForStackLocks && use_rtm) {
+    rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
+                      stack_rtm_counters, method_data, profile_rtm,
+                      DONE_LABEL, IsInflated);
+  }
 #endif // INCLUDE_RTM_OPT
 
-    movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));          // [FETCH]
-    testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
-    jccb(Assembler::notZero, IsInflated);
-
-    // Attempt stack-locking ...
-    orptr (tmpReg, markOopDesc::unlocked_value);
-    movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
-    if (os::is_MP()) {
-      lock();
-    }
-    cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()));      // Updates tmpReg
-    if (counters != NULL) {
-      cond_inc32(Assembler::equal,
-                 ExternalAddress((address)counters->fast_path_entry_count_addr()));
-    }
-    jcc(Assembler::equal, DONE_LABEL);           // Success
-
-    // Recursive locking.
-    // The object is stack-locked: markword contains stack pointer to BasicLock.
-    // Locked by current thread if difference with current SP is less than one page.
-    subptr(tmpReg, rsp);
-    // Next instruction set ZFlag == 1 (Success) if difference is less then one page.
-    andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
-    movptr(Address(boxReg, 0), tmpReg);
-    if (counters != NULL) {
-      cond_inc32(Assembler::equal,
-                 ExternalAddress((address)counters->fast_path_entry_count_addr()));
-    }
-    jmp(DONE_LABEL);
-
-    bind(IsInflated);
-    // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markOopDesc::monitor_value
+  movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));          // [FETCH]
+  testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
+  jccb(Assembler::notZero, IsInflated);
+
+  // Attempt stack-locking ...
+  orptr (tmpReg, markOopDesc::unlocked_value);
+  movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()));      // Updates tmpReg
+  if (counters != NULL) {
+    cond_inc32(Assembler::equal,
+               ExternalAddress((address)counters->fast_path_entry_count_addr()));
+  }
+  jcc(Assembler::equal, DONE_LABEL);           // Success
+
+  // Recursive locking.
+  // The object is stack-locked: markword contains stack pointer to BasicLock.
+  // Locked by current thread if difference with current SP is less than one page.
+  subptr(tmpReg, rsp);
+  // Next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
+  andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
+  movptr(Address(boxReg, 0), tmpReg);
+  if (counters != NULL) {
+    cond_inc32(Assembler::equal,
+               ExternalAddress((address)counters->fast_path_entry_count_addr()));
+  }
+  jmp(DONE_LABEL);
+
+  bind(IsInflated);
+  // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markOopDesc::monitor_value
 
 #if INCLUDE_RTM_OPT
-    // Use the same RTM locking code in 32- and 64-bit VM.
-    if (use_rtm) {
-      rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
-                           rtm_counters, method_data, profile_rtm, DONE_LABEL);
-    } else {
+  // Use the same RTM locking code in 32- and 64-bit VM.
+  if (use_rtm) {
+    rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
+                         rtm_counters, method_data, profile_rtm, DONE_LABEL);
+  } else {
 #endif // INCLUDE_RTM_OPT
 
 #ifndef _LP64
-    // The object is inflated.
-
-    // boxReg refers to the on-stack BasicLock in the current frame.
-    // We'd like to write:
-    //   set box->_displaced_header = markOopDesc::unused_mark().  Any non-0 value suffices.
-    // This is convenient but results a ST-before-CAS penalty.  The following CAS suffers
-    // additional latency as we have another ST in the store buffer that must drain.
-
-    if (EmitSync & 8192) {
-       movptr(Address(boxReg, 0), 3);            // results in ST-before-CAS penalty
-       get_thread (scrReg);
-       movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2]
-       movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
-       if (os::is_MP()) {
-         lock();
-       }
-       cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-    } else
-    if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
-       // register juggle because we need tmpReg for cmpxchgptr below
-       movptr(scrReg, boxReg);
-       movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
-
-       // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
-       if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
-          // prefetchw [eax + Offset(_owner)-2]
-          prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-       }
-
-       if ((EmitSync & 64) == 0) {
-         // Optimistic form: consider XORL tmpReg,tmpReg
-         movptr(tmpReg, NULL_WORD);
-       } else {
-         // Can suffer RTS->RTO upgrades on shared or cold $ lines
-         // Test-And-CAS instead of CAS
-         movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));   // rax, = m->_owner
-         testptr(tmpReg, tmpReg);                   // Locked ?
-         jccb  (Assembler::notZero, DONE_LABEL);
-       }
-
-       // Appears unlocked - try to swing _owner from null to non-null.
-       // Ideally, I'd manifest "Self" with get_thread and then attempt
-       // to CAS the register containing Self into m->Owner.
-       // But we don't have enough registers, so instead we can either try to CAS
-       // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
-       // we later store "Self" into m->Owner.  Transiently storing a stack address
-       // (rsp or the address of the box) into  m->owner is harmless.
-       // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
-       if (os::is_MP()) {
-         lock();
-       }
-       cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-       movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
-       // If we weren't able to swing _owner from NULL to the BasicLock
-       // then take the slow path.
-       jccb  (Assembler::notZero, DONE_LABEL);
-       // update _owner from BasicLock to thread
-       get_thread (scrReg);                    // beware: clobbers ICCs
-       movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
-       xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
-
-       // If the CAS fails we can either retry or pass control to the slow-path.
-       // We use the latter tactic.
-       // Pass the CAS result in the icc.ZFlag into DONE_LABEL
-       // If the CAS was successful ...
-       //   Self has acquired the lock
-       //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
-       // Intentional fall-through into DONE_LABEL ...
-    } else {
-       movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark()));  // results in ST-before-CAS penalty
-       movptr(boxReg, tmpReg);
-
-       // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
-       if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
-          // prefetchw [eax + Offset(_owner)-2]
-          prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-       }
-
-       if ((EmitSync & 64) == 0) {
-         // Optimistic form
-         xorptr  (tmpReg, tmpReg);
-       } else {
-         // Can suffer RTS->RTO upgrades on shared or cold $ lines
-         movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));   // rax, = m->_owner
-         testptr(tmpReg, tmpReg);                   // Locked ?
-         jccb  (Assembler::notZero, DONE_LABEL);
-       }
-
-       // Appears unlocked - try to swing _owner from null to non-null.
-       // Use either "Self" (in scr) or rsp as thread identity in _owner.
-       // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
-       get_thread (scrReg);
-       if (os::is_MP()) {
-         lock();
-       }
-       cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-
-       // If the CAS fails we can either retry or pass control to the slow-path.
-       // We use the latter tactic.
-       // Pass the CAS result in the icc.ZFlag into DONE_LABEL
-       // If the CAS was successful ...
-       //   Self has acquired the lock
-       //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
-       // Intentional fall-through into DONE_LABEL ...
-    }
+  // The object is inflated.
+
+  // boxReg refers to the on-stack BasicLock in the current frame.
+  // We'd like to write:
+  //   set box->_displaced_header = markOopDesc::unused_mark().  Any non-0 value suffices.
+  // This is convenient but results in a ST-before-CAS penalty.  The following CAS suffers
+  // additional latency as we have another ST in the store buffer that must drain.
+
+  // avoid ST-before-CAS
+  // register juggle because we need tmpReg for cmpxchgptr below
+  movptr(scrReg, boxReg);
+  movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
+
+  // Optimistic form: consider XORL tmpReg,tmpReg
+  movptr(tmpReg, NULL_WORD);
+
+  // Appears unlocked - try to swing _owner from null to non-null.
+  // Ideally, I'd manifest "Self" with get_thread and then attempt
+  // to CAS the register containing Self into m->Owner.
+  // But we don't have enough registers, so instead we can either try to CAS
+  // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
+  // we later store "Self" into m->Owner.  Transiently storing a stack address
+  // (rsp or the address of the box) into  m->owner is harmless.
+  // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+  movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
+  // If we weren't able to swing _owner from NULL to the BasicLock
+  // then take the slow path.
+  jccb  (Assembler::notZero, DONE_LABEL);
+  // update _owner from BasicLock to thread
+  get_thread (scrReg);                    // beware: clobbers ICCs
+  movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
+  xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
+
+  // If the CAS fails we can either retry or pass control to the slow-path.
+  // We use the latter tactic.
+  // Pass the CAS result in the icc.ZFlag into DONE_LABEL
+  // If the CAS was successful ...
+  //   Self has acquired the lock
+  //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
+  // Intentional fall-through into DONE_LABEL ...
 #else // _LP64
-    // It's inflated
-    movq(scrReg, tmpReg);
-    xorq(tmpReg, tmpReg);
-
-    if (os::is_MP()) {
-      lock();
-    }
-    cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-    // Unconditionally set box->_displaced_header = markOopDesc::unused_mark().
-    // Without cast to int32_t movptr will destroy r10 which is typically obj.
-    movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
-    // Intentional fall-through into DONE_LABEL ...
-    // Propagate ICC.ZF from CAS above into DONE_LABEL.
+  // It's inflated
+  movq(scrReg, tmpReg);
+  xorq(tmpReg, tmpReg);
+
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+  // Unconditionally set box->_displaced_header = markOopDesc::unused_mark().
+  // Without cast to int32_t movptr will destroy r10 which is typically obj.
+  movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
+  // Intentional fall-through into DONE_LABEL ...
+  // Propagate ICC.ZF from CAS above into DONE_LABEL.
 #endif // _LP64
 #if INCLUDE_RTM_OPT
-    } // use_rtm()
+  } // use_rtm()
 #endif
-    // DONE_LABEL is a hot target - we'd really like to place it at the
-    // start of cache line by padding with NOPs.
-    // See the AMD and Intel software optimization manuals for the
-    // most efficient "long" NOP encodings.
-    // Unfortunately none of our alignment mechanisms suffice.
-    bind(DONE_LABEL);
-
-    // At DONE_LABEL the icc ZFlag is set as follows ...
-    // Fast_Unlock uses the same protocol.
-    // ZFlag == 1 -> Success
-    // ZFlag == 0 -> Failure - force control through the slow-path
-  }
+  // DONE_LABEL is a hot target - we'd really like to place it at the
+  // start of cache line by padding with NOPs.
+  // See the AMD and Intel software optimization manuals for the
+  // most efficient "long" NOP encodings.
+  // Unfortunately none of our alignment mechanisms suffice.
+  bind(DONE_LABEL);
+
+  // At DONE_LABEL the icc ZFlag is set as follows ...
+  // Fast_Unlock uses the same protocol.
+  // ZFlag == 1 -> Success
+  // ZFlag == 0 -> Failure - force control through the slow-path
 }
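
To make the inflated-enter protocol above easier to follow outside of assembler form, here is a stand-alone C++ sketch using std::atomic. Everything in it is illustrative: the Monitor struct and try_enter_inflated are simplified stand-ins, not HotSpot's ObjectMonitor API, and the emitted code reports success through ZF rather than a return value.

  #include <atomic>

  // Simplified stand-in for ObjectMonitor (illustrative only).
  struct Monitor {
    std::atomic<void*> owner{nullptr};   // models _owner
  };

  // Models the fast path emitted above: CAS NULL -> Self.
  // On LP64 the thread register (r15_thread) is the CAS operand; on
  // 32-bit, a stack address is installed first and then overwritten
  // with the thread once the CAS has succeeded.
  bool try_enter_inflated(Monitor* m, void* self) {
    void* expected = nullptr;            // mirrors xorq(tmpReg, tmpReg)
    return m->owner.compare_exchange_strong(expected, self,
                                            std::memory_order_acquire);
  }

On success the invariant noted in the comments holds: m->_recursions is already 0, so nothing else needs to be written before falling through to DONE_LABEL.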
 
 // obj: object to unlock
@@ -1980,293 +1913,179 @@
   assert(boxReg == rax, "");
   assert_different_registers(objReg, boxReg, tmpReg);
 
-  if (EmitSync & 4) {
-    // Disable - inhibit all inlining.  Force control through the slow-path
-    cmpptr (rsp, 0);
-  } else {
-    Label DONE_LABEL, Stacked, CheckSucc;
-
-    // Critically, the biased locking test must have precedence over
-    // and appear before the (box->dhw == 0) recursive stack-lock test.
-    if (UseBiasedLocking && !UseOptoBiasInlining) {
-       biased_locking_exit(objReg, tmpReg, DONE_LABEL);
-    }
+  Label DONE_LABEL, Stacked, CheckSucc;
+
+  // Critically, the biased locking test must have precedence over
+  // and appear before the (box->dhw == 0) recursive stack-lock test.
+  if (UseBiasedLocking && !UseOptoBiasInlining) {
+    biased_locking_exit(objReg, tmpReg, DONE_LABEL);
+  }
 
 #if INCLUDE_RTM_OPT
-    if (UseRTMForStackLocks && use_rtm) {
-      assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
-      Label L_regular_unlock;
-      movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));           // fetch markword
-      andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
-      cmpptr(tmpReg, markOopDesc::unlocked_value);            // bits = 001 unlocked
-      jccb(Assembler::notEqual, L_regular_unlock);  // if !HLE RegularLock
-      xend();                                       // otherwise end...
-      jmp(DONE_LABEL);                              // ... and we're done
-      bind(L_regular_unlock);
-    }
+  if (UseRTMForStackLocks && use_rtm) {
+    assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
+    Label L_regular_unlock;
+    movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));           // fetch markword
+    andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
+    cmpptr(tmpReg, markOopDesc::unlocked_value);            // bits = 001 unlocked
+    jccb(Assembler::notEqual, L_regular_unlock);  // if !HLE RegularLock
+    xend();                                       // otherwise end...
+    jmp(DONE_LABEL);                              // ... and we're done
+    bind(L_regular_unlock);
+  }
 #endif
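
As background for the RTM stack-unlock test above: under lock elision the critical section runs inside a hardware transaction, so "unlocking" is just xend() when the markword still reads unlocked. A minimal user-land analogue with the Intel TSX intrinsics (a sketch: assumes a TSX-capable CPU and -mrtm; the spin lock is illustrative, not HotSpot code):

  #include <immintrin.h>   // _xbegin, _xend, _xabort, _XBEGIN_STARTED
  #include <atomic>

  std::atomic<int> lock_word{0};          // 0 == unlocked (illustrative)

  void elided_section() {
    if (_xbegin() == _XBEGIN_STARTED) {   // speculate
      if (lock_word.load(std::memory_order_relaxed) != 0)
        _xabort(0xff);                    // lock really held: abort
      /* ... critical section ... */
      _xend();                            // commit; mirrors xend() above
    } else {                              // fallback: take the lock for real
      while (lock_word.exchange(1, std::memory_order_acquire) != 0) { }
      /* ... critical section ... */
      lock_word.store(0, std::memory_order_release);
    }
  }

The jccb(notEqual, L_regular_unlock) above plays the role of the fallback branch: if the markword is not in the unlocked state, no elision is in progress and the regular unlock sequence runs.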
 
-    cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
-    jcc   (Assembler::zero, DONE_LABEL);            // 0 indicates recursive stack-lock
-    movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));             // Examine the object's markword
-    testptr(tmpReg, markOopDesc::monitor_value);    // Inflated?
-    jccb  (Assembler::zero, Stacked);
-
-    // It's inflated.
+  cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
+  jcc   (Assembler::zero, DONE_LABEL);            // 0 indicates recursive stack-lock
+  movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));             // Examine the object's markword
+  testptr(tmpReg, markOopDesc::monitor_value);    // Inflated?
+  jccb  (Assembler::zero, Stacked);
+
+  // It's inflated.
 #if INCLUDE_RTM_OPT
-    if (use_rtm) {
-      Label L_regular_inflated_unlock;
-      int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
-      movptr(boxReg, Address(tmpReg, owner_offset));
-      testptr(boxReg, boxReg);
-      jccb(Assembler::notZero, L_regular_inflated_unlock);
-      xend();
-      jmpb(DONE_LABEL);
-      bind(L_regular_inflated_unlock);
-    }
+  if (use_rtm) {
+    Label L_regular_inflated_unlock;
+    int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
+    movptr(boxReg, Address(tmpReg, owner_offset));
+    testptr(boxReg, boxReg);
+    jccb(Assembler::notZero, L_regular_inflated_unlock);
+    xend();
+    jmpb(DONE_LABEL);
+    bind(L_regular_inflated_unlock);
+  }
 #endif
 
-    // Despite our balanced locking property we still check that m->_owner == Self
-    // as java routines or native JNI code called by this thread might
-    // have released the lock.
-    // Refer to the comments in synchronizer.cpp for how we might encode extra
-    // state in _succ so we can avoid fetching EntryList|cxq.
-    //
-    // I'd like to add more cases in fast_lock() and fast_unlock() --
-    // such as recursive enter and exit -- but we have to be wary of
-    // I$ bloat, T$ effects and BP$ effects.
-    //
-    // If there's no contention try a 1-0 exit.  That is, exit without
-    // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
-    // we detect and recover from the race that the 1-0 exit admits.
-    //
-    // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
-    // before it STs null into _owner, releasing the lock.  Updates
-    // to data protected by the critical section must be visible before
-    // we drop the lock (and thus before any other thread could acquire
-    // the lock and observe the fields protected by the lock).
-    // IA32's memory-model is SPO, so STs are ordered with respect to
-    // each other and there's no need for an explicit barrier (fence).
-    // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
+  // Despite our balanced locking property we still check that m->_owner == Self
+  // as java routines or native JNI code called by this thread might
+  // have released the lock.
+  // Refer to the comments in synchronizer.cpp for how we might encode extra
+  // state in _succ so we can avoid fetching EntryList|cxq.
+  //
+  // I'd like to add more cases in fast_lock() and fast_unlock() --
+  // such as recursive enter and exit -- but we have to be wary of
+  // I$ bloat, T$ effects and BP$ effects.
+  //
+  // If there's no contention try a 1-0 exit.  That is, exit without
+  // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
+  // we detect and recover from the race that the 1-0 exit admits.
+  //
+  // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
+  // before it STs null into _owner, releasing the lock.  Updates
+  // to data protected by the critical section must be visible before
+  // we drop the lock (and thus before any other thread could acquire
+  // the lock and observe the fields protected by the lock).
+  // IA32's memory-model is SPO, so STs are ordered with respect to
+  // each other and there's no need for an explicit barrier (fence).
+  // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
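
The protocol in the comment block above is compact enough to model directly. A C++ sketch under simplifying assumptions (a single succ hint and a merged stand-in for cxq|EntryList; names are illustrative, not ObjectMonitor's real fields and accessors):

  #include <atomic>

  struct Monitor {
    std::atomic<void*> owner;
    std::atomic<void*> succ;        // heir-presumptive hint
    std::atomic<void*> entry_list;  // stand-in for cxq|EntryList
  };

  // 1-0 exit: drop the lock with a release store, no MEMBAR or CAS on
  // the uncontended path. Returns false if the slow path must run.
  bool try_exit_1_0(Monitor* m, void* self) {
    if (m->entry_list.load(std::memory_order_relaxed) == nullptr) {
      m->owner.store(nullptr, std::memory_order_release);
      return true;                  // nobody queued: done
    }
    if (m->succ.load(std::memory_order_relaxed) == nullptr)
      return false;                 // contended, no successor: slow path
    m->owner.store(nullptr, std::memory_order_release);   // ST _owner
    std::atomic_thread_fence(std::memory_order_seq_cst);  // MEMBAR
    if (m->succ.load(std::memory_order_relaxed) != nullptr)
      return true;                  // LD _succ: successor will retry
    // The successor vanished in the window: regrab to ensure succession.
    void* expected = nullptr;
    // CAS failure means another thread now owns the lock, so the exit
    // succeeded; CAS success means we must take the slow path ourselves.
    return !m->owner.compare_exchange_strong(expected, self,
                                             std::memory_order_acquire);
  }

The x86 sequences below follow this shape (the _recursions check is omitted from the sketch), with the MEMBAR realized as the dummy lock; addl(Address(rsp, 0), 0) and the result carried in ZF.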
 #ifndef _LP64
-    get_thread (boxReg);
-    if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
-      // prefetchw [ebx + Offset(_owner)-2]
-      prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-    }
-
-    // Note that we could employ various encoding schemes to reduce
-    // the number of loads below (currently 4) to just 2 or 3.
-    // Refer to the comments in synchronizer.cpp.
-    // In practice the chain of fetches doesn't seem to impact performance, however.
-    xorptr(boxReg, boxReg);
-    if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
-       // Attempt to reduce branch density - AMD's branch predictor.
-       orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
-       orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
-       orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
-       jccb  (Assembler::notZero, DONE_LABEL);
-       movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
-       jmpb  (DONE_LABEL);
-    } else {
-       orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
-       jccb  (Assembler::notZero, DONE_LABEL);
-       movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
-       orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
-       jccb  (Assembler::notZero, CheckSucc);
-       movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
-       jmpb  (DONE_LABEL);
-    }
-
-    // The Following code fragment (EmitSync & 65536) improves the performance of
-    // contended applications and contended synchronization microbenchmarks.
-    // Unfortunately the emission of the code - even though not executed - causes regressions
-    // in scimark and jetstream, evidently because of $ effects.  Replacing the code
-    // with an equal number of never-executed NOPs results in the same regression.
-    // We leave it off by default.
-
-    if ((EmitSync & 65536) != 0) {
-       Label LSuccess, LGoSlowPath ;
-
-       bind  (CheckSucc);
-
-       // Optional pre-test ... it's safe to elide this
-       cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
-       jccb(Assembler::zero, LGoSlowPath);
-
-       // We have a classic Dekker-style idiom:
-       //    ST m->_owner = 0 ; MEMBAR; LD m->_succ
-       // There are a number of ways to implement the barrier:
-       // (1) lock:andl &m->_owner, 0
-       //     is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
-       //     LOCK: ANDL [ebx+Offset(_Owner)-2], 0
-       //     Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
-       // (2) If supported, an explicit MFENCE is appealing.
-       //     In older IA32 processors MFENCE is slower than lock:add or xchg
-       //     particularly if the write-buffer is full as might be the case if
-       //     if stores closely precede the fence or fence-equivalent instruction.
-       //     See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
-       //     as the situation has changed with Nehalem and Shanghai.
-       // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
-       //     The $lines underlying the top-of-stack should be in M-state.
-       //     The locked add instruction is serializing, of course.
-       // (4) Use xchg, which is serializing
-       //     mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
-       // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
-       //     The integer condition codes will tell us if succ was 0.
-       //     Since _succ and _owner should reside in the same $line and
-       //     we just stored into _owner, it's likely that the $line
-       //     remains in M-state for the lock:orl.
-       //
-       // We currently use (3), although it's likely that switching to (2)
-       // is correct for the future.
-
-       movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
-       if (os::is_MP()) {
-         lock(); addptr(Address(rsp, 0), 0);
-       }
-       // Ratify _succ remains non-null
-       cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), 0);
-       jccb  (Assembler::notZero, LSuccess);
-
-       xorptr(boxReg, boxReg);                  // box is really EAX
-       if (os::is_MP()) { lock(); }
-       cmpxchgptr(rsp, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-       // There's no successor so we tried to regrab the lock with the
-       // placeholder value. If that didn't work, then another thread
-       // grabbed the lock so we're done (and exit was a success).
-       jccb  (Assembler::notEqual, LSuccess);
-       // Since we're low on registers we installed rsp as a placeholding in _owner.
-       // Now install Self over rsp.  This is safe as we're transitioning from
-       // non-null to non=null
-       get_thread (boxReg);
-       movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), boxReg);
-       // Intentional fall-through into LGoSlowPath ...
-
-       bind  (LGoSlowPath);
-       orptr(boxReg, 1);                      // set ICC.ZF=0 to indicate failure
-       jmpb  (DONE_LABEL);
-
-       bind  (LSuccess);
-       xorptr(boxReg, boxReg);                 // set ICC.ZF=1 to indicate success
-       jmpb  (DONE_LABEL);
-    }
-
-    bind (Stacked);
-    // It's not inflated and it's not recursively stack-locked and it's not biased.
-    // It must be stack-locked.
-    // Try to reset the header to displaced header.
-    // The "box" value on the stack is stable, so we can reload
-    // and be assured we observe the same value as above.
-    movptr(tmpReg, Address(boxReg, 0));
-    if (os::is_MP()) {
-      lock();
-    }
-    cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
-    // Intention fall-thru into DONE_LABEL
-
-    // DONE_LABEL is a hot target - we'd really like to place it at the
-    // start of cache line by padding with NOPs.
-    // See the AMD and Intel software optimization manuals for the
-    // most efficient "long" NOP encodings.
-    // Unfortunately none of our alignment mechanisms suffice.
-    if ((EmitSync & 65536) == 0) {
-       bind (CheckSucc);
-    }
+  get_thread (boxReg);
+
+  // Note that we could employ various encoding schemes to reduce
+  // the number of loads below (currently 4) to just 2 or 3.
+  // Refer to the comments in synchronizer.cpp.
+  // In practice the chain of fetches doesn't seem to impact performance, however.
+  xorptr(boxReg, boxReg);
+  orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
+  jccb  (Assembler::notZero, DONE_LABEL);
+  movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
+  orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
+  jccb  (Assembler::notZero, CheckSucc);
+  movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
+  jmpb  (DONE_LABEL);
+
+  bind (Stacked);
+  // It's not inflated and it's not recursively stack-locked and it's not biased.
+  // It must be stack-locked.
+  // Try to reset the header to displaced header.
+  // The "box" value on the stack is stable, so we can reload
+  // and be assured we observe the same value as above.
+  movptr(tmpReg, Address(boxReg, 0));
+  if (os::is_MP()) {
+    lock();
+  }
+  cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
+  // Intentional fall-through into DONE_LABEL
+
+  // DONE_LABEL is a hot target - we'd really like to place it at the
+  // start of cache line by padding with NOPs.
+  // See the AMD and Intel software optimization manuals for the
+  // most efficient "long" NOP encodings.
+  // Unfortunately none of our alignment mechanisms suffice.
+  bind (CheckSucc);
 #else // _LP64
-    // It's inflated
-    if (EmitSync & 1024) {
-      // Emit code to check that _owner == Self
-      // We could fold the _owner test into subsequent code more efficiently
-      // than using a stand-alone check, but since _owner checking is off by
-      // default we don't bother. We also might consider predicating the
-      // _owner==Self check on Xcheck:jni or running on a debug build.
-      movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-      xorptr(boxReg, r15_thread);
-    } else {
-      xorptr(boxReg, boxReg);
-    }
-    orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
-    jccb  (Assembler::notZero, DONE_LABEL);
-    movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
-    orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
-    jccb  (Assembler::notZero, CheckSucc);
-    movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
-    jmpb  (DONE_LABEL);
-
-    if ((EmitSync & 65536) == 0) {
-      // Try to avoid passing control into the slow_path ...
-      Label LSuccess, LGoSlowPath ;
-      bind  (CheckSucc);
-
-      // The following optional optimization can be elided if necessary
-      // Effectively: if (succ == null) goto SlowPath
-      // The code reduces the window for a race, however,
-      // and thus benefits performance.
-      cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
-      jccb  (Assembler::zero, LGoSlowPath);
-
-      xorptr(boxReg, boxReg);
-      if ((EmitSync & 16) && os::is_MP()) {
-        xchgptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-      } else {
-        movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
-        if (os::is_MP()) {
-          // Memory barrier/fence
-          // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
-          // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
-          // This is faster on Nehalem and AMD Shanghai/Barcelona.
-          // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
-          // We might also restructure (ST Owner=0;barrier;LD _Succ) to
-          // (mov box,0; xchgq box, &m->Owner; LD _succ) .
-          lock(); addl(Address(rsp, 0), 0);
-        }
-      }
-      cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
-      jccb  (Assembler::notZero, LSuccess);
-
-      // Rare inopportune interleaving - race.
-      // The successor vanished in the small window above.
-      // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
-      // We need to ensure progress and succession.
-      // Try to reacquire the lock.
-      // If that fails then the new owner is responsible for succession and this
-      // thread needs to take no further action and can exit via the fast path (success).
-      // If the re-acquire succeeds then pass control into the slow path.
-      // As implemented, this latter mode is horrible because we generated more
-      // coherence traffic on the lock *and* artifically extended the critical section
-      // length while by virtue of passing control into the slow path.
-
-      // box is really RAX -- the following CMPXCHG depends on that binding
-      // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
-      if (os::is_MP()) { lock(); }
-      cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-      // There's no successor so we tried to regrab the lock.
-      // If that didn't work, then another thread grabbed the
-      // lock so we're done (and exit was a success).
-      jccb  (Assembler::notEqual, LSuccess);
-      // Intentional fall-through into slow-path
-
-      bind  (LGoSlowPath);
-      orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
-      jmpb  (DONE_LABEL);
-
-      bind  (LSuccess);
-      testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
-      jmpb  (DONE_LABEL);
-    }
-
-    bind  (Stacked);
-    movptr(tmpReg, Address (boxReg, 0));      // re-fetch
-    if (os::is_MP()) { lock(); }
-    cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
-
-    if (EmitSync & 65536) {
-       bind (CheckSucc);
-    }
+  // It's inflated
+  xorptr(boxReg, boxReg);
+  orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
+  jccb  (Assembler::notZero, DONE_LABEL);
+  movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
+  orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
+  jccb  (Assembler::notZero, CheckSucc);
+  movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
+  jmpb  (DONE_LABEL);
+
+  // Try to avoid passing control into the slow_path ...
+  Label LSuccess, LGoSlowPath ;
+  bind  (CheckSucc);
+
+  // The following optional optimization can be elided if necessary
+  // Effectively: if (succ == null) goto SlowPath
+  // The code reduces the window for a race, however,
+  // and thus benefits performance.
+  cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
+  jccb  (Assembler::zero, LGoSlowPath);
+
+  xorptr(boxReg, boxReg);
+  movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
+  if (os::is_MP()) {
+    // Memory barrier/fence
+    // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
+    // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
+    // This is faster on Nehalem and AMD Shanghai/Barcelona.
+    // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
+    // We might also restructure (ST Owner=0;barrier;LD _Succ) to
+  // (mov box,0; xchgq box, &m->Owner; LD _succ).
+    lock(); addl(Address(rsp, 0), 0);
+  }
+  cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
+  jccb  (Assembler::notZero, LSuccess);
+
+  // Rare inopportune interleaving - race.
+  // The successor vanished in the small window above.
+  // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
+  // We need to ensure progress and succession.
+  // Try to reacquire the lock.
+  // If that fails then the new owner is responsible for succession and this
+  // thread needs to take no further action and can exit via the fast path (success).
+  // If the re-acquire succeeds then pass control into the slow path.
+  // As implemented, this latter mode is horrible because we generated more
+  // coherence traffic on the lock *and* artificially extended the critical section
+  // length by virtue of passing control into the slow path.
+
+  // box is really RAX -- the following CMPXCHG depends on that binding
+  // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
+  if (os::is_MP()) { lock(); }
+  cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+  // There's no successor so we tried to regrab the lock.
+  // If that didn't work, then another thread grabbed the
+  // lock so we're done (and exit was a success).
+  jccb  (Assembler::notEqual, LSuccess);
+  // Intentional fall-through into slow-path
+
+  bind  (LGoSlowPath);
+  orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
+  jmpb  (DONE_LABEL);
+
+  bind  (LSuccess);
+  testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
+  jmpb  (DONE_LABEL);
+
+  bind  (Stacked);
+  movptr(tmpReg, Address (boxReg, 0));      // re-fetch
+  if (os::is_MP()) { lock(); }
+  cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
+
 #endif
-    bind(DONE_LABEL);
-  }
+  bind(DONE_LABEL);
 }
 #endif // COMPILER2
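
Both fast paths report their outcome through the condition codes rather than a return value, so the compiled call sites reduce to a single branch. Conceptually (names here are illustrative, not the actual C2 node or runtime entry names):

  // ZF == 1 -> fast path succeeded, continue inline.
  // ZF == 0 -> fall into the runtime slow path.
  if (!fast_path_monitor_exit(obj, box)) {            // consumes ZF
    slow_path_monitor_exit(obj, box, current_thread); // runtime call
  }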
 
--- a/src/hotspot/share/runtime/arguments.cpp	Thu Sep 06 16:15:32 2018 -0700
+++ b/src/hotspot/share/runtime/arguments.cpp	Thu Sep 06 18:06:24 2018 -0700
@@ -574,6 +574,7 @@
   { "PrintSafepointStatisticsCount", JDK_Version::jdk(11),     JDK_Version::jdk(12), JDK_Version::jdk(13) },
   { "TransmitErrorReport",           JDK_Version::undefined(), JDK_Version::jdk(12), JDK_Version::jdk(13) },
   { "ErrorReportServer",             JDK_Version::undefined(), JDK_Version::jdk(12), JDK_Version::jdk(13) },
+  { "EmitSync",                      JDK_Version::undefined(), JDK_Version::jdk(12), JDK_Version::jdk(13) },
 
 #ifdef TEST_VERIFY_SPECIAL_JVM_FLAGS
   { "dep > obs",                    JDK_Version::jdk(9), JDK_Version::jdk(8), JDK_Version::undefined() },
@@ -3977,7 +3978,7 @@
     }
   }
 #ifdef COMPILER2
-  if (!UseBiasedLocking || EmitSync != 0) {
+  if (!UseBiasedLocking) {
     UseOptoBiasInlining = false;
   }
 #endif
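
The table entry added above is a (deprecated, obsolete, expired) version triple: EmitSync was never deprecated, becomes obsolete (accepted but ignored, with a warning) in JDK 12, and expires (is rejected as unrecognized) in JDK 13. A minimal model of that classification (types and names are illustrative simplifications of the arguments.cpp machinery):

  struct SpecialFlag {          // models the rows in the table above
    const char* name;
    int deprecated_in;          // 0 == undefined
    int obsolete_in;
    int expired_in;
  };

  // True if the flag should be parsed but ignored with a warning.
  bool is_obsolete(const SpecialFlag& f, int jdk) {
    return f.obsolete_in != 0 && jdk >= f.obsolete_in &&
           (f.expired_in == 0 || jdk < f.expired_in);
  }

  // { "EmitSync", 0, 12, 13 }:
  //   JDK 12 -> obsolete: -XX:EmitSync=... warns and is ignored
  //   JDK 13 -> expired: -XX:EmitSync=... is an unknown option error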
--- a/src/hotspot/share/runtime/globals.hpp	Thu Sep 06 16:15:32 2018 -0700
+++ b/src/hotspot/share/runtime/globals.hpp	Thu Sep 06 18:06:24 2018 -0700
@@ -830,10 +830,6 @@
   experimental(ccstr, SyncKnobs, NULL,                                      \
                "(Unstable) Various monitor synchronization tunables")       \
                                                                             \
-  experimental(intx, EmitSync, 0,                                           \
-               "(Unsafe, Unstable) "                                        \
-               "Control emission of inline sync fast-path code")            \
-                                                                            \
   product(intx, MonitorBound, 0, "Bound Monitor population")                \
           range(0, max_jint)                                                \
                                                                             \
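
For context on the removal: globals.hpp declares flags through an X-macro list, so deleting the experimental(...) entry removes the EmitSync global and its -XX: registration in one place. A toy reduction of the pattern (greatly simplified; the real macros also register docs, ranges, and attributes):

  #include <cstdint>
  typedef intptr_t intx;        // HotSpot's intx

  // The flag list is a macro that applies a supplied macro to each row:
  #define MY_FLAGS(experimental)                                    \
    experimental(intx, MonitorBound, 0, "Bound Monitor population")

  // One possible expansion: define the backing globals.
  #define DECLARE_FLAG(type, name, value, doc) type name = value;
  MY_FLAGS(DECLARE_FLAG)        // expands to: intx MonitorBound = 0;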