8062851: cleanup ObjectMonitor offset adjustments
author dcubed
Thu, 06 Nov 2014 13:05:14 -0800
changeset 27608 80d91e264baf
parent 27479 389ec536ece9
child 27609 d24e9561d2a4
8062851: cleanup ObjectMonitor offset adjustments
Summary: JEP-143/JDK-8046133 - cleanup computation of ObjectMonitor field pointers
Reviewed-by: dholmes, redestad, coleenp
hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp
hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
hotspot/src/share/vm/runtime/objectMonitor.hpp
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Thu Nov 06 13:57:26 2014 +0000
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Thu Nov 06 13:05:14 2014 -0800
@@ -2734,12 +2734,12 @@
 // box->dhw disposition - post-conditions at DONE_LABEL.
 // -   Successful inflated lock:  box->dhw != 0.
 //     Any non-zero value suffices.
-//     Consider G2_thread, rsp, boxReg, or unused_mark()
+//     Consider G2_thread, rsp, boxReg, or markOopDesc::unused_mark()
 // -   Successful Stack-lock: box->dhw == mark.
 //     box->dhw must contain the displaced mark word value
 // -   Failure -- icc.ZFlag == 0 and box->dhw is undefined.
 //     The slow-path fast_enter() and slow_enter() operators
-//     are responsible for setting box->dhw = NonZero (typically ::unused_mark).
+//     are responsible for setting box->dhw = NonZero (typically markOopDesc::unused_mark()).
 // -   Biased: box->dhw is undefined
 //
 // SPARC refworkload performance - specifically jetstream and scimark - are
@@ -2855,7 +2855,7 @@
          // If m->owner != null goto IsLocked
          // Pessimistic form: Test-and-CAS vs CAS
          // The optimistic form avoids RTS->RTO cache line upgrades.
-         ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
+         ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rscratch);
          andcc(Rscratch, Rscratch, G0);
          brx(Assembler::notZero, false, Assembler::pn, done);
          delayed()->nop();
@@ -2864,7 +2864,7 @@
 
       // Try to CAS m->owner from null to Self
       // Invariant: if we acquire the lock then _recursions should be 0.
-      add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
+      add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
       mov(G2_thread, Rscratch);
       cas_ptr(Rmark, G0, Rscratch);
       cmp(Rscratch, G0);
@@ -2948,7 +2948,7 @@
          // Test-and-CAS vs CAS
          // Pessimistic form avoids futile (doomed) CAS attempts
          // The optimistic form avoids RTS->RTO cache line upgrades.
-         ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
+         ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rscratch);
          andcc(Rscratch, Rscratch, G0);
          brx(Assembler::notZero, false, Assembler::pn, done);
          delayed()->nop();
@@ -2957,13 +2957,13 @@
 
       // Try to CAS m->owner from null to Self
       // Invariant: if we acquire the lock then _recursions should be 0.
-      add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
+      add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
       mov(G2_thread, Rscratch);
       cas_ptr(Rmark, G0, Rscratch);
       cmp(Rscratch, G0);
       // ST box->displaced_header = NonZero.
       // Any non-zero value suffices:
-      //    unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
+      //    markOopDesc::unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
       st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
       // Intentional fall-through into done
    }
@@ -3031,30 +3031,30 @@
    // Note that we use 1-0 locking by default for the inflated case.  We
    // close the resultant (and rare) race by having contented threads in
    // monitorenter periodically poll _owner.
-   ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
-   ld_ptr(Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox);
+   ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rscratch);
+   ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions), Rbox);
    xor3(Rscratch, G2_thread, Rscratch);
    orcc(Rbox, Rscratch, Rbox);
    brx(Assembler::notZero, false, Assembler::pn, done);
    delayed()->
-   ld_ptr(Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch);
-   ld_ptr(Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox);
+   ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList), Rscratch);
+   ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq), Rbox);
    orcc(Rbox, Rscratch, G0);
    if (EmitSync & 65536) {
       Label LSucc ;
       brx(Assembler::notZero, false, Assembler::pn, LSucc);
       delayed()->nop();
       ba(done);
-      delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
+      delayed()->st_ptr(G0, Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner));
 
       bind(LSucc);
-      st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
+      st_ptr(G0, Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner));
       if (os::is_MP()) { membar (StoreLoad); }
-      ld_ptr(Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch);
+      ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ), Rscratch);
       andcc(Rscratch, Rscratch, G0);
       brx(Assembler::notZero, false, Assembler::pt, done);
       delayed()->andcc(G0, G0, G0);
-      add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
+      add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
       mov(G2_thread, Rscratch);
       cas_ptr(Rmark, G0, Rscratch);
       // invert icc.zf and goto done
@@ -3066,7 +3066,7 @@
       brx(Assembler::notZero, false, Assembler::pn, done);
       delayed()->nop();
       ba(done);
-      delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
+      delayed()->st_ptr(G0, Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner));
    }
 
    bind   (LStacked);
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu Nov 06 13:57:26 2014 +0000
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu Nov 06 13:05:14 2014 -0800
@@ -1450,8 +1450,7 @@
 void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register box_Reg,
                                             Register tmp_Reg, Register scr_Reg, Label& retryLabel) {
   Label SpinLoop, SpinExit, doneRetry;
-  // Clean monitor_value bit to get valid pointer
-  int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
+  int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
 
   testl(retry_count_Reg, retry_count_Reg);
   jccb(Assembler::zero, doneRetry);
@@ -1532,7 +1531,7 @@
 // Use RTM for inflating locks
 // inputs: objReg (object to lock)
 //         boxReg (on-stack box address (displaced header location) - KILLED)
-//         tmpReg (ObjectMonitor address + 2(monitor_value))
+//         tmpReg (ObjectMonitor address + markOopDesc::monitor_value)
 void MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Register tmpReg,
                                           Register scrReg, Register retry_on_busy_count_Reg,
                                           Register retry_on_abort_count_Reg,
@@ -1543,8 +1542,7 @@
   assert(tmpReg == rax, "");
   assert(scrReg == rdx, "");
   Label L_rtm_retry, L_decrement_retry, L_on_abort;
-  // Clean monitor_value bit to get valid pointer
-  int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
+  int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
 
   // Without cast to int32_t a movptr will destroy r10 which is typically obj
   movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
@@ -1716,7 +1714,7 @@
     atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
   }
   if (EmitSync & 1) {
-      // set box->dhw = unused_mark (3)
+      // set box->dhw = markOopDesc::unused_mark()
       // Force all sync thru slow-path: slow_enter() and slow_exit()
       movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
       cmpptr (rsp, (int32_t)NULL_WORD);
@@ -1811,7 +1809,7 @@
     jmp(DONE_LABEL);
 
     bind(IsInflated);
-    // The object is inflated. tmpReg contains pointer to ObjectMonitor* + 2(monitor_value)
+    // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markOopDesc::monitor_value
 
 #if INCLUDE_RTM_OPT
     // Use the same RTM locking code in 32- and 64-bit VM.
@@ -1823,25 +1821,10 @@
 
 #ifndef _LP64
     // The object is inflated.
-    //
-    // TODO-FIXME: eliminate the ugly use of manifest constants:
-    //   Use markOopDesc::monitor_value instead of "2".
-    //   use markOop::unused_mark() instead of "3".
-    // The tmpReg value is an objectMonitor reference ORed with
-    // markOopDesc::monitor_value (2).   We can either convert tmpReg to an
-    // objectmonitor pointer by masking off the "2" bit or we can just
-    // use tmpReg as an objectmonitor pointer but bias the objectmonitor
-    // field offsets with "-2" to compensate for and annul the low-order tag bit.
-    //
-    // I use the latter as it avoids AGI stalls.
-    // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
-    // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
-    //
-    #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
 
     // boxReg refers to the on-stack BasicLock in the current frame.
     // We'd like to write:
-    //   set box->_displaced_header = markOop::unused_mark().  Any non-0 value suffices.
+    //   set box->_displaced_header = markOopDesc::unused_mark().  Any non-0 value suffices.
     // This is convenient but results a ST-before-CAS penalty.  The following CAS suffers
     // additional latency as we have another ST in the store buffer that must drain.
 
@@ -1853,7 +1836,7 @@
        if (os::is_MP()) {
          lock();
        }
-       cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
     } else
     if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
        movptr(scrReg, boxReg);
@@ -1862,7 +1845,7 @@
        // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
        if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
           // prefetchw [eax + Offset(_owner)-2]
-          prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+          prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
        }
 
        if ((EmitSync & 64) == 0) {
@@ -1871,7 +1854,7 @@
        } else {
          // Can suffer RTS->RTO upgrades on shared or cold $ lines
          // Test-And-CAS instead of CAS
-         movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));   // rax, = m->_owner
+         movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));   // rax, = m->_owner
          testptr(tmpReg, tmpReg);                   // Locked ?
          jccb  (Assembler::notZero, DONE_LABEL);
        }
@@ -1887,11 +1870,11 @@
        if (os::is_MP()) {
          lock();
        }
-       cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
        movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
        jccb  (Assembler::notZero, DONE_LABEL);
        get_thread (scrReg);                    // beware: clobbers ICCs
-       movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg);
+       movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
        xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
 
        // If the CAS fails we can either retry or pass control to the slow-path.
@@ -1908,7 +1891,7 @@
        // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
        if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
           // prefetchw [eax + Offset(_owner)-2]
-          prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+          prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
        }
 
        if ((EmitSync & 64) == 0) {
@@ -1916,7 +1899,7 @@
          xorptr  (tmpReg, tmpReg);
        } else {
          // Can suffer RTS->RTO upgrades on shared or cold $ lines
-         movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));   // rax, = m->_owner
+         movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));   // rax, = m->_owner
          testptr(tmpReg, tmpReg);                   // Locked ?
          jccb  (Assembler::notZero, DONE_LABEL);
        }
@@ -1928,7 +1911,7 @@
        if (os::is_MP()) {
          lock();
        }
-       cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
 
        // If the CAS fails we can either retry or pass control to the slow-path.
        // We use the latter tactic.
@@ -1951,7 +1934,7 @@
     movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
 
     movptr (boxReg, tmpReg);
-    movptr (tmpReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    movptr(tmpReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
     testptr(tmpReg, tmpReg);
     jccb   (Assembler::notZero, DONE_LABEL);
 
@@ -1959,7 +1942,7 @@
     if (os::is_MP()) {
       lock();
     }
-    cmpxchgptr(r15_thread, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    cmpxchgptr(r15_thread, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
     // Intentional fall-through into DONE_LABEL ...
 #endif // _LP64
 
@@ -2065,8 +2048,7 @@
 #if INCLUDE_RTM_OPT
     if (use_rtm) {
       Label L_regular_inflated_unlock;
-      // Clean monitor_value bit to get valid pointer
-      int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
+      int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
       movptr(boxReg, Address(tmpReg, owner_offset));
       testptr(boxReg, boxReg);
       jccb(Assembler::notZero, L_regular_inflated_unlock);
@@ -2102,7 +2084,7 @@
     get_thread (boxReg);
     if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
       // prefetchw [ebx + Offset(_owner)-2]
-      prefetchw(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+      prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
     }
 
     // Note that we could employ various encoding schemes to reduce
@@ -2111,21 +2093,21 @@
     // In practice the chain of fetches doesn't seem to impact performance, however.
     if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
        // Attempt to reduce branch density - AMD's branch predictor.
-       xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
-       orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
-       orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
-       orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
+       xorptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+       orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
+       orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
+       orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
        jccb  (Assembler::notZero, DONE_LABEL);
-       movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
+       movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
        jmpb  (DONE_LABEL);
     } else {
-       xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
-       orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
+       xorptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+       orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
        jccb  (Assembler::notZero, DONE_LABEL);
-       movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
-       orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
+       movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
+       orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
        jccb  (Assembler::notZero, CheckSucc);
-       movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
+       movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
        jmpb  (DONE_LABEL);
     }
 
@@ -2143,7 +2125,7 @@
 
        // Optional pre-test ... it's safe to elide this
        if ((EmitSync & 16) == 0) {
-          cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
+          cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
           jccb  (Assembler::zero, LGoSlowPath);
        }
 
@@ -2173,7 +2155,7 @@
        // We currently use (3), although it's likely that switching to (2)
        // is correct for the future.
 
-       movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD);
+       movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
        if (os::is_MP()) {
           if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
             mfence();
@@ -2182,18 +2164,18 @@
           }
        }
        // Ratify _succ remains non-null
-       cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0);
+       cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), 0);
        jccb  (Assembler::notZero, LSuccess);
 
        xorptr(boxReg, boxReg);                  // box is really EAX
        if (os::is_MP()) { lock(); }
-       cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+       cmpxchgptr(rsp, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
        jccb  (Assembler::notEqual, LSuccess);
        // Since we're low on registers we installed rsp as a placeholding in _owner.
        // Now install Self over rsp.  This is safe as we're transitioning from
        // non-null to non=null
        get_thread (boxReg);
-       movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg);
+       movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), boxReg);
        // Intentional fall-through into LGoSlowPath ...
 
        bind  (LGoSlowPath);
@@ -2228,36 +2210,36 @@
     }
 #else // _LP64
     // It's inflated
-    movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+    movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
     xorptr(boxReg, r15_thread);
-    orptr (boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2));
+    orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
     jccb  (Assembler::notZero, DONE_LABEL);
-    movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
-    orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
+    movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
+    orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
     jccb  (Assembler::notZero, CheckSucc);
-    movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD);
+    movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
     jmpb  (DONE_LABEL);
 
     if ((EmitSync & 65536) == 0) {
       Label LSuccess, LGoSlowPath ;
       bind  (CheckSucc);
-      cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
+      cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
       jccb  (Assembler::zero, LGoSlowPath);
 
       // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
       // the explicit ST;MEMBAR combination, but masm doesn't currently support
       // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
       // are all faster when the write buffer is populated.
-      movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD);
+      movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
       if (os::is_MP()) {
          lock (); addl (Address(rsp, 0), 0);
       }
-      cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
+      cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
       jccb  (Assembler::notZero, LSuccess);
 
       movptr (boxReg, (int32_t)NULL_WORD);                   // box is really EAX
       if (os::is_MP()) { lock(); }
-      cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
+      cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
       jccb  (Assembler::notEqual, LSuccess);
       // Intentional fall-through into slow-path
 
--- a/hotspot/src/share/vm/runtime/objectMonitor.hpp	Thu Nov 06 13:57:26 2014 +0000
+++ b/hotspot/src/share/vm/runtime/objectMonitor.hpp	Thu Nov 06 13:05:14 2014 -0800
@@ -228,6 +228,20 @@
   static int Responsible_offset_in_bytes() { return offset_of(ObjectMonitor, _Responsible); }
   static int Spinner_offset_in_bytes()     { return offset_of(ObjectMonitor, _Spinner); }
 
+  // ObjectMonitor references can be ORed with markOopDesc::monitor_value
+  // as part of the ObjectMonitor tagging mechanism. When we combine an
+  // ObjectMonitor reference with an offset, we need to remove the tag
+  // value in order to generate the proper address.
+  //
+  // We can either adjust the ObjectMonitor reference and then add the
+  // offset or we can adjust the offset that is added to the ObjectMonitor
+  // reference. The latter avoids an AGI (Address Generation Interlock)
+  // stall so the helper macro adjusts the offset value that is returned
+  // to the ObjectMonitor reference manipulation code:
+  //
+  #define OM_OFFSET_NO_MONITOR_VALUE_TAG(f) \
+    ((ObjectMonitor::f ## _offset_in_bytes()) - markOopDesc::monitor_value)
+
   // Eventually we'll make provisions for multiple callbacks, but
   // now one will suffice.
   static int (*SpinCallbackFunction)(intptr_t, int);