src/hotspot/cpu/x86/macroAssembler_x86.cpp
changeset 51663 a65d8a6fa424
parent 51633 21154cb84d2a
child 51756 4bd35a5ec694
equal deleted inserted replaced
51662:fe4349d27282 51663:a65d8a6fa424
  1719   }
  1719   }
  1720 
  1720 
  1721   if (counters != NULL) {
  1721   if (counters != NULL) {
  1722     atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
  1722     atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
  1723   }
  1723   }
  1724   if (EmitSync & 1) {
  1724 
  1725       // set box->dhw = markOopDesc::unused_mark()
  1725   // Possible cases that we'll encounter in fast_lock
  1726       // Force all sync thru slow-path: slow_enter() and slow_exit()
  1726   // ------------------------------------------------
  1727       movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
  1727   // * Inflated
  1728       cmpptr (rsp, (int32_t)NULL_WORD);
  1728   //    -- unlocked
  1729   } else {
  1729   //    -- Locked
  1730     // Possible cases that we'll encounter in fast_lock
  1730   //       = by self
  1731     // ------------------------------------------------
  1731   //       = by other
  1732     // * Inflated
  1732   // * biased
  1733     //    -- unlocked
  1733   //    -- by Self
  1734     //    -- Locked
  1734   //    -- by other
  1735     //       = by self
  1735   // * neutral
  1736     //       = by other
  1736   // * stack-locked
  1737     // * biased
  1737   //    -- by self
  1738     //    -- by Self
  1738   //       = sp-proximity test hits
  1739     //    -- by other
  1739   //       = sp-proximity test generates false-negative
  1740     // * neutral
  1740   //    -- by other
  1741     // * stack-locked
  1741   //
  1742     //    -- by self
  1742 
  1743     //       = sp-proximity test hits
  1743   Label IsInflated, DONE_LABEL;
  1744     //       = sp-proximity test generates false-negative
  1744 
  1745     //    -- by other
  1745   // it's stack-locked, biased or neutral
  1746     //
  1746   // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
  1747 
  1747   // order to reduce the number of conditional branches in the most common cases.
  1748     Label IsInflated, DONE_LABEL;
  1748   // Beware -- there's a subtle invariant that fetch of the markword
  1749 
  1749   // at [FETCH], below, will never observe a biased encoding (*101b).
  1750     // it's stack-locked, biased or neutral
  1750   // If this invariant is not held we risk exclusion (safety) failure.
  1751     // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
  1751   if (UseBiasedLocking && !UseOptoBiasInlining) {
  1752     // order to reduce the number of conditional branches in the most common cases.
  1752     biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
  1753     // Beware -- there's a subtle invariant that fetch of the markword
  1753   }
  1754     // at [FETCH], below, will never observe a biased encoding (*101b).
       
  1755     // If this invariant is not held we risk exclusion (safety) failure.
       
  1756     if (UseBiasedLocking && !UseOptoBiasInlining) {
       
  1757       biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
       
  1758     }
       
  1759 
  1754 
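// --------------------------------------------------------------------------
// Editor's sketch (illustrative, not part of this changeset): the mark-word
// triage performed by the emitted code below, as hypothetical pseudocode.
// Helper names (mark_of, monitor_of, cas) are assumptions for illustration.
//
//   mark = mark_of(obj)                                   // [FETCH]
//   if (mark & monitor_value)                             // inflated
//       ZF = cas(&monitor_of(mark)->_owner, NULL, self_or_box)
//   else                                                  // neutral/stack-locked
//       box->dhw = mark | unlocked_value                  // anticipate success
//       ZF = cas(&obj->mark, mark | unlocked_value, box)  // try stack-lock
//            || (observed_mark - SP) fits in one page     // recursive by self
// --------------------------------------------------------------------------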
  1760 #if INCLUDE_RTM_OPT
  1755 #if INCLUDE_RTM_OPT
  1761     if (UseRTMForStackLocks && use_rtm) {
  1756   if (UseRTMForStackLocks && use_rtm) {
  1762       rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
  1757     rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
  1763                         stack_rtm_counters, method_data, profile_rtm,
  1758                       stack_rtm_counters, method_data, profile_rtm,
  1764                         DONE_LABEL, IsInflated);
  1759                       DONE_LABEL, IsInflated);
  1765     }
  1760   }
  1766 #endif // INCLUDE_RTM_OPT
  1761 #endif // INCLUDE_RTM_OPT
  1767 
  1762 
  1768     movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));          // [FETCH]
  1763   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));          // [FETCH]
  1769     testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
  1764   testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
  1770     jccb(Assembler::notZero, IsInflated);
  1765   jccb(Assembler::notZero, IsInflated);
  1771 
  1766 
  1772     // Attempt stack-locking ...
  1767   // Attempt stack-locking ...
  1773     orptr (tmpReg, markOopDesc::unlocked_value);
  1768   orptr (tmpReg, markOopDesc::unlocked_value);
  1774     movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
  1769   movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
  1775     if (os::is_MP()) {
  1770   if (os::is_MP()) {
  1776       lock();
  1771     lock();
  1777     }
  1772   }
  1778     cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()));      // Updates tmpReg
  1773   cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()));      // Updates tmpReg
  1779     if (counters != NULL) {
  1774   if (counters != NULL) {
  1780       cond_inc32(Assembler::equal,
  1775     cond_inc32(Assembler::equal,
  1781                  ExternalAddress((address)counters->fast_path_entry_count_addr()));
  1776                ExternalAddress((address)counters->fast_path_entry_count_addr()));
  1782     }
  1777   }
  1783     jcc(Assembler::equal, DONE_LABEL);           // Success
  1778   jcc(Assembler::equal, DONE_LABEL);           // Success
  1784 
  1779 
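// Editor's sketch (illustrative, not part of this changeset): a minimal,
// self-contained C++ model of the stack-locking attempt emitted above, using
// std::atomic in place of the lock-prefixed cmpxchg.  The names BasicLockBox,
// kUnlockedBit and try_stack_lock are hypothetical stand-ins.

#include <atomic>
#include <cstdint>

struct BasicLockBox { std::intptr_t displaced_header; };

constexpr std::intptr_t kUnlockedBit = 1;  // stand-in for markOopDesc::unlocked_value

// Returns true if the mark word now points at 'box', i.e. the object is
// stack-locked by the calling thread.
bool try_stack_lock(std::atomic<std::intptr_t>& mark_word, BasicLockBox* box) {
  std::intptr_t expected = mark_word.load(std::memory_order_relaxed) | kUnlockedBit;
  box->displaced_header = expected;                     // anticipate a successful CAS
  return mark_word.compare_exchange_strong(expected,
                                           reinterpret_cast<std::intptr_t>(box),
                                           std::memory_order_acquire,
                                           std::memory_order_relaxed);
}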
  1785     // Recursive locking.
  1780   // Recursive locking.
  1786     // The object is stack-locked: markword contains stack pointer to BasicLock.
  1781   // The object is stack-locked: markword contains stack pointer to BasicLock.
  1787     // Locked by the current thread if the difference from the current SP is less than one page.
  1782   // Locked by the current thread if the difference from the current SP is less than one page.
  1788     subptr(tmpReg, rsp);
  1783   subptr(tmpReg, rsp);
  1789     // Next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
  1784   // Next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
  1790     andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
  1785   andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
  1791     movptr(Address(boxReg, 0), tmpReg);
  1786   movptr(Address(boxReg, 0), tmpReg);
  1792     if (counters != NULL) {
  1787   if (counters != NULL) {
  1793       cond_inc32(Assembler::equal,
  1788     cond_inc32(Assembler::equal,
  1794                  ExternalAddress((address)counters->fast_path_entry_count_addr()));
  1789                ExternalAddress((address)counters->fast_path_entry_count_addr()));
  1795     }
  1790   }
  1796     jmp(DONE_LABEL);
  1791   jmp(DONE_LABEL);
  1797 
  1792 
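// Editor's sketch (illustrative, not part of this changeset): the recursion
// test above in plain C++.  A zero result means the observed mark is a stack
// address close enough to the current SP, i.e. a recursive stack-lock by this
// thread (ZF == 1); page_mask is a stand-in for the NOT_LP64/LP64_ONLY mask.

#include <cstdint>

std::intptr_t recursion_check(std::intptr_t observed_mark, std::intptr_t sp,
                              std::intptr_t page_mask) {
  // Stored into box->_displaced_header by the emitted code; 0 => recursive.
  return (observed_mark - sp) & page_mask;
}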
  1798     bind(IsInflated);
  1793   bind(IsInflated);
  1799     // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markOopDesc::monitor_value
  1794   // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markOopDesc::monitor_value
  1800 
  1795 
  1801 #if INCLUDE_RTM_OPT
  1796 #if INCLUDE_RTM_OPT
  1802     // Use the same RTM locking code in 32- and 64-bit VM.
  1797   // Use the same RTM locking code in 32- and 64-bit VM.
  1803     if (use_rtm) {
  1798   if (use_rtm) {
  1804       rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
  1799     rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
  1805                            rtm_counters, method_data, profile_rtm, DONE_LABEL);
  1800                          rtm_counters, method_data, profile_rtm, DONE_LABEL);
  1806     } else {
  1801   } else {
  1807 #endif // INCLUDE_RTM_OPT
  1802 #endif // INCLUDE_RTM_OPT
  1808 
  1803 
  1809 #ifndef _LP64
  1804 #ifndef _LP64
  1810     // The object is inflated.
  1805   // The object is inflated.
  1811 
  1806 
  1812     // boxReg refers to the on-stack BasicLock in the current frame.
  1807   // boxReg refers to the on-stack BasicLock in the current frame.
  1813     // We'd like to write:
  1808   // We'd like to write:
  1814     //   set box->_displaced_header = markOopDesc::unused_mark().  Any non-0 value suffices.
  1809   //   set box->_displaced_header = markOopDesc::unused_mark().  Any non-0 value suffices.
  1815     // This is convenient but results in a ST-before-CAS penalty.  The following CAS suffers
  1810   // This is convenient but results in a ST-before-CAS penalty.  The following CAS suffers
  1816     // additional latency as we have another ST in the store buffer that must drain.
  1811   // additional latency as we have another ST in the store buffer that must drain.
  1817 
  1812 
  1818     if (EmitSync & 8192) {
  1813   // avoid ST-before-CAS
  1819        movptr(Address(boxReg, 0), 3);            // results in ST-before-CAS penalty
  1814   // register juggle because we need tmpReg for cmpxchgptr below
  1820        get_thread (scrReg);
  1815   movptr(scrReg, boxReg);
  1821        movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2]
  1816   movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
  1822        movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
  1817 
  1823        if (os::is_MP()) {
  1818   // Optimistic form: consider XORL tmpReg,tmpReg
  1824          lock();
  1819   movptr(tmpReg, NULL_WORD);
  1825        }
  1820 
  1826        cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
  1821   // Appears unlocked - try to swing _owner from null to non-null.
  1827     } else
  1822   // Ideally, I'd manifest "Self" with get_thread and then attempt
  1828     if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
  1823   // to CAS the register containing Self into m->Owner.
  1829        // register juggle because we need tmpReg for cmpxchgptr below
  1824   // But we don't have enough registers, so instead we can either try to CAS
  1830        movptr(scrReg, boxReg);
  1825   // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
  1831        movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
  1826   // we later store "Self" into m->Owner.  Transiently storing a stack address
  1832 
  1827   // (rsp or the address of the box) into  m->owner is harmless.
  1833        // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
  1828   // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
  1834        if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
  1829   if (os::is_MP()) {
  1835           // prefetchw [eax + Offset(_owner)-2]
  1830     lock();
  1836           prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
  1831   }
  1837        }
  1832   cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
  1838 
  1833   movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
  1839        if ((EmitSync & 64) == 0) {
  1834   // If we weren't able to swing _owner from NULL to the BasicLock
  1840          // Optimistic form: consider XORL tmpReg,tmpReg
  1835   // then take the slow path.
  1841          movptr(tmpReg, NULL_WORD);
  1836   jccb  (Assembler::notZero, DONE_LABEL);
  1842        } else {
  1837   // update _owner from BasicLock to thread
  1843          // Can suffer RTS->RTO upgrades on shared or cold $ lines
  1838   get_thread (scrReg);                    // beware: clobbers ICCs
  1844          // Test-And-CAS instead of CAS
  1839   movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
  1845          movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));   // rax, = m->_owner
  1840   xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
  1846          testptr(tmpReg, tmpReg);                   // Locked ?
  1841 
  1847          jccb  (Assembler::notZero, DONE_LABEL);
  1842   // If the CAS fails we can either retry or pass control to the slow-path.
  1848        }
  1843   // We use the latter tactic.
  1849 
  1844   // Pass the CAS result in the icc.ZFlag into DONE_LABEL
  1850        // Appears unlocked - try to swing _owner from null to non-null.
  1845   // If the CAS was successful ...
  1851        // Ideally, I'd manifest "Self" with get_thread and then attempt
  1846   //   Self has acquired the lock
  1852        // to CAS the register containing Self into m->Owner.
  1847   //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
  1853        // But we don't have enough registers, so instead we can either try to CAS
  1848   // Intentional fall-through into DONE_LABEL ...
  1854        // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
       
  1855        // we later store "Self" into m->Owner.  Transiently storing a stack address
       
  1856        // (rsp or the address of the box) into  m->owner is harmless.
       
  1857        // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
       
  1858        if (os::is_MP()) {
       
  1859          lock();
       
  1860        }
       
  1861        cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
       
  1862        movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
       
  1863        // If we weren't able to swing _owner from NULL to the BasicLock
       
  1864        // then take the slow path.
       
  1865        jccb  (Assembler::notZero, DONE_LABEL);
       
  1866        // update _owner from BasicLock to thread
       
  1867        get_thread (scrReg);                    // beware: clobbers ICCs
       
  1868        movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
       
  1869        xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
       
  1870 
       
  1871        // If the CAS fails we can either retry or pass control to the slow-path.
       
  1872        // We use the latter tactic.
       
  1873        // Pass the CAS result in the icc.ZFlag into DONE_LABEL
       
  1874        // If the CAS was successful ...
       
  1875        //   Self has acquired the lock
       
  1876        //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
       
  1877        // Intentional fall-through into DONE_LABEL ...
       
  1878     } else {
       
  1879        movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark()));  // results in ST-before-CAS penalty
       
  1880        movptr(boxReg, tmpReg);
       
  1881 
       
  1882        // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
       
  1883        if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
       
  1884           // prefetchw [eax + Offset(_owner)-2]
       
  1885           prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
       
  1886        }
       
  1887 
       
  1888        if ((EmitSync & 64) == 0) {
       
  1889          // Optimistic form
       
  1890          xorptr  (tmpReg, tmpReg);
       
  1891        } else {
       
  1892          // Can suffer RTS->RTO upgrades on shared or cold $ lines
       
  1893          movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));   // rax, = m->_owner
       
  1894          testptr(tmpReg, tmpReg);                   // Locked ?
       
  1895          jccb  (Assembler::notZero, DONE_LABEL);
       
  1896        }
       
  1897 
       
  1898        // Appears unlocked - try to swing _owner from null to non-null.
       
  1899        // Use either "Self" (in scr) or rsp as thread identity in _owner.
       
  1900        // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
       
  1901        get_thread (scrReg);
       
  1902        if (os::is_MP()) {
       
  1903          lock();
       
  1904        }
       
  1905        cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
       
  1906 
       
  1907        // If the CAS fails we can either retry or pass control to the slow-path.
       
  1908        // We use the latter tactic.
       
  1909        // Pass the CAS result in the icc.ZFlag into DONE_LABEL
       
  1910        // If the CAS was successful ...
       
  1911        //   Self has acquired the lock
       
  1912        //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
       
  1913        // Intentional fall-through into DONE_LABEL ...
       
  1914     }
       
  1915 #else // _LP64
  1849 #else // _LP64
  1916     // It's inflated
  1850   // It's inflated
  1917     movq(scrReg, tmpReg);
  1851   movq(scrReg, tmpReg);
  1918     xorq(tmpReg, tmpReg);
  1852   xorq(tmpReg, tmpReg);
  1919 
  1853 
  1920     if (os::is_MP()) {
  1854   if (os::is_MP()) {
  1921       lock();
  1855     lock();
  1922     }
  1856   }
  1923     cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
  1857   cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
  1924     // Unconditionally set box->_displaced_header = markOopDesc::unused_mark().
  1858   // Unconditionally set box->_displaced_header = markOopDesc::unused_mark().
  1925     // Without cast to int32_t movptr will destroy r10 which is typically obj.
  1859   // Without cast to int32_t movptr will destroy r10 which is typically obj.
  1926     movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
  1860   movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
  1927     // Intentional fall-through into DONE_LABEL ...
  1861   // Intentional fall-through into DONE_LABEL ...
  1928     // Propagate ICC.ZF from CAS above into DONE_LABEL.
  1862   // Propagate ICC.ZF from CAS above into DONE_LABEL.
  1929 #endif // _LP64
  1863 #endif // _LP64
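// Editor's sketch (illustrative, not part of this changeset): a self-contained
// model of the inflated fast path above -- CAS the monitor's owner from null to
// the current thread, while box->dhw holds a non-zero "unused" mark so that
// fast_unlock will not mistake the box for a recursive stack-lock.  The names
// MonitorOwnerModel and try_enter_inflated are hypothetical.

#include <atomic>

struct MonitorOwnerModel { std::atomic<void*> owner{nullptr}; };

bool try_enter_inflated(MonitorOwnerModel& m, void* self) {
  void* expected = nullptr;   // unowned
  return m.owner.compare_exchange_strong(expected, self,
                                         std::memory_order_acquire,
                                         std::memory_order_relaxed);
}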
  1930 #if INCLUDE_RTM_OPT
  1864 #if INCLUDE_RTM_OPT
  1931     } // use_rtm()
  1865   } // use_rtm()
  1932 #endif
  1866 #endif
  1933     // DONE_LABEL is a hot target - we'd really like to place it at the
  1867   // DONE_LABEL is a hot target - we'd really like to place it at the
  1934     // start of cache line by padding with NOPs.
  1868   // start of cache line by padding with NOPs.
  1935     // See the AMD and Intel software optimization manuals for the
  1869   // See the AMD and Intel software optimization manuals for the
  1936     // most efficient "long" NOP encodings.
  1870   // most efficient "long" NOP encodings.
  1937     // Unfortunately none of our alignment mechanisms suffice.
  1871   // Unfortunately none of our alignment mechanisms suffice.
  1938     bind(DONE_LABEL);
  1872   bind(DONE_LABEL);
  1939 
  1873 
  1940     // At DONE_LABEL the icc ZFlag is set as follows ...
  1874   // At DONE_LABEL the icc ZFlag is set as follows ...
  1941     // Fast_Unlock uses the same protocol.
  1875   // Fast_Unlock uses the same protocol.
  1942     // ZFlag == 1 -> Success
  1876   // ZFlag == 1 -> Success
  1943     // ZFlag == 0 -> Failure - force control through the slow-path
  1877   // ZFlag == 0 -> Failure - force control through the slow-path
  1944   }
       
  1945 }
  1878 }
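// Editor's note (illustrative, not part of this changeset): fast_lock reports
// its outcome only through ICC.ZF, so the surrounding generated code merely
// has to branch on the flag, roughly:
//
//   fast_lock(obj, box, tmp, ...);            // sets ZF as described above
//   jcc(Assembler::notZero, slow_path_stub);  // ZF == 0 -> call the runtime
//
// The exact call sites live in the platform .ad files; the two lines above are
// only a sketch of the contract stated at DONE_LABEL, and slow_path_stub is a
// hypothetical label.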
  1946 
  1879 
  1947 // obj: object to unlock
  1880 // obj: object to unlock
  1948 // box: box address (displaced header location), killed.  Must be EAX.
  1881 // box: box address (displaced header location), killed.  Must be EAX.
  1949 // tmp: killed, cannot be obj nor box.
  1882 // tmp: killed, cannot be obj nor box.
  1978 
  1911 
  1979 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
  1912 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
  1980   assert(boxReg == rax, "");
  1913   assert(boxReg == rax, "");
  1981   assert_different_registers(objReg, boxReg, tmpReg);
  1914   assert_different_registers(objReg, boxReg, tmpReg);
  1982 
  1915 
  1983   if (EmitSync & 4) {
  1916   Label DONE_LABEL, Stacked, CheckSucc;
  1984     // Disable - inhibit all inlining.  Force control through the slow-path
  1917 
  1985     cmpptr (rsp, 0);
  1918   // Critically, the biased locking test must have precedence over
  1986   } else {
  1919   // and appear before the (box->dhw == 0) recursive stack-lock test.
  1987     Label DONE_LABEL, Stacked, CheckSucc;
  1920   if (UseBiasedLocking && !UseOptoBiasInlining) {
  1988 
  1921     biased_locking_exit(objReg, tmpReg, DONE_LABEL);
  1989     // Critically, the biased locking test must have precedence over
  1922   }
  1990     // and appear before the (box->dhw == 0) recursive stack-lock test.
       
  1991     if (UseBiasedLocking && !UseOptoBiasInlining) {
       
  1992        biased_locking_exit(objReg, tmpReg, DONE_LABEL);
       
  1993     }
       
  1994 
  1923 
  1995 #if INCLUDE_RTM_OPT
  1924 #if INCLUDE_RTM_OPT
  1996     if (UseRTMForStackLocks && use_rtm) {
  1925   if (UseRTMForStackLocks && use_rtm) {
  1997       assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
  1926     assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
  1998       Label L_regular_unlock;
  1927     Label L_regular_unlock;
  1999       movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));           // fetch markword
  1928     movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));           // fetch markword
  2000       andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
  1929     andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
  2001       cmpptr(tmpReg, markOopDesc::unlocked_value);            // bits = 001 unlocked
  1930     cmpptr(tmpReg, markOopDesc::unlocked_value);            // bits = 001 unlocked
  2002       jccb(Assembler::notEqual, L_regular_unlock);  // if !HLE RegularLock
  1931     jccb(Assembler::notEqual, L_regular_unlock);  // if !HLE RegularLock
  2003       xend();                                       // otherwise end...
  1932     xend();                                       // otherwise end...
  2004       jmp(DONE_LABEL);                              // ... and we're done
  1933     jmp(DONE_LABEL);                              // ... and we're done
  2005       bind(L_regular_unlock);
  1934     bind(L_regular_unlock);
  2006     }
  1935   }
  2007 #endif
  1936 #endif
  2008 
  1937 
  2009     cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
  1938   cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
  2010     jcc   (Assembler::zero, DONE_LABEL);            // 0 indicates recursive stack-lock
  1939   jcc   (Assembler::zero, DONE_LABEL);            // 0 indicates recursive stack-lock
  2011     movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));             // Examine the object's markword
  1940   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));             // Examine the object's markword
  2012     testptr(tmpReg, markOopDesc::monitor_value);    // Inflated?
  1941   testptr(tmpReg, markOopDesc::monitor_value);    // Inflated?
  2013     jccb  (Assembler::zero, Stacked);
  1942   jccb  (Assembler::zero, Stacked);
  2014 
  1943 
  2015     // It's inflated.
  1944   // It's inflated.
  2016 #if INCLUDE_RTM_OPT
  1945 #if INCLUDE_RTM_OPT
  2017     if (use_rtm) {
  1946   if (use_rtm) {
  2018       Label L_regular_inflated_unlock;
  1947     Label L_regular_inflated_unlock;
  2019       int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
  1948     int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
  2020       movptr(boxReg, Address(tmpReg, owner_offset));
  1949     movptr(boxReg, Address(tmpReg, owner_offset));
  2021       testptr(boxReg, boxReg);
  1950     testptr(boxReg, boxReg);
  2022       jccb(Assembler::notZero, L_regular_inflated_unlock);
  1951     jccb(Assembler::notZero, L_regular_inflated_unlock);
  2023       xend();
  1952     xend();
  2024       jmpb(DONE_LABEL);
  1953     jmpb(DONE_LABEL);
  2025       bind(L_regular_inflated_unlock);
  1954     bind(L_regular_inflated_unlock);
  2026     }
  1955   }
  2027 #endif
  1956 #endif
  2028 
  1957 
  2029     // Despite our balanced locking property we still check that m->_owner == Self
  1958   // Despite our balanced locking property we still check that m->_owner == Self
  2030     // as java routines or native JNI code called by this thread might
  1959   // as java routines or native JNI code called by this thread might
  2031     // have released the lock.
  1960   // have released the lock.
  2032     // Refer to the comments in synchronizer.cpp for how we might encode extra
  1961   // Refer to the comments in synchronizer.cpp for how we might encode extra
  2033     // state in _succ so we can avoid fetching EntryList|cxq.
  1962   // state in _succ so we can avoid fetching EntryList|cxq.
  2034     //
  1963   //
  2035     // I'd like to add more cases in fast_lock() and fast_unlock() --
  1964   // I'd like to add more cases in fast_lock() and fast_unlock() --
  2036     // such as recursive enter and exit -- but we have to be wary of
  1965   // such as recursive enter and exit -- but we have to be wary of
  2037     // I$ bloat, T$ effects and BP$ effects.
  1966   // I$ bloat, T$ effects and BP$ effects.
  2038     //
  1967   //
  2039     // If there's no contention try a 1-0 exit.  That is, exit without
  1968   // If there's no contention try a 1-0 exit.  That is, exit without
  2040     // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
  1969   // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
  2041     // we detect and recover from the race that the 1-0 exit admits.
  1970   // we detect and recover from the race that the 1-0 exit admits.
  2042     //
  1971   //
  2043     // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
  1972   // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
  2044     // before it STs null into _owner, releasing the lock.  Updates
  1973   // before it STs null into _owner, releasing the lock.  Updates
  2045     // to data protected by the critical section must be visible before
  1974   // to data protected by the critical section must be visible before
  2046     // we drop the lock (and thus before any other thread could acquire
  1975   // we drop the lock (and thus before any other thread could acquire
  2047     // the lock and observe the fields protected by the lock).
  1976   // the lock and observe the fields protected by the lock).
  2048     // IA32's memory-model is TSO, so STs are ordered with respect to
  1977   // IA32's memory-model is TSO, so STs are ordered with respect to
  2049     // each other and there's no need for an explicit barrier (fence).
  1978   // each other and there's no need for an explicit barrier (fence).
  2050     // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
  1979   // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
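// Editor's sketch (illustrative, not part of this changeset): a self-contained
// C++ model of the 1-0 exit and the "ST Owner; MEMBAR; LD Succ" pivot described
// above.  std::memory_order_seq_cst stands in for the lock addl / MFENCE fence;
// the struct and function names are hypothetical.

#include <atomic>

struct MonitorQueuesModel {
  std::atomic<void*> owner{nullptr};
  std::atomic<void*> succ{nullptr};
  std::atomic<void*> entry_list{nullptr};
  std::atomic<void*> cxq{nullptr};
};

// Returns true if the unlock completed on the fast path.
bool try_fast_exit(MonitorQueuesModel& m, void* self) {
  if (m.entry_list.load() == nullptr && m.cxq.load() == nullptr) {
    m.owner.store(nullptr, std::memory_order_release);   // nobody waiting: 1-0 exit
    return true;
  }
  // Waiters exist: release, then ratify that a successor is still present.
  m.owner.store(nullptr, std::memory_order_seq_cst);     // ST owner; MEMBAR
  if (m.succ.load(std::memory_order_seq_cst) != nullptr) // LD succ
    return true;                                         // successor will take over
  // Successor vanished in the window: try to re-acquire so we can hand off.
  void* expected = nullptr;
  if (!m.owner.compare_exchange_strong(expected, self))
    return true;                                         // another thread owns it now
  return false;  // re-acquired -> take the slow path to arrange succession
}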
  2051 #ifndef _LP64
  1980 #ifndef _LP64
  2052     get_thread (boxReg);
  1981   get_thread (boxReg);
  2053     if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
  1982 
  2054       // prefetchw [ebx + Offset(_owner)-2]
  1983   // Note that we could employ various encoding schemes to reduce
  2055       prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
  1984   // the number of loads below (currently 4) to just 2 or 3.
  2056     }
  1985   // Refer to the comments in synchronizer.cpp.
  2057 
  1986   // In practice the chain of fetches doesn't seem to impact performance, however.
  2058     // Note that we could employ various encoding schemes to reduce
  1987   xorptr(boxReg, boxReg);
  2059     // the number of loads below (currently 4) to just 2 or 3.
  1988   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
  2060     // Refer to the comments in synchronizer.cpp.
  1989   jccb  (Assembler::notZero, DONE_LABEL);
  2061     // In practice the chain of fetches doesn't seem to impact performance, however.
  1990   movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
  2062     xorptr(boxReg, boxReg);
  1991   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
  2063     if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
  1992   jccb  (Assembler::notZero, CheckSucc);
  2064        // Attempt to reduce branch density - AMD's branch predictor.
  1993   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
  2065        orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
  1994   jmpb  (DONE_LABEL);
  2066        orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
  1995 
  2067        orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
  1996   bind (Stacked);
  2068        jccb  (Assembler::notZero, DONE_LABEL);
  1997   // It's not inflated and it's not recursively stack-locked and it's not biased.
  2069        movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
  1998   // It must be stack-locked.
  2070        jmpb  (DONE_LABEL);
  1999   // Try to reset the header to displaced header.
  2071     } else {
  2000   // The "box" value on the stack is stable, so we can reload
  2072        orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
  2001   // and be assured we observe the same value as above.
  2073        jccb  (Assembler::notZero, DONE_LABEL);
  2002   movptr(tmpReg, Address(boxReg, 0));
  2074        movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
  2003   if (os::is_MP()) {
  2075        orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
  2004     lock();
  2076        jccb  (Assembler::notZero, CheckSucc);
  2005   }
  2077        movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
  2006   cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
  2078        jmpb  (DONE_LABEL);
  2007   // Intention fall-thru into DONE_LABEL
  2079     }
  2008 
  2080 
  2009   // DONE_LABEL is a hot target - we'd really like to place it at the
  2081     // The Following code fragment (EmitSync & 65536) improves the performance of
  2010   // start of cache line by padding with NOPs.
  2082     // contended applications and contended synchronization microbenchmarks.
  2011   // See the AMD and Intel software optimization manuals for the
  2083     // Unfortunately the emission of the code - even though not executed - causes regressions
  2012   // most efficient "long" NOP encodings.
  2084     // in scimark and jetstream, evidently because of $ effects.  Replacing the code
  2013   // Unfortunately none of our alignment mechanisms suffice.
  2085     // with an equal number of never-executed NOPs results in the same regression.
  2014   bind (CheckSucc);
  2086     // We leave it off by default.
       
  2087 
       
  2088     if ((EmitSync & 65536) != 0) {
       
  2089        Label LSuccess, LGoSlowPath ;
       
  2090 
       
  2091        bind  (CheckSucc);
       
  2092 
       
  2093        // Optional pre-test ... it's safe to elide this
       
  2094        cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
       
  2095        jccb(Assembler::zero, LGoSlowPath);
       
  2096 
       
  2097        // We have a classic Dekker-style idiom:
       
  2098        //    ST m->_owner = 0 ; MEMBAR; LD m->_succ
       
  2099        // There are a number of ways to implement the barrier:
       
  2100        // (1) lock:andl &m->_owner, 0
       
  2101        //     is fast, but masm doesn't currently support the "ANDL M,IMM32" form.
       
  2102        //     LOCK: ANDL [ebx+Offset(_Owner)-2], 0
       
  2103        //     Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
       
  2104        // (2) If supported, an explicit MFENCE is appealing.
       
  2105        //     In older IA32 processors MFENCE is slower than lock:add or xchg
       
  2106        //     particularly if the write-buffer is full as might be the case
       
  2107        //     if stores closely precede the fence or fence-equivalent instruction.
       
  2108        //     See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
       
  2109        //     as the situation has changed with Nehalem and Shanghai.
       
  2110        // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
       
  2111        //     The $lines underlying the top-of-stack should be in M-state.
       
  2112        //     The locked add instruction is serializing, of course.
       
  2113        // (4) Use xchg, which is serializing
       
  2114        //     mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
       
  2115        // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
       
  2116        //     The integer condition codes will tell us if succ was 0.
       
  2117        //     Since _succ and _owner should reside in the same $line and
       
  2118        //     we just stored into _owner, it's likely that the $line
       
  2119        //     remains in M-state for the lock:orl.
       
  2120        //
       
  2121        // We currently use (3), although it's likely that switching to (2)
       
  2122        // is correct for the future.
       
  2123 
       
  2124        movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
       
  2125        if (os::is_MP()) {
       
  2126          lock(); addptr(Address(rsp, 0), 0);
       
  2127        }
       
  2128        // Ratify _succ remains non-null
       
  2129        cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), 0);
       
  2130        jccb  (Assembler::notZero, LSuccess);
       
  2131 
       
  2132        xorptr(boxReg, boxReg);                  // box is really EAX
       
  2133        if (os::is_MP()) { lock(); }
       
  2134        cmpxchgptr(rsp, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
       
  2135        // There's no successor so we tried to regrab the lock with the
       
  2136        // placeholder value. If that didn't work, then another thread
       
  2137        // grabbed the lock so we're done (and exit was a success).
       
  2138        jccb  (Assembler::notEqual, LSuccess);
       
  2139        // Since we're low on registers we installed rsp as a placeholder in _owner.
       
  2140        // Now install Self over rsp.  This is safe as we're transitioning from
       
  2141        // non-null to non-null
       
  2142        get_thread (boxReg);
       
  2143        movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), boxReg);
       
  2144        // Intentional fall-through into LGoSlowPath ...
       
  2145 
       
  2146        bind  (LGoSlowPath);
       
  2147        orptr(boxReg, 1);                      // set ICC.ZF=0 to indicate failure
       
  2148        jmpb  (DONE_LABEL);
       
  2149 
       
  2150        bind  (LSuccess);
       
  2151        xorptr(boxReg, boxReg);                 // set ICC.ZF=1 to indicate success
       
  2152        jmpb  (DONE_LABEL);
       
  2153     }
       
  2154 
       
  2155     bind (Stacked);
       
  2156     // It's not inflated and it's not recursively stack-locked and it's not biased.
       
  2157     // It must be stack-locked.
       
  2158     // Try to reset the header to displaced header.
       
  2159     // The "box" value on the stack is stable, so we can reload
       
  2160     // and be assured we observe the same value as above.
       
  2161     movptr(tmpReg, Address(boxReg, 0));
       
  2162     if (os::is_MP()) {
       
  2163       lock();
       
  2164     }
       
  2165     cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
       
  2166     // Intentional fall-through into DONE_LABEL
       
  2167 
       
  2168     // DONE_LABEL is a hot target - we'd really like to place it at the
       
  2169     // start of cache line by padding with NOPs.
       
  2170     // See the AMD and Intel software optimization manuals for the
       
  2171     // most efficient "long" NOP encodings.
       
  2172     // Unfortunately none of our alignment mechanisms suffice.
       
  2173     if ((EmitSync & 65536) == 0) {
       
  2174        bind (CheckSucc);
       
  2175     }
       
  2176 #else // _LP64
  2015 #else // _LP64
  2177     // It's inflated
  2016   // It's inflated
  2178     if (EmitSync & 1024) {
  2017   xorptr(boxReg, boxReg);
  2179       // Emit code to check that _owner == Self
  2018   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
  2180       // We could fold the _owner test into subsequent code more efficiently
  2019   jccb  (Assembler::notZero, DONE_LABEL);
  2181       // than using a stand-alone check, but since _owner checking is off by
  2020   movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
  2182       // default we don't bother. We also might consider predicating the
  2021   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
  2183       // _owner==Self check on Xcheck:jni or running on a debug build.
  2022   jccb  (Assembler::notZero, CheckSucc);
  2184       movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
  2023   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
  2185       xorptr(boxReg, r15_thread);
  2024   jmpb  (DONE_LABEL);
  2186     } else {
  2025 
  2187       xorptr(boxReg, boxReg);
  2026   // Try to avoid passing control into the slow_path ...
  2188     }
  2027   Label LSuccess, LGoSlowPath ;
  2189     orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
  2028   bind  (CheckSucc);
  2190     jccb  (Assembler::notZero, DONE_LABEL);
  2029 
  2191     movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
  2030   // The following optional optimization can be elided if necessary
  2192     orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
  2031   // Effectively: if (succ == null) goto SlowPath
  2193     jccb  (Assembler::notZero, CheckSucc);
  2032   // The code reduces the window for a race, however,
  2194     movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
  2033   // and thus benefits performance.
  2195     jmpb  (DONE_LABEL);
  2034   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
  2196 
  2035   jccb  (Assembler::zero, LGoSlowPath);
  2197     if ((EmitSync & 65536) == 0) {
  2036 
  2198       // Try to avoid passing control into the slow_path ...
  2037   xorptr(boxReg, boxReg);
  2199       Label LSuccess, LGoSlowPath ;
  2038   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
  2200       bind  (CheckSucc);
  2039   if (os::is_MP()) {
  2201 
  2040     // Memory barrier/fence
  2202       // The following optional optimization can be elided if necessary
  2041     // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
  2203       // Effectively: if (succ == null) goto SlowPath
  2042     // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
  2204       // The code reduces the window for a race, however,
  2043     // This is faster on Nehalem and AMD Shanghai/Barcelona.
  2205       // and thus benefits performance.
  2044     // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
  2206       cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
  2045     // We might also restructure (ST Owner=0;barrier;LD _Succ) to
  2207       jccb  (Assembler::zero, LGoSlowPath);
  2046     // (mov box,0; xchgq box, &m->Owner; LD _succ) .
  2208 
  2047     lock(); addl(Address(rsp, 0), 0);
  2209       xorptr(boxReg, boxReg);
  2048   }
  2210       if ((EmitSync & 16) && os::is_MP()) {
  2049   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
  2211         xchgptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
  2050   jccb  (Assembler::notZero, LSuccess);
  2212       } else {
  2051 
  2213         movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
  2052   // Rare inopportune interleaving - race.
  2214         if (os::is_MP()) {
  2053   // The successor vanished in the small window above.
  2215           // Memory barrier/fence
  2054   // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
  2216           // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
  2055   // We need to ensure progress and succession.
  2217           // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
  2056   // Try to reacquire the lock.
  2218           // This is faster on Nehalem and AMD Shanghai/Barcelona.
  2057   // If that fails then the new owner is responsible for succession and this
  2219           // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
  2058   // thread needs to take no further action and can exit via the fast path (success).
  2220           // We might also restructure (ST Owner=0;barrier;LD _Succ) to
  2059   // If the re-acquire succeeds then pass control into the slow path.
  2221           // (mov box,0; xchgq box, &m->Owner; LD _succ) .
  2060   // As implemented, this latter mode is horrible because we generated more
  2222           lock(); addl(Address(rsp, 0), 0);
  2061   // coherence traffic on the lock *and* artificially extended the critical section
  2223         }
  2062   // length by virtue of passing control into the slow path.
  2224       }
  2063 
  2225       cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
  2064   // box is really RAX -- the following CMPXCHG depends on that binding
  2226       jccb  (Assembler::notZero, LSuccess);
  2065   // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
  2227 
  2066   if (os::is_MP()) { lock(); }
  2228       // Rare inopportune interleaving - race.
  2067   cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
  2229       // The successor vanished in the small window above.
  2068   // There's no successor so we tried to regrab the lock.
  2230       // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
  2069   // If that didn't work, then another thread grabbed the
  2231       // We need to ensure progress and succession.
  2070   // lock so we're done (and exit was a success).
  2232       // Try to reacquire the lock.
  2071   jccb  (Assembler::notEqual, LSuccess);
  2233       // If that fails then the new owner is responsible for succession and this
  2072   // Intentional fall-through into slow-path
  2234       // thread needs to take no further action and can exit via the fast path (success).
  2073 
  2235       // If the re-acquire succeeds then pass control into the slow path.
  2074   bind  (LGoSlowPath);
  2236       // As implemented, this latter mode is horrible because we generated more
  2075   orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
  2237       // coherence traffic on the lock *and* artificially extended the critical section
  2076   jmpb  (DONE_LABEL);
  2238       // length by virtue of passing control into the slow path.
  2077 
  2239 
  2078   bind  (LSuccess);
  2240       // box is really RAX -- the following CMPXCHG depends on that binding
  2079   testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
  2241       // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
  2080   jmpb  (DONE_LABEL);
  2242       if (os::is_MP()) { lock(); }
  2081 
  2243       cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
  2082   bind  (Stacked);
  2244       // There's no successor so we tried to regrab the lock.
  2083   movptr(tmpReg, Address (boxReg, 0));      // re-fetch
  2245       // If that didn't work, then another thread grabbed the
  2084   if (os::is_MP()) { lock(); }
  2246       // lock so we're done (and exit was a success).
  2085   cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
  2247       jccb  (Assembler::notEqual, LSuccess);
  2086 
  2248       // Intentional fall-through into slow-path
       
  2249 
       
  2250       bind  (LGoSlowPath);
       
  2251       orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
       
  2252       jmpb  (DONE_LABEL);
       
  2253 
       
  2254       bind  (LSuccess);
       
  2255       testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
       
  2256       jmpb  (DONE_LABEL);
       
  2257     }
       
  2258 
       
  2259     bind  (Stacked);
       
  2260     movptr(tmpReg, Address (boxReg, 0));      // re-fetch
       
  2261     if (os::is_MP()) { lock(); }
       
  2262     cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
       
  2263 
       
  2264     if (EmitSync & 65536) {
       
  2265        bind (CheckSucc);
       
  2266     }
       
  2267 #endif
  2087 #endif
  2268     bind(DONE_LABEL);
  2088   bind(DONE_LABEL);
  2269   }
       
  2270 }
  2089 }
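// --------------------------------------------------------------------------
// Editor's sketch (illustrative, not part of this changeset): the shape of
// the fast_unlock decision tree above, as hypothetical pseudocode.
//
//   if (biased)                  -> biased_locking_exit handles it
//   if (box->dhw == 0)           -> recursive stack-lock, nothing to undo; ZF = 1
//   if (!(mark & monitor_value)) -> stack-locked: ZF = cas(&obj->mark, box, box->dhw)
//   else                         -> inflated: recursions/cxq/EntryList/succ checks,
//                                   1-0 exit or re-acquire as modelled earlier
//
// As in fast_lock, ZF == 0 at DONE_LABEL sends control to the runtime slow path.
// --------------------------------------------------------------------------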
  2271 #endif // COMPILER2
  2090 #endif // COMPILER2
  2272 
  2091 
  2273 void MacroAssembler::c2bool(Register x) {
  2092 void MacroAssembler::c2bool(Register x) {
  2274   // implements x == 0 ? 0 : 1
  2093   // implements x == 0 ? 0 : 1