src/hotspot/cpu/x86/macroAssembler_x86.cpp
changeset 59156 14fa9e70ae71
parent 58977 c6a789f495fe
equal deleted inserted replaced
59154:0c2e1808f800 59156:14fa9e70ae71
  1530   assert(tmpReg == rax, "");
  1530   assert(tmpReg == rax, "");
  1531   assert(scrReg == rdx, "");
  1531   assert(scrReg == rdx, "");
  1532   Label L_rtm_retry, L_decrement_retry, L_on_abort;
  1532   Label L_rtm_retry, L_decrement_retry, L_on_abort;
  1533   int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
  1533   int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
  1534 
  1534 
  1535   // Without cast to int32_t a movptr will destroy r10 which is typically obj
  1535   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
  1536   movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
  1536   movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
  1537   movptr(boxReg, tmpReg); // Save ObjectMonitor address
  1537   movptr(boxReg, tmpReg); // Save ObjectMonitor address
  1538 
  1538 
  1539   if (RTMRetryCount > 0) {
  1539   if (RTMRetryCount > 0) {
  1540     movl(retry_on_busy_count_Reg, RTMRetryCount);  // Retry on lock busy
  1540     movl(retry_on_busy_count_Reg, RTMRetryCount);  // Retry on lock busy
  1600   }
  1600   }
  1601 }
  1601 }
  1602 
  1602 
  1603 #endif //  INCLUDE_RTM_OPT
  1603 #endif //  INCLUDE_RTM_OPT
  1604 
  1604 
  1605 // Fast_Lock and Fast_Unlock used by C2
  1605 // fast_lock and fast_unlock used by C2
  1606 
  1606 
  1607 // Because the transitions from emitted code to the runtime
  1607 // Because the transitions from emitted code to the runtime
  1608 // monitorenter/exit helper stubs are so slow it's critical that
  1608 // monitorenter/exit helper stubs are so slow it's critical that
  1609 // we inline both the stack-locking fast-path and the inflated fast path.
  1609 // we inline both the stack-locking fast path and the inflated fast path.
  1610 //
  1610 //
  1611 // See also: cmpFastLock and cmpFastUnlock.
  1611 // See also: cmpFastLock and cmpFastUnlock.
  1612 //
  1612 //
  1613 // What follows is a specialized inline transliteration of the code
  1613 // What follows is a specialized inline transliteration of the code
  1614 // in enter() and exit(). If we're concerned about I$ bloat another
  1614 // in enter() and exit(). If we're concerned about I$ bloat another
  1615 // option would be to emit TrySlowEnter and TrySlowExit methods
  1615 // option would be to emit TrySlowEnter and TrySlowExit methods
  1616 // at startup-time.  These methods would accept arguments as
  1616 // at startup-time.  These methods would accept arguments as
  1617 // (rax=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
  1617 // (rax=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
  1618 // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
  1618 // indications in the icc.ZFlag.  fast_lock and fast_unlock would simply
  1619 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
  1619 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
  1620 // In practice, however, the # of lock sites is bounded and is usually small.
  1620 // In practice, however, the # of lock sites is bounded and is usually small.
  1621 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
  1621 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
  1622 // if the processor uses simple bimodal branch predictors keyed by EIP.
  1622 // if the processor uses simple bimodal branch predictors keyed by EIP.
  1623 // Since the helper routines would be called from multiple synchronization
  1623 // Since the helper routines would be called from multiple synchronization
  1632 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
  1632 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
  1633 // (b) explicit barriers or fence operations.
  1633 // (b) explicit barriers or fence operations.
  1634 //
  1634 //
  1635 // TODO:
  1635 // TODO:
  1636 //
  1636 //
  1637 // *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
  1637 // *  Arrange for C2 to pass "Self" into fast_lock and fast_unlock in one of the registers (scr).
  1638 //    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
  1638 //    This avoids manifesting the Self pointer in the fast_lock and fast_unlock terminals.
  1639 //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
  1639 //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
  1640 //    the lock operators would typically be faster than reifying Self.
  1640 //    the lock operators would typically be faster than reifying Self.
  1641 //
  1641 //
  1642 // *  Ideally I'd define the primitives as:
  1642 // *  Ideally I'd define the primitives as:
  1643 //       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
  1643 //       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
  1659 //    avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
  1659 //    avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
  1660 //
  1660 //
  1661 // *  use jccb and jmpb instead of jcc and jmp to improve code density.
  1661 // *  use jccb and jmpb instead of jcc and jmp to improve code density.
  1662 //    But beware of excessive branch density on AMD Opterons.
  1662 //    But beware of excessive branch density on AMD Opterons.
  1663 //
  1663 //
  1664 // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
  1664 // *  Both fast_lock and fast_unlock set the ICC.ZF to indicate success
  1665 //    or failure of the fast-path.  If the fast-path fails then we pass
  1665 //    or failure of the fast path.  If the fast path fails then we pass
  1666 //    control to the slow-path, typically in C.  In Fast_Lock and
  1666 //    control to the slow path, typically in C.  In fast_lock and
  1667 //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
  1667 //    fast_unlock we often branch to DONE_LABEL, just to find that C2
  1668 //    will emit a conditional branch immediately after the node.
  1668 //    will emit a conditional branch immediately after the node.
  1669 //    So we have branches to branches and lots of ICC.ZF games.
  1669 //    So we have branches to branches and lots of ICC.ZF games.
  1670 //    Instead, it might be better to have C2 pass a "FailureLabel"
  1670 //    Instead, it might be better to have C2 pass a "FailureLabel"
  1671 //    into Fast_Lock and Fast_Unlock.  In the case of success, control
  1671 //    into fast_lock and fast_unlock.  In the case of success, control
  1672 //    will drop through the node.  ICC.ZF is undefined at exit.
  1672 //    will drop through the node.  ICC.ZF is undefined at exit.
  1673 //    In the case of failure, the node will branch directly to the
  1673 //    In the case of failure, the node will branch directly to the
  1674 //    FailureLabel
  1674 //    FailureLabel
  1675 
  1675 
  1676 
  1676 
  1811   // update _owner from BasicLock to thread
  1811   // update _owner from BasicLock to thread
  1812   get_thread (scrReg);                    // beware: clobbers ICCs
  1812   get_thread (scrReg);                    // beware: clobbers ICCs
  1813   movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
  1813   movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
  1814   xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
  1814   xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
  1815 
  1815 
  1816   // If the CAS fails we can either retry or pass control to the slow-path.
  1816   // If the CAS fails we can either retry or pass control to the slow path.
  1817   // We use the latter tactic.
  1817   // We use the latter tactic.
  1818   // Pass the CAS result in the icc.ZFlag into DONE_LABEL
  1818   // Pass the CAS result in the icc.ZFlag into DONE_LABEL
  1819   // If the CAS was successful ...
  1819   // If the CAS was successful ...
  1820   //   Self has acquired the lock
  1820   //   Self has acquired the lock
  1821   //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
  1821   //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
  1822   // Intentional fall-through into DONE_LABEL ...
  1822   // Intentional fall-through into DONE_LABEL ...
  1823 #else // _LP64
  1823 #else // _LP64
  1824   // It's inflated
  1824   // It's inflated and we use scrReg for ObjectMonitor* in this section.
  1825   movq(scrReg, tmpReg);
  1825   movq(scrReg, tmpReg);
  1826   xorq(tmpReg, tmpReg);
  1826   xorq(tmpReg, tmpReg);
  1827 
       
  1828   lock();
  1827   lock();
  1829   cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
  1828   cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
  1830   // Unconditionally set box->_displaced_header = markWord::unused_mark().
  1829   // Unconditionally set box->_displaced_header = markWord::unused_mark().
  1831   // Without cast to int32_t movptr will destroy r10 which is typically obj.
  1830   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
  1832   movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
  1831   movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
  1833   // Intentional fall-through into DONE_LABEL ...
  1832   // Intentional fall-through into DONE_LABEL ...
  1834   // Propagate ICC.ZF from CAS above into DONE_LABEL.
  1833   // Propagate ICC.ZF from CAS above into DONE_LABEL.
  1835 #endif // _LP64
  1834 #endif // _LP64
  1836 #if INCLUDE_RTM_OPT
  1835 #if INCLUDE_RTM_OPT
  1842   // most efficient "long" NOP encodings.
  1841   // most efficient "long" NOP encodings.
  1843   // Unfortunately none of our alignment mechanisms suffice.
  1842   // Unfortunately none of our alignment mechanisms suffice.
  1844   bind(DONE_LABEL);
  1843   bind(DONE_LABEL);
  1845 
  1844 
  1846   // At DONE_LABEL the icc ZFlag is set as follows ...
  1845   // At DONE_LABEL the icc ZFlag is set as follows ...
  1847   // Fast_Unlock uses the same protocol.
  1846   // fast_unlock uses the same protocol.
  1848   // ZFlag == 1 -> Success
  1847   // ZFlag == 1 -> Success
  1849   // ZFlag == 0 -> Failure - force control through the slow-path
  1848   // ZFlag == 0 -> Failure - force control through the slow path
  1850 }
  1849 }
  1851 
  1850 
  1852 // obj: object to unlock
  1851 // obj: object to unlock
  1853 // box: box address (displaced header location), killed.  Must be EAX.
  1852 // box: box address (displaced header location), killed.  Must be EAX.
  1854 // tmp: killed, cannot be obj nor box.
  1853 // tmp: killed, cannot be obj nor box.
  1855 //
  1854 //
  1856 // Some commentary on balanced locking:
  1855 // Some commentary on balanced locking:
  1857 //
  1856 //
  1858 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
  1857 // fast_lock and fast_unlock are emitted only for provably balanced lock sites.
  1859 // Methods that don't have provably balanced locking are forced to run in the
  1858 // Methods that don't have provably balanced locking are forced to run in the
  1860 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
  1859 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
  1861 // The interpreter provides two properties:
  1860 // The interpreter provides two properties:
  1862 // I1:  At return-time the interpreter automatically and quietly unlocks any
  1861 // I1:  At return-time the interpreter automatically and quietly unlocks any
  1863 //      objects acquired by the current activation (frame).  Recall that the
  1862 //      objects acquired by the current activation (frame).  Recall that the
  1874 // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
  1873 // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
  1875 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
  1874 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
  1876 // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
  1875 // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
  1877 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
  1876 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
  1878 // Arguably given that the spec legislates the JNI case as undefined our implementation
  1877 // Arguably given that the spec legislates the JNI case as undefined our implementation
  1879 // could reasonably *avoid* checking owner in Fast_Unlock().
  1878 // could reasonably *avoid* checking owner in fast_unlock().
  1880 // In the interest of performance we elide m->Owner==Self check in unlock.
  1879 // In the interest of performance we elide m->Owner==Self check in unlock.
  1881 // A perfectly viable alternative is to elide the owner check except when
  1880 // A perfectly viable alternative is to elide the owner check except when
  1882 // Xcheck:jni is enabled.
  1881 // Xcheck:jni is enabled.
  1883 
  1882 
  1884 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
  1883 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
  1939   //
  1938   //
  1940   // If there's no contention try a 1-0 exit.  That is, exit without
  1939   // If there's no contention try a 1-0 exit.  That is, exit without
  1941   // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
  1940   // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
  1942   // we detect and recover from the race that the 1-0 exit admits.
  1941   // we detect and recover from the race that the 1-0 exit admits.
  1943   //
  1942   //
  1944   // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
  1943   // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
  1945   // before it STs null into _owner, releasing the lock.  Updates
  1944   // before it STs null into _owner, releasing the lock.  Updates
  1946   // to data protected by the critical section must be visible before
  1945   // to data protected by the critical section must be visible before
  1947   // we drop the lock (and thus before any other thread could acquire
  1946   // we drop the lock (and thus before any other thread could acquire
  1948   // the lock and observe the fields protected by the lock).
  1947   // the lock and observe the fields protected by the lock).
  1949   // IA32's memory-model is SPO, so STs are ordered with respect to
  1948   // IA32's memory-model is SPO, so STs are ordered with respect to
  1988   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
  1987   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
  1989   jccb  (Assembler::notZero, DONE_LABEL);
  1988   jccb  (Assembler::notZero, DONE_LABEL);
  1990   movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
  1989   movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
  1991   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
  1990   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
  1992   jccb  (Assembler::notZero, CheckSucc);
  1991   jccb  (Assembler::notZero, CheckSucc);
       
  1992   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
  1993   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
  1993   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
  1994   jmpb  (DONE_LABEL);
  1994   jmpb  (DONE_LABEL);
  1995 
  1995 
  1996   // Try to avoid passing control into the slow_path ...
  1996   // Try to avoid passing control into the slow_path ...
  1997   Label LSuccess, LGoSlowPath ;
  1997   Label LSuccess, LGoSlowPath ;
  1998   bind  (CheckSucc);
  1998   bind  (CheckSucc);
  1999 
  1999 
  2000   // The following optional optimization can be elided if necessary
  2000   // The following optional optimization can be elided if necessary
  2001   // Effectively: if (succ == null) goto SlowPath
  2001   // Effectively: if (succ == null) goto slow path
  2002   // The code reduces the window for a race, however,
  2002   // The code reduces the window for a race, however,
  2003   // and thus benefits performance.
  2003   // and thus benefits performance.
  2004   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
  2004   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
  2005   jccb  (Assembler::zero, LGoSlowPath);
  2005   jccb  (Assembler::zero, LGoSlowPath);
  2006 
  2006 
  2007   xorptr(boxReg, boxReg);
  2007   xorptr(boxReg, boxReg);
       
  2008   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
  2008   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
  2009   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
  2009 
  2010 
  2010   // Memory barrier/fence
  2011   // Memory barrier/fence
  2011   // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
  2012   // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
  2012   // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
  2013   // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
  2037   cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
  2038   cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
  2038   // There's no successor so we tried to regrab the lock.
  2039   // There's no successor so we tried to regrab the lock.
  2039   // If that didn't work, then another thread grabbed the
  2040   // If that didn't work, then another thread grabbed the
  2040   // lock so we're done (and exit was a success).
  2041   // lock so we're done (and exit was a success).
  2041   jccb  (Assembler::notEqual, LSuccess);
  2042   jccb  (Assembler::notEqual, LSuccess);
  2042   // Intentional fall-through into slow-path
  2043   // Intentional fall-through into slow path
  2043 
  2044 
  2044   bind  (LGoSlowPath);
  2045   bind  (LGoSlowPath);
  2045   orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
  2046   orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
  2046   jmpb  (DONE_LABEL);
  2047   jmpb  (DONE_LABEL);
  2047 
  2048