1530 assert(tmpReg == rax, ""); |
1530 assert(tmpReg == rax, ""); |
1531 assert(scrReg == rdx, ""); |
1531 assert(scrReg == rdx, ""); |
1532 Label L_rtm_retry, L_decrement_retry, L_on_abort; |
1532 Label L_rtm_retry, L_decrement_retry, L_on_abort; |
1533 int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner); |
1533 int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner); |
1534 |
1534 |
1535 // Without cast to int32_t a movptr will destroy r10 which is typically obj |
1535 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj. |
1536 movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value())); |
1536 movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value())); |
1537 movptr(boxReg, tmpReg); // Save ObjectMonitor address |
1537 movptr(boxReg, tmpReg); // Save ObjectMonitor address |
1538 |
1538 |
1539 if (RTMRetryCount > 0) { |
1539 if (RTMRetryCount > 0) { |
1540 movl(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy |
1540 movl(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy |
1600 } |
1600 } |
1601 } |
1601 } |
1602 |
1602 |
1603 #endif // INCLUDE_RTM_OPT |
1603 #endif // INCLUDE_RTM_OPT |
1604 |
1604 |
1605 // Fast_Lock and Fast_Unlock used by C2 |
1605 // fast_lock and fast_unlock used by C2 |
1606 |
1606 |
1607 // Because the transitions from emitted code to the runtime |
1607 // Because the transitions from emitted code to the runtime |
1608 // monitorenter/exit helper stubs are so slow it's critical that |
1608 // monitorenter/exit helper stubs are so slow it's critical that |
1609 // we inline both the stack-locking fast-path and the inflated fast path. |
1609 // we inline both the stack-locking fast path and the inflated fast path. |
1610 // |
1610 // |
1611 // See also: cmpFastLock and cmpFastUnlock. |
1611 // See also: cmpFastLock and cmpFastUnlock. |
1612 // |
1612 // |
1613 // What follows is a specialized inline transliteration of the code |
1613 // What follows is a specialized inline transliteration of the code |
1614 // in enter() and exit(). If we're concerned about I$ bloat another |
1614 // in enter() and exit(). If we're concerned about I$ bloat another |
1615 // option would be to emit TrySlowEnter and TrySlowExit methods |
1615 // option would be to emit TrySlowEnter and TrySlowExit methods |
1616 // at startup-time. These methods would accept arguments as |
1616 // at startup-time. These methods would accept arguments as |
1617 // (rax=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure |
1617 // (rax=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure |
1618 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply |
1618 // indications in the icc.ZFlag. fast_lock and fast_unlock would simply |
1619 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. |
1619 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. |
1620 // In practice, however, the # of lock sites is bounded and is usually small. |
1620 // In practice, however, the # of lock sites is bounded and is usually small. |
1621 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer |
1621 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer |
1622 // if the processor uses simple bimodal branch predictors keyed by EIP |
1622 // if the processor uses simple bimodal branch predictors keyed by EIP |
1623 // Since the helper routines would be called from multiple synchronization |
1623 // Since the helper routines would be called from multiple synchronization |
1632 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and |
1632 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and |
1633 // (b) explicit barriers or fence operations. |
1633 // (b) explicit barriers or fence operations. |
1634 // |
1634 // |
1635 // TODO: |
1635 // TODO: |
1636 // |
1636 // |
1637 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). |
1637 // * Arrange for C2 to pass "Self" into fast_lock and fast_unlock in one of the registers (scr). |
1638 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. |
1638 // This avoids manifesting the Self pointer in the fast_lock and fast_unlock terminals. |
1639 // Given TLAB allocation, Self is usually manifested in a register, so passing it into |
1639 // Given TLAB allocation, Self is usually manifested in a register, so passing it into |
1640 // the lock operators would typically be faster than reifying Self. |
1640 // the lock operators would typically be faster than reifying Self. |
1641 // |
1641 // |
1642 // * Ideally I'd define the primitives as: |
1642 // * Ideally I'd define the primitives as: |
1643 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED. |
1643 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED. |
1659 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). |
1659 // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). |
1660 // |
1660 // |
1661 // * use jccb and jmpb instead of jcc and jmp to improve code density. |
1661 // * use jccb and jmpb instead of jcc and jmp to improve code density. |
1662 // But beware of excessive branch density on AMD Opterons. |
1662 // But beware of excessive branch density on AMD Opterons. |
1663 // |
1663 // |
1664 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success |
1664 // * Both fast_lock and fast_unlock set the ICC.ZF to indicate success |
1665 // or failure of the fast-path. If the fast-path fails then we pass |
1665 // or failure of the fast path. If the fast path fails then we pass |
1666 // control to the slow-path, typically in C. In Fast_Lock and |
1666 // control to the slow path, typically in C. In fast_lock and |
1667 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2 |
1667 // fast_unlock we often branch to DONE_LABEL, just to find that C2 |
1668 // will emit a conditional branch immediately after the node. |
1668 // will emit a conditional branch immediately after the node. |
1669 // So we have branches to branches and lots of ICC.ZF games. |
1669 // So we have branches to branches and lots of ICC.ZF games. |
1670 // Instead, it might be better to have C2 pass a "FailureLabel" |
1670 // Instead, it might be better to have C2 pass a "FailureLabel" |
1671 // into Fast_Lock and Fast_Unlock. In the case of success, control |
1671 // into fast_lock and fast_unlock. In the case of success, control |
1672 // will drop through the node. ICC.ZF is undefined at exit. |
1672 // will drop through the node. ICC.ZF is undefined at exit. |
1673 // In the case of failure, the node will branch directly to the |
1673 // In the case of failure, the node will branch directly to the |
1674 // FailureLabel |
1674 // FailureLabel |
1675 |
1675 |
1676 |
1676 |
1811 // update _owner from BasicLock to thread |
1811 // update _owner from BasicLock to thread |
1812 get_thread (scrReg); // beware: clobbers ICCs |
1812 get_thread (scrReg); // beware: clobbers ICCs |
1813 movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg); |
1813 movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg); |
1814 xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success |
1814 xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success |
1815 |
1815 |
1816 // If the CAS fails we can either retry or pass control to the slow-path. |
1816 // If the CAS fails we can either retry or pass control to the slow path. |
1817 // We use the latter tactic. |
1817 // We use the latter tactic. |
1818 // Pass the CAS result in the icc.ZFlag into DONE_LABEL |
1818 // Pass the CAS result in the icc.ZFlag into DONE_LABEL |
1819 // If the CAS was successful ... |
1819 // If the CAS was successful ... |
1820 // Self has acquired the lock |
1820 // Self has acquired the lock |
1821 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it. |
1821 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it. |
1822 // Intentional fall-through into DONE_LABEL ... |
1822 // Intentional fall-through into DONE_LABEL ... |
1823 #else // _LP64 |
1823 #else // _LP64 |
1824 // It's inflated |
1824 // It's inflated and we use scrReg for ObjectMonitor* in this section. |
1825 movq(scrReg, tmpReg); |
1825 movq(scrReg, tmpReg); |
1826 xorq(tmpReg, tmpReg); |
1826 xorq(tmpReg, tmpReg); |
1827 |
|
1828 lock(); |
1827 lock(); |
1829 cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); |
1828 cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); |
1830 // Unconditionally set box->_displaced_header = markWord::unused_mark(). |
1829 // Unconditionally set box->_displaced_header = markWord::unused_mark(). |
1831 // Without cast to int32_t movptr will destroy r10 which is typically obj. |
1830 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj. |
1832 movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value())); |
1831 movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value())); |
1833 // Intentional fall-through into DONE_LABEL ... |
1832 // Intentional fall-through into DONE_LABEL ... |
1834 // Propagate ICC.ZF from CAS above into DONE_LABEL. |
1833 // Propagate ICC.ZF from CAS above into DONE_LABEL. |
1835 #endif // _LP64 |
1834 #endif // _LP64 |
1836 #if INCLUDE_RTM_OPT |
1835 #if INCLUDE_RTM_OPT |
1842 // most efficient "long" NOP encodings. |
1841 // most efficient "long" NOP encodings. |
1843 // Unfortunately none of our alignment mechanisms suffice. |
1842 // Unfortunately none of our alignment mechanisms suffice. |
1844 bind(DONE_LABEL); |
1843 bind(DONE_LABEL); |
1845 |
1844 |
1846 // At DONE_LABEL the icc ZFlag is set as follows ... |
1845 // At DONE_LABEL the icc ZFlag is set as follows ... |
1847 // Fast_Unlock uses the same protocol. |
1846 // fast_unlock uses the same protocol. |
1848 // ZFlag == 1 -> Success |
1847 // ZFlag == 1 -> Success |
1849 // ZFlag == 0 -> Failure - force control through the slow-path |
1848 // ZFlag == 0 -> Failure - force control through the slow path |
1850 } |
1849 } |
1851 |
1850 |
1852 // obj: object to unlock |
1851 // obj: object to unlock |
1853 // box: box address (displaced header location), killed. Must be EAX. |
1852 // box: box address (displaced header location), killed. Must be EAX. |
1854 // tmp: killed, cannot be obj nor box. |
1853 // tmp: killed, cannot be obj nor box. |
1855 // |
1854 // |
1856 // Some commentary on balanced locking: |
1855 // Some commentary on balanced locking: |
1857 // |
1856 // |
1858 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. |
1857 // fast_lock and fast_unlock are emitted only for provably balanced lock sites. |
1859 // Methods that don't have provably balanced locking are forced to run in the |
1858 // Methods that don't have provably balanced locking are forced to run in the |
1860 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock. |
1859 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock. |
1861 // The interpreter provides two properties: |
1860 // The interpreter provides two properties: |
1862 // I1: At return-time the interpreter automatically and quietly unlocks any |
1861 // I1: At return-time the interpreter automatically and quietly unlocks any |
1863 //     objects acquired by the current activation (frame). Recall that the |
1862 //     objects acquired by the current activation (frame). Recall that the |
1874 // The only other source of unbalanced locking would be JNI. The "Java Native Interface: |
1873 // The only other source of unbalanced locking would be JNI. The "Java Native Interface: |
1875 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter |
1874 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter |
1876 // should not be unlocked by "normal" java-level locking and vice-versa. The specification |
1875 // should not be unlocked by "normal" java-level locking and vice-versa. The specification |
1877 // doesn't specify what will occur if a program engages in such mixed-mode locking, however. |
1876 // doesn't specify what will occur if a program engages in such mixed-mode locking, however. |
1878 // Arguably given that the spec legislates the JNI case as undefined our implementation |
1877 // Arguably given that the spec legislates the JNI case as undefined our implementation |
1879 // could reasonably *avoid* checking owner in Fast_Unlock(). |
1878 // could reasonably *avoid* checking owner in fast_unlock(). |
1880 // In the interest of performance we elide m->Owner==Self check in unlock. |
1879 // In the interest of performance we elide m->Owner==Self check in unlock. |
1881 // A perfectly viable alternative is to elide the owner check except when |
1880 // A perfectly viable alternative is to elide the owner check except when |
1882 // Xcheck:jni is enabled. |
1881 // Xcheck:jni is enabled. |
1883 |
1882 |
1884 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) { |
1883 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) { |
1939 // |
1938 // |
1940 // If there's no contention try a 1-0 exit. That is, exit without |
1939 // If there's no contention try a 1-0 exit. That is, exit without |
1941 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how |
1940 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how |
1942 // we detect and recover from the race that the 1-0 exit admits. |
1941 // we detect and recover from the race that the 1-0 exit admits. |
1943 // |
1942 // |
1944 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier |
1943 // Conceptually fast_unlock() must execute a STST|LDST "release" barrier |
1945 // before it STs null into _owner, releasing the lock. Updates |
1944 // before it STs null into _owner, releasing the lock. Updates |
1946 // to data protected by the critical section must be visible before |
1945 // to data protected by the critical section must be visible before |
1947 // we drop the lock (and thus before any other thread could acquire |
1946 // we drop the lock (and thus before any other thread could acquire |
1948 // the lock and observe the fields protected by the lock). |
1947 // the lock and observe the fields protected by the lock). |
1949 // IA32's memory-model is SPO, so STs are ordered with respect to |
1948 // IA32's memory-model is SPO, so STs are ordered with respect to |
1988 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); |
1987 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); |
1989 jccb (Assembler::notZero, DONE_LABEL); |
1988 jccb (Assembler::notZero, DONE_LABEL); |
1990 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); |
1989 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); |
1991 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); |
1990 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); |
1992 jccb (Assembler::notZero, CheckSucc); |
1991 jccb (Assembler::notZero, CheckSucc); |
|
1992 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj. |
1993 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD); |
1993 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD); |
1994 jmpb (DONE_LABEL); |
1994 jmpb (DONE_LABEL); |
1995 |
1995 |
1996 // Try to avoid passing control into the slow_path ... |
1996 // Try to avoid passing control into the slow_path ... |
1997 Label LSuccess, LGoSlowPath ; |
1997 Label LSuccess, LGoSlowPath ; |
1998 bind (CheckSucc); |
1998 bind (CheckSucc); |
1999 |
1999 |
2000 // The following optional optimization can be elided if necessary |
2000 // The following optional optimization can be elided if necessary |
2001 // Effectively: if (succ == null) goto SlowPath |
2001 // Effectively: if (succ == null) goto slow path |
2002 // The code reduces the window for a race, however, |
2002 // The code reduces the window for a race, however, |
2003 // and thus benefits performance. |
2003 // and thus benefits performance. |
2004 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD); |
2004 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD); |
2005 jccb (Assembler::zero, LGoSlowPath); |
2005 jccb (Assembler::zero, LGoSlowPath); |
2006 |
2006 |
2007 xorptr(boxReg, boxReg); |
2007 xorptr(boxReg, boxReg); |
|
2008 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj. |
2008 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD); |
2009 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD); |
2009 |
2010 |
2010 // Memory barrier/fence |
2011 // Memory barrier/fence |
2011 // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ |
2012 // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ |
2012 // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack. |
2013 // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack. |
2037 cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); |
2038 cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); |
2038 // There's no successor so we tried to regrab the lock. |
2039 // There's no successor so we tried to regrab the lock. |
2039 // If that didn't work, then another thread grabbed the |
2040 // If that didn't work, then another thread grabbed the |
2040 // lock so we're done (and exit was a success). |
2041 // lock so we're done (and exit was a success). |
2041 jccb (Assembler::notEqual, LSuccess); |
2042 jccb (Assembler::notEqual, LSuccess); |
2042 // Intentional fall-through into slow-path |
2043 // Intentional fall-through into slow path |
2043 |
2044 |
2044 bind (LGoSlowPath); |
2045 bind (LGoSlowPath); |
2045 orl (boxReg, 1); // set ICC.ZF=0 to indicate failure |
2046 orl (boxReg, 1); // set ICC.ZF=0 to indicate failure |
2046 jmpb (DONE_LABEL); |
2047 jmpb (DONE_LABEL); |
2047 |
2048 |