1956 // |
1956 // |
1957 // The only other source of unbalanced locking would be JNI. The "Java Native Interface: |
1957 // The only other source of unbalanced locking would be JNI. The "Java Native Interface: |
1958 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter |
1958 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter |
1959 // should not be unlocked by "normal" java-level locking and vice-versa. The specification |
1959 // should not be unlocked by "normal" java-level locking and vice-versa. The specification |
1960 // doesn't specify what will occur if a program engages in such mixed-mode locking, however. |
1960 // doesn't specify what will occur if a program engages in such mixed-mode locking, however. |
|
1961 // Arguably given that the spec legislates the JNI case as undefined our implementation |
|
1962 // could reasonably *avoid* checking owner in Fast_Unlock(). |
|
1963 // In the interest of performance we elide m->Owner==Self check in unlock. |
|
1964 // A perfectly viable alternative is to elide the owner check except when |
|
1965 // Xcheck:jni is enabled. |
1961 |
1966 |
1962 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) { |
1967 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) { |
1963 assert(boxReg == rax, ""); |
1968 assert(boxReg == rax, ""); |
1964 assert_different_registers(objReg, boxReg, tmpReg); |
1969 assert_different_registers(objReg, boxReg, tmpReg); |
1965 |
1970 |
1966 if (EmitSync & 4) { |
1971 if (EmitSync & 4) { |
1967 // Disable - inhibit all inlining. Force control through the slow-path |
1972 // Disable - inhibit all inlining. Force control through the slow-path |
1968 cmpptr (rsp, 0); |
1973 cmpptr (rsp, 0); |
1969 } else |
|
1970 if (EmitSync & 8) { |
|
1971 Label DONE_LABEL; |
|
1972 if (UseBiasedLocking) { |
|
1973 biased_locking_exit(objReg, tmpReg, DONE_LABEL); |
|
1974 } |
|
1975 // Classic stack-locking code ... |
|
1976 // Check whether the displaced header is 0 |
|
1977 //(=> recursive unlock) |
|
1978 movptr(tmpReg, Address(boxReg, 0)); |
|
1979 testptr(tmpReg, tmpReg); |
|
1980 jccb(Assembler::zero, DONE_LABEL); |
|
1981 // If not recursive lock, reset the header to displaced header |
|
1982 if (os::is_MP()) { |
|
1983 lock(); |
|
1984 } |
|
1985 cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box |
|
1986 bind(DONE_LABEL); |
|
1987 } else { |
1974 } else { |
1988 Label DONE_LABEL, Stacked, CheckSucc; |
1975 Label DONE_LABEL, Stacked, CheckSucc; |
1989 |
1976 |
1990 // Critically, the biased locking test must have precedence over |
1977 // Critically, the biased locking test must have precedence over |
1991 // and appear before the (box->dhw == 0) recursive stack-lock test. |
1978 // and appear before the (box->dhw == 0) recursive stack-lock test. |
2058 |
2045 |
2059 // Note that we could employ various encoding schemes to reduce |
2046 // Note that we could employ various encoding schemes to reduce |
2060 // the number of loads below (currently 4) to just 2 or 3. |
2047 // the number of loads below (currently 4) to just 2 or 3. |
2061 // Refer to the comments in synchronizer.cpp. |
2048 // Refer to the comments in synchronizer.cpp. |
2062 // In practice the chain of fetches doesn't seem to impact performance, however. |
2049 // In practice the chain of fetches doesn't seem to impact performance, however. |
|
2050 xorptr(boxReg, boxReg); |
2063 if ((EmitSync & 65536) == 0 && (EmitSync & 256)) { |
2051 if ((EmitSync & 65536) == 0 && (EmitSync & 256)) { |
2064 // Attempt to reduce branch density - AMD's branch predictor. |
2052 // Attempt to reduce branch density - AMD's branch predictor. |
2065 xorptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); |
|
2066 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); |
2053 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); |
2067 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); |
2054 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); |
2068 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); |
2055 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); |
2069 jccb (Assembler::notZero, DONE_LABEL); |
2056 jccb (Assembler::notZero, DONE_LABEL); |
2070 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); |
2057 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); |
2071 jmpb (DONE_LABEL); |
2058 jmpb (DONE_LABEL); |
2072 } else { |
2059 } else { |
2073 xorptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); |
|
2074 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); |
2060 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); |
2075 jccb (Assembler::notZero, DONE_LABEL); |
2061 jccb (Assembler::notZero, DONE_LABEL); |
2076 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); |
2062 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); |
2077 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); |
2063 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); |
2078 jccb (Assembler::notZero, CheckSucc); |
2064 jccb (Assembler::notZero, CheckSucc); |
2107 // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8 |
2091 // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8 |
2108 // (2) If supported, an explicit MFENCE is appealing. |
2092 // (2) If supported, an explicit MFENCE is appealing. |
2109 // In older IA32 processors MFENCE is slower than lock:add or xchg |
2093 // In older IA32 processors MFENCE is slower than lock:add or xchg |
2110 // particularly if the write-buffer is full as might be the case if |
2094 // particularly if the write-buffer is full as might be the case if |
2111 // stores closely precede the fence or fence-equivalent instruction. |
2095 // stores closely precede the fence or fence-equivalent instruction. |
2112 // In more modern implementations MFENCE appears faster, however. |
2096 // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences |
|
2097 // as the situation has changed with Nehalem and Shanghai. |
2113 // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack |
2098 // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack |
2114 // The $lines underlying the top-of-stack should be in M-state. |
2099 // The $lines underlying the top-of-stack should be in M-state. |
2115 // The locked add instruction is serializing, of course. |
2100 // The locked add instruction is serializing, of course. |
2116 // (4) Use xchg, which is serializing |
2101 // (4) Use xchg, which is serializing |
2117 // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works |
2102 // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works |
2177 if ((EmitSync & 65536) == 0) { |
2158 if ((EmitSync & 65536) == 0) { |
2178 bind (CheckSucc); |
2159 bind (CheckSucc); |
2179 } |
2160 } |
2180 #else // _LP64 |
2161 #else // _LP64 |
2181 // It's inflated |
2162 // It's inflated |
2182 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); |
2163 if (EmitSync & 1024) { |
2183 xorptr(boxReg, r15_thread); |
2164 // Emit code to check that _owner == Self |
|
2165 // We could fold the _owner test into subsequent code more efficiently |
|
2166 // than using a stand-alone check, but since _owner checking is off by |
|
2167 // default we don't bother. We also might consider predicating the |
|
2168 // _owner==Self check on Xcheck:jni or running on a debug build. |
|
2169 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); |
|
2170 xorptr(boxReg, r15_thread); |
|
2171 } else { |
|
2172 xorptr(boxReg, boxReg); |
|
2173 } |
2184 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); |
2174 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); |
2185 jccb (Assembler::notZero, DONE_LABEL); |
2175 jccb (Assembler::notZero, DONE_LABEL); |
2186 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); |
2176 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); |
2187 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); |
2177 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); |
2188 jccb (Assembler::notZero, CheckSucc); |
2178 jccb (Assembler::notZero, CheckSucc); |
2189 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD); |
2179 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD); |
2190 jmpb (DONE_LABEL); |
2180 jmpb (DONE_LABEL); |
2191 |
2181 |
2192 if ((EmitSync & 65536) == 0) { |
2182 if ((EmitSync & 65536) == 0) { |
|
2183 // Try to avoid passing control into the slow_path ... |
2193 Label LSuccess, LGoSlowPath ; |
2184 Label LSuccess, LGoSlowPath ; |
2194 bind (CheckSucc); |
2185 bind (CheckSucc); |
|
2186 |
|
2187 // The following optional optimization can be elided if necessary |
|
2188 // Effectively: if (succ == null) goto SlowPath |
|
2189 // The code reduces the window for a race, however, |
|
2190 // and thus benefits performance. |
2195 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD); |
2191 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD); |
2196 jccb (Assembler::zero, LGoSlowPath); |
2192 jccb (Assembler::zero, LGoSlowPath); |
2197 |
2193 |
2198 // I'd much rather use lock:andl m->_owner, 0 as it's faster than the |
2194 if ((EmitSync & 16) && os::is_MP()) { |
2199 // explicit ST;MEMBAR combination, but masm doesn't currently support |
2195 orptr(boxReg, boxReg); |
2200 // "ANDQ M,IMM". Don't use MFENCE here. lock:add to TOS, xchg, etc |
2196 xchgptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); |
2201 // are all faster when the write buffer is populated. |
2197 } else { |
2202 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD); |
2198 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD); |
2203 if (os::is_MP()) { |
2199 if (os::is_MP()) { |
2204 lock (); addl (Address(rsp, 0), 0); |
2200 // Memory barrier/fence |
|
2201 // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ |
|
2202 // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack. |
|
2203 // This is faster on Nehalem and AMD Shanghai/Barcelona. |
|
2204 // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences |
|
2205 // We might also restructure (ST Owner=0;barrier;LD _Succ) to |
|
2206 // (mov box,0; xchgq box, &m->Owner; LD _succ) . |
|
2207 lock(); addl(Address(rsp, 0), 0); |
|
2208 } |
2205 } |
2209 } |
2206 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD); |
2210 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD); |
2207 jccb (Assembler::notZero, LSuccess); |
2211 jccb (Assembler::notZero, LSuccess); |
2208 |
2212 |
2209 movptr (boxReg, (int32_t)NULL_WORD); // box is really EAX |
2213 // Rare inopportune interleaving - race. |
|
2214 // The successor vanished in the small window above. |
|
2215 // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor. |
|
2216 // We need to ensure progress and succession. |
|
2217 // Try to reacquire the lock. |
|
2218 // If that fails then the new owner is responsible for succession and this |
|
2219 // thread needs to take no further action and can exit via the fast path (success). |
|
2220 // If the re-acquire succeeds then pass control into the slow path. |
|
2221 // As implemented, this latter mode is horrible because we generated more |
|
2222 // coherence traffic on the lock *and* artificially extended the critical section |
|
2223 // length by virtue of passing control into the slow path. |
|
2224 |
|
2225 // box is really RAX -- the following CMPXCHG depends on that binding |
|
2226 // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R) |
|
2227 movptr(boxReg, (int32_t)NULL_WORD); |
2210 if (os::is_MP()) { lock(); } |
2228 if (os::is_MP()) { lock(); } |
2211 cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); |
2229 cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); |
2212 jccb (Assembler::notEqual, LSuccess); |
2230 jccb (Assembler::notEqual, LSuccess); |
2213 // Intentional fall-through into slow-path |
2231 // Intentional fall-through into slow-path |
2214 |
2232 |