# HG changeset patch # User dcubed # Date 1429197806 25200 # Node ID d4e471395ff59e0bbdce81ca18b9b3a4824a7241 # Parent a7ba42fa1df6799721a597d22aa2c147250235e2 8073165: Contended Locking fast exit bucket Summary: JEP-143/JDK-8073165 Contended Locking fast exit bucket Reviewed-by: dholmes, acorn, dice, dcubed Contributed-by: dave.dice@oracle.com, karen.kinnear@oracle.com, daniel.daugherty@oracle.com diff -r a7ba42fa1df6 -r d4e471395ff5 hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp --- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp Wed Apr 15 17:34:28 2015 -0700 +++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp Thu Apr 16 08:23:26 2015 -0700 @@ -3019,44 +3019,107 @@ // past the store that releases the lock. But TSO is a strong memory model // and that particular flavor of barrier is a noop, so we can safely elide it. // Note that we use 1-0 locking by default for the inflated case. We - // close the resultant (and rare) race by having contented threads in + // close the resultant (and rare) race by having contended threads in // monitorenter periodically poll _owner. 
- ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rscratch); - ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions), Rbox); - xor3(Rscratch, G2_thread, Rscratch); - orcc(Rbox, Rscratch, Rbox); - brx(Assembler::notZero, false, Assembler::pn, done); - delayed()-> - ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList), Rscratch); - ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq), Rbox); - orcc(Rbox, Rscratch, G0); - if (EmitSync & 65536) { - Label LSucc ; - brx(Assembler::notZero, false, Assembler::pn, LSucc); - delayed()->nop(); - ba(done); - delayed()->st_ptr(G0, Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)); - - bind(LSucc); - st_ptr(G0, Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)); - if (os::is_MP()) { membar (StoreLoad); } - ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ), Rscratch); - andcc(Rscratch, Rscratch, G0); - brx(Assembler::notZero, false, Assembler::pt, done); - delayed()->andcc(G0, G0, G0); - add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark); - mov(G2_thread, Rscratch); - cas_ptr(Rmark, G0, Rscratch); - // invert icc.zf and goto done - br_notnull(Rscratch, false, Assembler::pt, done); - delayed()->cmp(G0, G0); - ba(done); - delayed()->cmp(G0, 1); + + if (EmitSync & 1024) { + // Emit code to check that _owner == Self + // We could fold the _owner test into subsequent code more efficiently + // than using a stand-alone check, but since _owner checking is off by + // default we don't bother. We also might consider predicating the + // _owner==Self check on Xcheck:jni or running on a debug build. 
+ ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), Rscratch); + cmp(Rscratch, G2_thread); + brx(Assembler::notZero, false, Assembler::pn, done); + delayed()->nop(); + } + + if (EmitSync & 512) { + // classic lock release code absent 1-0 locking + // m->Owner = null; + // membar #storeload + // if (m->cxq|m->EntryList) == null goto Success + // if (m->succ != null) goto Success + // if CAS (&m->Owner,0,Self) != 0 goto Success + // goto SlowPath + ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox); + orcc(Rbox, G0, G0); + brx(Assembler::notZero, false, Assembler::pn, done); + delayed()->nop(); + st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + if (os::is_MP()) { membar(StoreLoad); } + ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch); + ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox); + orcc(Rbox, Rscratch, G0); + brx(Assembler::zero, false, Assembler::pt, done); + delayed()-> + ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch); + andcc(Rscratch, Rscratch, G0); + brx(Assembler::notZero, false, Assembler::pt, done); + delayed()->andcc(G0, G0, G0); + add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark); + mov(G2_thread, Rscratch); + cas_ptr(Rmark, G0, Rscratch); + cmp(Rscratch, G0); + // invert icc.zf and goto done + brx(Assembler::notZero, false, Assembler::pt, done); + delayed()->cmp(G0, G0); + br(Assembler::always, false, Assembler::pt, done); + delayed()->cmp(G0, 1); } else { - brx(Assembler::notZero, false, Assembler::pn, done); - delayed()->nop(); - ba(done); - delayed()->st_ptr(G0, Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)); + // 1-0 form : avoids CAS and MEMBAR in the common case + // Do not bother to ratify that m->Owner == Self. 
+ ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox); + orcc(Rbox, G0, G0); + brx(Assembler::notZero, false, Assembler::pn, done); + delayed()-> + ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch); + ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox); + orcc(Rbox, Rscratch, G0); + if (EmitSync & 16384) { + // As an optional optimization, if (EntryList|cxq) != null and _succ is null then + // we should transfer control directly to the slow-path. + // This test makes the reacquire operation below very infrequent. + // The logic is equivalent to : + // if (cxq|EntryList) == null : Owner=null; goto Success + // if succ == null : goto SlowPath + // Owner=null; membar #storeload + // if succ != null : goto Success + // if CAS(&Owner,null,Self) != null goto Success + // goto SlowPath + brx(Assembler::zero, true, Assembler::pt, done); + delayed()-> + st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch); + andcc(Rscratch, Rscratch, G0) ; + brx(Assembler::zero, false, Assembler::pt, done); + delayed()->orcc(G0, 1, G0); + st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + } else { + brx(Assembler::zero, false, Assembler::pt, done); + delayed()-> + st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + } + if (os::is_MP()) { membar(StoreLoad); } + // Check that _succ is (or remains) non-zero + ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch); + andcc(Rscratch, Rscratch, G0); + brx(Assembler::notZero, false, Assembler::pt, done); + delayed()->andcc(G0, G0, G0); + add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark); + mov(G2_thread, Rscratch); + cas_ptr(Rmark, G0, Rscratch); + cmp(Rscratch, G0); + // invert icc.zf and goto done + // A slightly better v8+/v9 idiom would be the following: + // movrnz Rscratch,1,Rscratch + // ba done + // xorcc Rscratch,1,G0 + // In v8+ mode 
the idiom would be valid IFF Rscratch was a G or O register + brx(Assembler::notZero, false, Assembler::pt, done); + delayed()->cmp(G0, G0); + br(Assembler::always, false, Assembler::pt, done); + delayed()->cmp(G0, 1); } bind (LStacked); diff -r a7ba42fa1df6 -r d4e471395ff5 hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp --- a/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Wed Apr 15 17:34:28 2015 -0700 +++ b/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Thu Apr 16 08:23:26 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -2664,6 +2664,9 @@ // disallows any pending_exception. __ mov(L3_box, O1); + // Pass in current thread pointer + __ mov(G2_thread, O2); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), relocInfo::runtime_call_type); __ delayed()->mov(L4, O0); // Need oop in O0 diff -r a7ba42fa1df6 -r d4e471395ff5 hotspot/src/cpu/x86/vm/globals_x86.hpp --- a/hotspot/src/cpu/x86/vm/globals_x86.hpp Wed Apr 15 17:34:28 2015 -0700 +++ b/hotspot/src/cpu/x86/vm/globals_x86.hpp Thu Apr 16 08:23:26 2015 -0700 @@ -87,9 +87,6 @@ develop(bool, IEEEPrecision, true, \ "Enables IEEE precision (for INTEL only)") \ \ - product(intx, FenceInstruction, 0, \ - "(Unsafe,Unstable) Experimental") \ - \ product(bool, UseStoreImmI16, true, \ "Use store immediate 16-bits value instruction on x86") \ \ diff -r a7ba42fa1df6 -r d4e471395ff5 hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp --- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Wed Apr 15 17:34:28 2015 -0700 +++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp Thu Apr 16 08:23:26 2015 -0700 @@ -1958,6 +1958,11 @@ // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter // should not be 
unlocked by "normal" java-level locking and vice-versa. The specification // doesn't specify what will occur if a program engages in such mixed-mode locking, however. +// Arguably given that the spec legislates the JNI case as undefined our implementation +// could reasonably *avoid* checking owner in Fast_Unlock(). +// In the interest of performance we elide m->Owner==Self check in unlock. +// A perfectly viable alternative is to elide the owner check except when +// Xcheck:jni is enabled. void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) { assert(boxReg == rax, ""); @@ -1966,24 +1971,6 @@ if (EmitSync & 4) { // Disable - inhibit all inlining. Force control through the slow-path cmpptr (rsp, 0); - } else - if (EmitSync & 8) { - Label DONE_LABEL; - if (UseBiasedLocking) { - biased_locking_exit(objReg, tmpReg, DONE_LABEL); - } - // Classic stack-locking code ... - // Check whether the displaced header is 0 - //(=> recursive unlock) - movptr(tmpReg, Address(boxReg, 0)); - testptr(tmpReg, tmpReg); - jccb(Assembler::zero, DONE_LABEL); - // If not recursive lock, reset the header to displaced header - if (os::is_MP()) { - lock(); - } - cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box - bind(DONE_LABEL); } else { Label DONE_LABEL, Stacked, CheckSucc; @@ -2060,9 +2047,9 @@ // the number of loads below (currently 4) to just 2 or 3. // Refer to the comments in synchronizer.cpp. // In practice the chain of fetches doesn't seem to impact performance, however. + xorptr(boxReg, boxReg); if ((EmitSync & 65536) == 0 && (EmitSync & 256)) { // Attempt to reduce branch density - AMD's branch predictor. 
- xorptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); @@ -2070,7 +2057,6 @@ movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); jmpb (DONE_LABEL); } else { - xorptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); jccb (Assembler::notZero, DONE_LABEL); movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); @@ -2093,10 +2079,8 @@ bind (CheckSucc); // Optional pre-test ... it's safe to elide this - if ((EmitSync & 16) == 0) { - cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD); - jccb (Assembler::zero, LGoSlowPath); - } + cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD); + jccb(Assembler::zero, LGoSlowPath); // We have a classic Dekker-style idiom: // ST m->_owner = 0 ; MEMBAR; LD m->_succ @@ -2109,7 +2093,8 @@ // In older IA32 processors MFENCE is slower than lock:add or xchg // particularly if the write-buffer is full as might be the case if // if stores closely precede the fence or fence-equivalent instruction. - // In more modern implementations MFENCE appears faster, however. + // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences + // as the situation has changed with Nehalem and Shanghai. // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack // The $lines underlying the top-of-stack should be in M-state. // The locked add instruction is serializing, of course. 
@@ -2126,11 +2111,7 @@ movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); if (os::is_MP()) { - if (VM_Version::supports_sse2() && 1 == FenceInstruction) { - mfence(); - } else { - lock (); addptr(Address(rsp, 0), 0); - } + lock(); addptr(Address(rsp, 0), 0); } // Ratify _succ remains non-null cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), 0); @@ -2179,8 +2160,17 @@ } #else // _LP64 // It's inflated - movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); - xorptr(boxReg, r15_thread); + if (EmitSync & 1024) { + // Emit code to check that _owner == Self + // We could fold the _owner test into subsequent code more efficiently + // than using a stand-alone check, but since _owner checking is off by + // default we don't bother. We also might consider predicating the + // _owner==Self check on Xcheck:jni or running on a debug build. + movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + xorptr(boxReg, r15_thread); + } else { + xorptr(boxReg, boxReg); + } orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); jccb (Assembler::notZero, DONE_LABEL); movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); @@ -2190,23 +2180,51 @@ jmpb (DONE_LABEL); if ((EmitSync & 65536) == 0) { + // Try to avoid passing control into the slow_path ... Label LSuccess, LGoSlowPath ; bind (CheckSucc); + + // The following optional optimization can be elided if necessary + // Effectively: if (succ == null) goto SlowPath + // The code reduces the window for a race, however, + // and thus benefits performance. cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD); jccb (Assembler::zero, LGoSlowPath); - // I'd much rather use lock:andl m->_owner, 0 as it's faster than the - // the explicit ST;MEMBAR combination, but masm doesn't currently support - // "ANDQ M,IMM". Don't use MFENCE here. 
lock:add to TOS, xchg, etc - // are all faster when the write buffer is populated. - movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD); - if (os::is_MP()) { - lock (); addl (Address(rsp, 0), 0); + if ((EmitSync & 16) && os::is_MP()) { + xorptr(boxReg, boxReg); + xchgptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + } else { + movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD); + if (os::is_MP()) { + // Memory barrier/fence + // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ + // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack. + // This is faster on Nehalem and AMD Shanghai/Barcelona. + // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences + // We might also restructure (ST Owner=0;barrier;LD _Succ) to + // (mov box,0; xchgq box, &m->Owner; LD _succ) . + lock(); addl(Address(rsp, 0), 0); + } } cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD); jccb (Assembler::notZero, LSuccess); - movptr (boxReg, (int32_t)NULL_WORD); // box is really EAX + // Rare inopportune interleaving - race. + // The successor vanished in the small window above. + // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor. + // We need to ensure progress and succession. + // Try to reacquire the lock. + // If that fails then the new owner is responsible for succession and this + // thread needs to take no further action and can exit via the fast path (success). + // If the re-acquire succeeds then pass control into the slow path. + // As implemented, this latter mode is horrible because we generated more + // coherence traffic on the lock *and* artificially extended the critical section + // length by virtue of passing control into the slow path. 
+ + // box is really RAX -- the following CMPXCHG depends on that binding + // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R) + movptr(boxReg, (int32_t)NULL_WORD); if (os::is_MP()) { lock(); } cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); jccb (Assembler::notEqual, LSuccess); @@ -2231,10 +2249,6 @@ } #endif bind(DONE_LABEL); - // Avoid branch to branch on AMD processors - if (EmitSync & 32768) { - nop(); - } } } #endif // COMPILER2 diff -r a7ba42fa1df6 -r d4e471395ff5 hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp --- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Wed Apr 15 17:34:28 2015 -0700 +++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Thu Apr 16 08:23:26 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -2343,12 +2343,14 @@ // should be a peal // +wordSize because of the push above + // args are (oop obj, BasicLock* lock, JavaThread* thread) + __ push(thread); __ lea(rax, Address(rbp, lock_slot_rbp_offset)); __ push(rax); __ push(obj_reg); __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C))); - __ addptr(rsp, 2*wordSize); + __ addptr(rsp, 3*wordSize); #ifdef ASSERT { Label L; diff -r a7ba42fa1df6 -r d4e471395ff5 hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp --- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Wed Apr 15 17:34:28 2015 -0700 +++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Thu Apr 16 08:23:26 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -2581,6 +2581,7 @@ __ lea(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); __ mov(c_rarg0, obj_reg); + __ mov(c_rarg2, r15_thread); __ mov(r12, rsp); // remember sp __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows __ andptr(rsp, -16); // align stack as required by ABI @@ -2590,6 +2591,7 @@ __ movptr(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset()))); __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD); + // args are (oop obj, BasicLock* lock, JavaThread* thread) __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C))); __ mov(rsp, r12); // restore sp __ reinit_heapbase(); diff -r a7ba42fa1df6 -r d4e471395ff5 hotspot/src/share/vm/opto/macro.cpp --- a/hotspot/src/share/vm/opto/macro.cpp Wed Apr 15 17:34:28 2015 -0700 +++ b/hotspot/src/share/vm/opto/macro.cpp Thu Apr 16 08:23:26 2015 -0700 @@ -144,7 +144,9 @@ } //------------------------------make_slow_call--------------------------------- -CallNode* PhaseMacroExpand::make_slow_call(CallNode *oldcall, const TypeFunc* slow_call_type, address slow_call, const char* leaf_name, Node* slow_path, Node* parm0, Node* parm1) { +CallNode* PhaseMacroExpand::make_slow_call(CallNode *oldcall, const TypeFunc* slow_call_type, + address slow_call, const char* leaf_name, Node* slow_path, + Node* parm0, Node* parm1, Node* parm2) { // Slow-path call CallNode *call = leaf_name @@ -155,6 +157,7 @@ copy_predefined_input_for_runtime_call(slow_path, oldcall, call ); if (parm0 != NULL) call->init_req(TypeFunc::Parms+0, parm0); if (parm1 != NULL) call->init_req(TypeFunc::Parms+1, parm1); + if (parm2 != NULL) call->init_req(TypeFunc::Parms+2, parm2); copy_call_debug_info(oldcall, call); call->set_cnt(PROB_UNLIKELY_MAG(4)); // Same effect as RC_UNCOMMON. 
_igvn.replace_node(oldcall, call); @@ -2328,7 +2331,9 @@ } // Make slow path call - CallNode *call = make_slow_call( (CallNode *) lock, OptoRuntime::complete_monitor_enter_Type(), OptoRuntime::complete_monitor_locking_Java(), NULL, slow_path, obj, box ); + CallNode *call = make_slow_call((CallNode *) lock, OptoRuntime::complete_monitor_enter_Type(), + OptoRuntime::complete_monitor_locking_Java(), NULL, slow_path, + obj, box, NULL); extract_call_projections(call); @@ -2395,8 +2400,11 @@ funlock = transform_later( funlock )->as_FastUnlock(); // Optimize test; set region slot 2 Node *slow_path = opt_bits_test(ctrl, region, 2, funlock, 0, 0); + Node *thread = transform_later(new ThreadLocalNode()); - CallNode *call = make_slow_call( (CallNode *) unlock, OptoRuntime::complete_monitor_exit_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), "complete_monitor_unlocking_C", slow_path, obj, box ); + CallNode *call = make_slow_call((CallNode *) unlock, OptoRuntime::complete_monitor_exit_Type(), + CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), + "complete_monitor_unlocking_C", slow_path, obj, box, thread); extract_call_projections(call); diff -r a7ba42fa1df6 -r d4e471395ff5 hotspot/src/share/vm/opto/macro.hpp --- a/hotspot/src/share/vm/opto/macro.hpp Wed Apr 15 17:34:28 2015 -0700 +++ b/hotspot/src/share/vm/opto/macro.hpp Thu Apr 16 08:23:26 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -186,7 +186,8 @@ Node* opt_bits_test(Node* ctrl, Node* region, int edge, Node* word, int mask, int bits, bool return_fast_path = false); void copy_predefined_input_for_runtime_call(Node * ctrl, CallNode* oldcall, CallNode* call); CallNode* make_slow_call(CallNode *oldcall, const TypeFunc* slow_call_type, address slow_call, - const char* leaf_name, Node* slow_path, Node* parm0, Node* parm1); + const char* leaf_name, Node* slow_path, Node* parm0, Node* parm1, + Node* parm2); void extract_call_projections(CallNode *call); Node* initialize_object(AllocateNode* alloc, diff -r a7ba42fa1df6 -r d4e471395ff5 hotspot/src/share/vm/opto/runtime.cpp --- a/hotspot/src/share/vm/opto/runtime.cpp Wed Apr 15 17:34:28 2015 -0700 +++ b/hotspot/src/share/vm/opto/runtime.cpp Thu Apr 16 08:23:26 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -600,10 +600,11 @@ //----------------------------------------------------------------------------- const TypeFunc *OptoRuntime::complete_monitor_exit_Type() { // create input type (domain) - const Type **fields = TypeTuple::fields(2); + const Type **fields = TypeTuple::fields(3); fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Object to be Locked - fields[TypeFunc::Parms+1] = TypeRawPtr::BOTTOM; // Address of stack location for lock - const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2,fields); + fields[TypeFunc::Parms+1] = TypeRawPtr::BOTTOM; // Address of stack location for lock - BasicLock + fields[TypeFunc::Parms+2] = TypeRawPtr::BOTTOM; // Thread pointer (Self) + const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+3,fields); // create result type (range) fields = TypeTuple::fields(0); diff -r a7ba42fa1df6 -r d4e471395ff5 hotspot/src/share/vm/opto/runtime.hpp --- a/hotspot/src/share/vm/opto/runtime.hpp Wed Apr 15 17:34:28 2015 -0700 +++ b/hotspot/src/share/vm/opto/runtime.hpp Thu Apr 16 08:23:26 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -184,7 +184,7 @@ public: // Slow-path Locking and Unlocking static void complete_monitor_locking_C(oopDesc* obj, BasicLock* lock, JavaThread* thread); - static void complete_monitor_unlocking_C(oopDesc* obj, BasicLock* lock); + static void complete_monitor_unlocking_C(oopDesc* obj, BasicLock* lock, JavaThread* thread); private: diff -r a7ba42fa1df6 -r d4e471395ff5 hotspot/src/share/vm/runtime/sharedRuntime.cpp --- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp Wed Apr 15 17:34:28 2015 -0700 +++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp Thu Apr 16 08:23:26 2015 -0700 @@ -1819,9 +1819,9 @@ JRT_END // Handles the uncommon cases of monitor unlocking in compiled code -JRT_LEAF(void, SharedRuntime::complete_monitor_unlocking_C(oopDesc* _obj, BasicLock* lock)) +JRT_LEAF(void, SharedRuntime::complete_monitor_unlocking_C(oopDesc* _obj, BasicLock* lock, JavaThread * THREAD)) oop obj(_obj); - Thread* THREAD = JavaThread::current(); + assert(JavaThread::current() == THREAD, "invariant"); // I'm not convinced we need the code contained by MIGHT_HAVE_PENDING anymore // testing was unable to ever fire the assert that guarded it so I have removed it. assert(!HAS_PENDING_EXCEPTION, "Do we need code below anymore?"); diff -r a7ba42fa1df6 -r d4e471395ff5 hotspot/src/share/vm/runtime/sharedRuntime.hpp --- a/hotspot/src/share/vm/runtime/sharedRuntime.hpp Wed Apr 15 17:34:28 2015 -0700 +++ b/hotspot/src/share/vm/runtime/sharedRuntime.hpp Thu Apr 16 08:23:26 2015 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -472,7 +472,7 @@ // Slow-path Locking and Unlocking static void complete_monitor_locking_C(oopDesc* obj, BasicLock* lock, JavaThread* thread); - static void complete_monitor_unlocking_C(oopDesc* obj, BasicLock* lock); + static void complete_monitor_unlocking_C(oopDesc* obj, BasicLock* lock, JavaThread* thread); // Resolving of calls static address resolve_static_call_C (JavaThread *thread); diff -r a7ba42fa1df6 -r d4e471395ff5 hotspot/src/share/vm/runtime/synchronizer.cpp --- a/hotspot/src/share/vm/runtime/synchronizer.cpp Wed Apr 15 17:34:28 2015 -0700 +++ b/hotspot/src/share/vm/runtime/synchronizer.cpp Thu Apr 16 08:23:26 2015 -0700 @@ -109,17 +109,24 @@ } #define NINFLATIONLOCKS 256 -static volatile intptr_t InflationLocks[NINFLATIONLOCKS]; +static volatile intptr_t gInflationLocks[NINFLATIONLOCKS]; +// global list of blocks of monitors // gBlockList is really PaddedEnd *, but we don't // want to expose the PaddedEnd template more than necessary. 
ObjectMonitor * ObjectSynchronizer::gBlockList = NULL; +// global monitor free list ObjectMonitor * volatile ObjectSynchronizer::gFreeList = NULL; +// global monitor in-use list, for moribund threads, +// monitors they inflated need to be scanned for deflation ObjectMonitor * volatile ObjectSynchronizer::gOmInUseList = NULL; +// count of entries in gOmInUseList int ObjectSynchronizer::gOmInUseCount = 0; -static volatile intptr_t ListLock = 0; // protects global monitor free-list cache -static volatile int MonitorFreeCount = 0; // # on gFreeList -static volatile int MonitorPopulation = 0; // # Extant -- in circulation + +static volatile intptr_t gListLock = 0; // protects global monitor lists +static volatile int gMonitorFreeCount = 0; // # on gFreeList +static volatile int gMonitorPopulation = 0; // # Extant -- in circulation + #define CHAINMARKER (cast_to_oop(-1)) @@ -528,7 +535,7 @@ int YieldThenBlock = 0; assert(ix >= 0 && ix < NINFLATIONLOCKS, "invariant"); assert((NINFLATIONLOCKS & (NINFLATIONLOCKS-1)) == 0, "invariant"); - Thread::muxAcquire(InflationLocks + ix, "InflationLock"); + Thread::muxAcquire(gInflationLocks + ix, "gInflationLock"); while (obj->mark() == markOopDesc::INFLATING()) { // Beware: NakedYield() is advisory and has almost no effect on some platforms // so we periodically call Self->_ParkEvent->park(1). @@ -539,7 +546,7 @@ os::naked_yield(); } } - Thread::muxRelease(InflationLocks + ix); + Thread::muxRelease(gInflationLocks + ix); TEVENT(Inflate: INFLATING - yield/park); } } else { @@ -882,7 +889,7 @@ // STW-time -- disassociates idle monitors from objects. Such // scavenged monitors are returned to the gFreeList. // -// The global list is protected by ListLock. All the critical sections +// The global list is protected by gListLock. All the critical sections // are short and operate in constant-time. // // ObjectMonitors reside in type-stable memory (TSM) and are immortal. 
@@ -937,17 +944,17 @@ void ObjectSynchronizer::verifyInUse(Thread *Self) { ObjectMonitor* mid; - int inusetally = 0; + int in_use_tally = 0; for (mid = Self->omInUseList; mid != NULL; mid = mid->FreeNext) { - inusetally++; + in_use_tally++; } - assert(inusetally == Self->omInUseCount, "inuse count off"); + assert(in_use_tally == Self->omInUseCount, "in-use count off"); - int freetally = 0; + int free_tally = 0; for (mid = Self->omFreeList; mid != NULL; mid = mid->FreeNext) { - freetally++; + free_tally++; } - assert(freetally == Self->omFreeCount, "free count off"); + assert(free_tally == Self->omFreeCount, "free count off"); } ObjectMonitor * NOINLINE ObjectSynchronizer::omAlloc(Thread * Self) { @@ -964,7 +971,7 @@ // Threads will attempt to allocate first from their local list, then // from the global list, and only after those attempts fail will the thread // attempt to instantiate new monitors. Thread-local free lists take - // heat off the ListLock and improve allocation latency, as well as reducing + // heat off the gListLock and improve allocation latency, as well as reducing // coherency traffic on the shared global list. m = Self->omFreeList; if (m != NULL) { @@ -994,9 +1001,9 @@ // Reprovision the thread's omFreeList. // Use bulk transfers to reduce the allocation rate and heat // on various locks. 
- Thread::muxAcquire(&ListLock, "omAlloc"); + Thread::muxAcquire(&gListLock, "omAlloc"); for (int i = Self->omFreeProvision; --i >= 0 && gFreeList != NULL;) { - MonitorFreeCount--; + gMonitorFreeCount--; ObjectMonitor * take = gFreeList; gFreeList = take->FreeNext; guarantee(take->object() == NULL, "invariant"); @@ -1004,13 +1011,13 @@ take->Recycle(); omRelease(Self, take, false); } - Thread::muxRelease(&ListLock); + Thread::muxRelease(&gListLock); Self->omFreeProvision += 1 + (Self->omFreeProvision/2); if (Self->omFreeProvision > MAXPRIVATE) Self->omFreeProvision = MAXPRIVATE; TEVENT(omFirst - reprovision); const int mx = MonitorBound; - if (mx > 0 && (MonitorPopulation-MonitorFreeCount) > mx) { + if (mx > 0 && (gMonitorPopulation-gMonitorFreeCount) > mx) { // We can't safely induce a STW safepoint from omAlloc() as our thread // state may not be appropriate for such activities and callers may hold // naked oops, so instead we defer the action. @@ -1068,11 +1075,11 @@ // block in hand. This avoids some lock traffic and redundant // list activity. - // Acquire the ListLock to manipulate BlockList and FreeList. + // Acquire the gListLock to manipulate gBlockList and gFreeList. // An Oyama-Taura-Yonezawa scheme might be more efficient. - Thread::muxAcquire(&ListLock, "omAlloc [2]"); - MonitorPopulation += _BLOCKSIZE-1; - MonitorFreeCount += _BLOCKSIZE-1; + Thread::muxAcquire(&gListLock, "omAlloc [2]"); + gMonitorPopulation += _BLOCKSIZE-1; + gMonitorFreeCount += _BLOCKSIZE-1; // Add the new block to the list of extant blocks (gBlockList). // The very first objectMonitor in a block is reserved and dedicated. @@ -1083,7 +1090,7 @@ // Add the new string of objectMonitors to the global free list temp[_BLOCKSIZE - 1].FreeNext = gFreeList; gFreeList = temp + 1; - Thread::muxRelease(&ListLock); + Thread::muxRelease(&gListLock); TEVENT(Allocate block of monitors); } } @@ -1094,32 +1101,36 @@ // omRelease is to return a monitor to the free list after a CAS // attempt failed. 
This doesn't allow unbounded #s of monitors to // accumulate on a thread's free list. +// +// Key constraint: all ObjectMonitors on a thread's free list and the global +// free list must have their object field set to null. This prevents the +// scavenger -- deflate_idle_monitors -- from reclaiming them. void ObjectSynchronizer::omRelease(Thread * Self, ObjectMonitor * m, bool fromPerThreadAlloc) { guarantee(m->object() == NULL, "invariant"); - + guarantee(((m->is_busy()|m->_recursions) == 0), "freeing in-use monitor"); // Remove from omInUseList if (MonitorInUseLists && fromPerThreadAlloc) { - ObjectMonitor* curmidinuse = NULL; - for (ObjectMonitor* mid = Self->omInUseList; mid != NULL;) { + ObjectMonitor* cur_mid_in_use = NULL; + bool extracted = false; + for (ObjectMonitor* mid = Self->omInUseList; mid != NULL; cur_mid_in_use = mid, mid = mid->FreeNext) { if (m == mid) { - // extract from per-thread in-use-list + // extract from per-thread in-use list if (mid == Self->omInUseList) { Self->omInUseList = mid->FreeNext; - } else if (curmidinuse != NULL) { - curmidinuse->FreeNext = mid->FreeNext; // maintain the current thread inuselist + } else if (cur_mid_in_use != NULL) { + cur_mid_in_use->FreeNext = mid->FreeNext; // maintain the current thread in-use list } + extracted = true; Self->omInUseCount--; if (ObjectMonitor::Knob_VerifyInUse) { verifyInUse(Self); } break; - } else { - curmidinuse = mid; - mid = mid->FreeNext; } } + assert(extracted, "Should have extracted from in-use list"); } // FreeNext is used for both omInUseList and omFreeList, so clear old before setting new @@ -1149,52 +1160,60 @@ // operator. 
void ObjectSynchronizer::omFlush(Thread * Self) { - ObjectMonitor * List = Self->omFreeList; // Null-terminated SLL + ObjectMonitor * list = Self->omFreeList; // Null-terminated SLL Self->omFreeList = NULL; - ObjectMonitor * Tail = NULL; - int Tally = 0; - if (List != NULL) { + ObjectMonitor * tail = NULL; + int tally = 0; + if (list != NULL) { ObjectMonitor * s; - for (s = List; s != NULL; s = s->FreeNext) { - Tally++; - Tail = s; + // The thread is going away, the per-thread free monitors + // are freed via set_owner(NULL) + // Link them to tail, which will be linked into the global free list + // gFreeList below, under the gListLock + for (s = list; s != NULL; s = s->FreeNext) { + tally++; + tail = s; guarantee(s->object() == NULL, "invariant"); guarantee(!s->is_busy(), "invariant"); s->set_owner(NULL); // redundant but good hygiene TEVENT(omFlush - Move one); } - guarantee(Tail != NULL && List != NULL, "invariant"); + guarantee(tail != NULL && list != NULL, "invariant"); } - ObjectMonitor * InUseList = Self->omInUseList; - ObjectMonitor * InUseTail = NULL; - int InUseTally = 0; - if (InUseList != NULL) { + ObjectMonitor * inUseList = Self->omInUseList; + ObjectMonitor * inUseTail = NULL; + int inUseTally = 0; + if (inUseList != NULL) { Self->omInUseList = NULL; - ObjectMonitor *curom; - for (curom = InUseList; curom != NULL; curom = curom->FreeNext) { - InUseTail = curom; - InUseTally++; + ObjectMonitor *cur_om; + // The thread is going away, however the omInUseList inflated + // monitors may still be in-use by other threads. 
+ // Link them to inUseTail, which will be linked into the global in-use list + // gOmInUseList below, under the gListLock + for (cur_om = inUseList; cur_om != NULL; cur_om = cur_om->FreeNext) { + inUseTail = cur_om; + inUseTally++; } - assert(Self->omInUseCount == InUseTally, "inuse count off"); + assert(Self->omInUseCount == inUseTally, "in-use count off"); Self->omInUseCount = 0; - guarantee(InUseTail != NULL && InUseList != NULL, "invariant"); + guarantee(inUseTail != NULL && inUseList != NULL, "invariant"); } - Thread::muxAcquire(&ListLock, "omFlush"); - if (Tail != NULL) { - Tail->FreeNext = gFreeList; - gFreeList = List; - MonitorFreeCount += Tally; + Thread::muxAcquire(&gListLock, "omFlush"); + if (tail != NULL) { + tail->FreeNext = gFreeList; + gFreeList = list; + gMonitorFreeCount += tally; } - if (InUseTail != NULL) { - InUseTail->FreeNext = gOmInUseList; - gOmInUseList = InUseList; - gOmInUseCount += InUseTally; + if (inUseTail != NULL) { + inUseTail->FreeNext = gOmInUseList; + gOmInUseList = inUseList; + gOmInUseCount += inUseTally; } - Thread::muxRelease(&ListLock); + Thread::muxRelease(&gListLock); TEVENT(omFlush); } @@ -1411,14 +1430,14 @@ // // We have added a flag, MonitorInUseLists, which creates a list // of active monitors for each thread. deflate_idle_monitors() -// only scans the per-thread inuse lists. omAlloc() puts all +// only scans the per-thread in-use lists. omAlloc() puts all // assigned monitors on the per-thread list. deflate_idle_monitors() // returns the non-busy monitors to the global free list. // When a thread dies, omFlush() adds the list of active monitors for // that thread to a global gOmInUseList acquiring the // global list lock. deflate_idle_monitors() acquires the global // list lock to scan for non-busy monitors to the global free list. -// An alternative could have used a single global inuse list. The +// An alternative could have used a single global in-use list. 
The // downside would have been the additional cost of acquiring the global list lock // for every omAlloc(). // @@ -1432,8 +1451,8 @@ MaximumRecheckInterval = 1000 }; -// Deflate a single monitor if not in use -// Return true if deflated, false if in use +// Deflate a single monitor if not in-use +// Return true if deflated, false if in-use bool ObjectSynchronizer::deflate_monitor(ObjectMonitor* mid, oop obj, ObjectMonitor** freeHeadp, ObjectMonitor** freeTailp) { @@ -1465,11 +1484,11 @@ assert(mid->object() == NULL, "invariant"); - // Move the object to the working free list defined by FreeHead,FreeTail. + // Move the object to the working free list defined by freeHeadp, freeTailp if (*freeHeadp == NULL) *freeHeadp = mid; if (*freeTailp != NULL) { ObjectMonitor * prevtail = *freeTailp; - assert(prevtail->FreeNext == NULL, "cleaned up deflated?"); // TODO KK + assert(prevtail->FreeNext == NULL, "cleaned up deflated?"); prevtail->FreeNext = mid; } *freeTailp = mid; @@ -1478,38 +1497,37 @@ return deflated; } -// Caller acquires ListLock -int ObjectSynchronizer::walk_monitor_list(ObjectMonitor** listheadp, - ObjectMonitor** freeHeadp, - ObjectMonitor** freeTailp) { +// Walk a given monitor list, and deflate idle monitors +// The given list could be a per-thread list or a global list +// Caller acquires gListLock +int ObjectSynchronizer::deflate_monitor_list(ObjectMonitor** listHeadp, + ObjectMonitor** freeHeadp, + ObjectMonitor** freeTailp) { ObjectMonitor* mid; ObjectMonitor* next; - ObjectMonitor* curmidinuse = NULL; - int deflatedcount = 0; + ObjectMonitor* cur_mid_in_use = NULL; + int deflated_count = 0; - for (mid = *listheadp; mid != NULL;) { + for (mid = *listHeadp; mid != NULL;) { oop obj = (oop) mid->object(); - bool deflated = false; - if (obj != NULL) { - deflated = deflate_monitor(mid, obj, freeHeadp, freeTailp); - } - if (deflated) { - // extract from per-thread in-use-list - if (mid == *listheadp) { - *listheadp = mid->FreeNext; - } else if (curmidinuse 
!= NULL) { - curmidinuse->FreeNext = mid->FreeNext; // maintain the current thread inuselist + if (obj != NULL && deflate_monitor(mid, obj, freeHeadp, freeTailp)) { + // if deflate_monitor succeeded, + // extract from per-thread in-use list + if (mid == *listHeadp) { + *listHeadp = mid->FreeNext; + } else if (cur_mid_in_use != NULL) { + cur_mid_in_use->FreeNext = mid->FreeNext; // maintain the current thread in-use list } next = mid->FreeNext; - mid->FreeNext = NULL; // This mid is current tail in the FreeHead list + mid->FreeNext = NULL; // This mid is current tail in the freeHeadp list mid = next; - deflatedcount++; + deflated_count++; } else { - curmidinuse = mid; + cur_mid_in_use = mid; mid = mid->FreeNext; } } - return deflatedcount; + return deflated_count; } void ObjectSynchronizer::deflate_idle_monitors() { @@ -1519,34 +1537,34 @@ int nScavenged = 0; // reclaimed bool deflated = false; - ObjectMonitor * FreeHead = NULL; // Local SLL of scavenged monitors - ObjectMonitor * FreeTail = NULL; + ObjectMonitor * freeHeadp = NULL; // Local SLL of scavenged monitors + ObjectMonitor * freeTailp = NULL; TEVENT(deflate_idle_monitors); // Prevent omFlush from changing mids in Thread dtor's during deflation // And in case the vm thread is acquiring a lock during a safepoint // See e.g. 
6320749 - Thread::muxAcquire(&ListLock, "scavenge - return"); + Thread::muxAcquire(&gListLock, "scavenge - return"); if (MonitorInUseLists) { int inUse = 0; for (JavaThread* cur = Threads::first(); cur != NULL; cur = cur->next()) { nInCirculation+= cur->omInUseCount; - int deflatedcount = walk_monitor_list(cur->omInUseList_addr(), &FreeHead, &FreeTail); - cur->omInUseCount-= deflatedcount; + int deflated_count = deflate_monitor_list(cur->omInUseList_addr(), &freeHeadp, &freeTailp); + cur->omInUseCount-= deflated_count; if (ObjectMonitor::Knob_VerifyInUse) { verifyInUse(cur); } - nScavenged += deflatedcount; + nScavenged += deflated_count; nInuse += cur->omInUseCount; } // For moribund threads, scan gOmInUseList if (gOmInUseList) { nInCirculation += gOmInUseCount; - int deflatedcount = walk_monitor_list((ObjectMonitor **)&gOmInUseList, &FreeHead, &FreeTail); - gOmInUseCount-= deflatedcount; - nScavenged += deflatedcount; + int deflated_count = deflate_monitor_list((ObjectMonitor **)&gOmInUseList, &freeHeadp, &freeTailp); + gOmInUseCount-= deflated_count; + nScavenged += deflated_count; nInuse += gOmInUseCount; } @@ -1568,7 +1586,7 @@ guarantee(!mid->is_busy(), "invariant"); continue; } - deflated = deflate_monitor(mid, obj, &FreeHead, &FreeTail); + deflated = deflate_monitor(mid, obj, &freeHeadp, &freeTailp); if (deflated) { mid->FreeNext = NULL; @@ -1579,28 +1597,28 @@ } } - MonitorFreeCount += nScavenged; + gMonitorFreeCount += nScavenged; - // Consider: audit gFreeList to ensure that MonitorFreeCount and list agree. + // Consider: audit gFreeList to ensure that gMonitorFreeCount and list agree. 
if (ObjectMonitor::Knob_Verbose) { ::printf("Deflate: InCirc=%d InUse=%d Scavenged=%d ForceMonitorScavenge=%d : pop=%d free=%d\n", nInCirculation, nInuse, nScavenged, ForceMonitorScavenge, - MonitorPopulation, MonitorFreeCount); + gMonitorPopulation, gMonitorFreeCount); ::fflush(stdout); } ForceMonitorScavenge = 0; // Reset // Move the scavenged monitors back to the global free list. - if (FreeHead != NULL) { - guarantee(FreeTail != NULL && nScavenged > 0, "invariant"); - assert(FreeTail->FreeNext == NULL, "invariant"); + if (freeHeadp != NULL) { + guarantee(freeTailp != NULL && nScavenged > 0, "invariant"); + assert(freeTailp->FreeNext == NULL, "invariant"); // constant-time list splice - prepend scavenged segment to gFreeList - FreeTail->FreeNext = gFreeList; - gFreeList = FreeHead; + freeTailp->FreeNext = gFreeList; + gFreeList = freeHeadp; } - Thread::muxRelease(&ListLock); + Thread::muxRelease(&gListLock); if (ObjectMonitor::_sync_Deflations != NULL) ObjectMonitor::_sync_Deflations->inc(nScavenged); if (ObjectMonitor::_sync_MonExtant != NULL) ObjectMonitor::_sync_MonExtant ->set_value(nInCirculation); @@ -1648,9 +1666,9 @@ assert(THREAD == JavaThread::current(), "must be current Java thread"); No_Safepoint_Verifier nsv; ReleaseJavaMonitorsClosure rjmc(THREAD); - Thread::muxAcquire(&ListLock, "release_monitors_owned_by_thread"); + Thread::muxAcquire(&gListLock, "release_monitors_owned_by_thread"); ObjectSynchronizer::monitors_iterate(&rjmc); - Thread::muxRelease(&ListLock); + Thread::muxRelease(&gListLock); THREAD->clear_pending_exception(); } diff -r a7ba42fa1df6 -r d4e471395ff5 hotspot/src/share/vm/runtime/synchronizer.hpp --- a/hotspot/src/share/vm/runtime/synchronizer.hpp Wed Apr 15 17:34:28 2015 -0700 +++ b/hotspot/src/share/vm/runtime/synchronizer.hpp Thu Apr 16 08:23:26 2015 -0700 @@ -116,9 +116,10 @@ // Basically we deflate all monitors that are not busy. 
// An adaptive profile-based deflation policy could be used if needed static void deflate_idle_monitors(); - static int walk_monitor_list(ObjectMonitor** listheadp, - ObjectMonitor** freeHeadp, - ObjectMonitor** freeTailp); + // For a given monitor list: global or per-thread, deflate idle monitors + static int deflate_monitor_list(ObjectMonitor** listheadp, + ObjectMonitor** freeHeadp, + ObjectMonitor** freeTailp); static bool deflate_monitor(ObjectMonitor* mid, oop obj, ObjectMonitor** freeHeadp, ObjectMonitor** freeTailp); @@ -135,16 +136,17 @@ private: enum { _BLOCKSIZE = 128 }; + // global list of blocks of monitors // gBlockList is really PaddedEnd *, but we don't // want to expose the PaddedEnd template more than necessary. - static ObjectMonitor* gBlockList; + static ObjectMonitor * gBlockList; + // global monitor free list static ObjectMonitor * volatile gFreeList; - // global monitor in use list, for moribund threads, + // global monitor in-use list, for moribund threads, // monitors they inflated need to be scanned for deflation static ObjectMonitor * volatile gOmInUseList; // count of entries in gOmInUseList static int gOmInUseCount; - }; // ObjectLocker enforced balanced locking and can never thrown an