2646 |
2646 |
2647 if (counters != NULL) { |
2647 if (counters != NULL) { |
2648 inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch); |
2648 inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch); |
2649 } |
2649 } |
2650 |
2650 |
2651 if (EmitSync & 1) { |
|
2652 mov(3, Rscratch); |
|
2653 st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); |
|
2654 cmp(SP, G0); |
|
2655 return ; |
|
2656 } |
|
2657 |
|
2658 if (EmitSync & 2) { |
|
2659 |
|
2660 // Fetch object's markword |
|
2661 ld_ptr(mark_addr, Rmark); |
|
2662 |
|
2663 if (try_bias) { |
|
2664 biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); |
|
2665 } |
|
2666 |
|
2667 // Save Rbox in Rscratch to be used for the cas operation |
|
2668 mov(Rbox, Rscratch); |
|
2669 |
|
2670 // set Rmark to markOop | markOopDesc::unlocked_value |
|
2671 or3(Rmark, markOopDesc::unlocked_value, Rmark); |
|
2672 |
|
2673 // Initialize the box. (Must happen before we update the object mark!) |
|
2674 st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); |
|
2675 |
|
2676 // compare object markOop with Rmark and if equal exchange Rscratch with object markOop |
|
2677 assert(mark_addr.disp() == 0, "cas must take a zero displacement"); |
|
2678 cas_ptr(mark_addr.base(), Rmark, Rscratch); |
|
2679 |
|
2680 // if compare/exchange succeeded we found an unlocked object and we now have locked it |
|
2681 // hence we are done |
|
2682 cmp(Rmark, Rscratch); |
|
2683 sub(Rscratch, STACK_BIAS, Rscratch); |
|
2684 brx(Assembler::equal, false, Assembler::pt, done); |
|
2685 delayed()->sub(Rscratch, SP, Rscratch); //pull next instruction into delay slot |
|
2686 |
|
2687 // we did not find an unlocked object so see if this is a recursive case |
|
2688 // sub(Rscratch, SP, Rscratch); |
|
2689 assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); |
|
2690 andcc(Rscratch, 0xfffff003, Rscratch); |
|
2691 st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); |
|
2692 bind (done); |
|
2693 return ; |
|
2694 } |
|
2695 |
|
2696 Label Egress ; |
2651 Label Egress ; |
2697 |
2652 |
2698 if (EmitSync & 256) { |
2653 // Aggressively avoid the Store-before-CAS penalty |
2699 Label IsInflated ; |
2654 // Defer the store into box->dhw until after the CAS |
2700 |
2655 Label IsInflated, Recursive ; |
2701 ld_ptr(mark_addr, Rmark); // fetch obj->mark |
|
2702 // Triage: biased, stack-locked, neutral, inflated |
|
2703 if (try_bias) { |
|
2704 biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); |
|
2705 // Invariant: if control reaches this point in the emitted stream |
|
2706 // then Rmark has not been modified. |
|
2707 } |
|
2708 |
|
2709 // Store mark into displaced mark field in the on-stack basic-lock "box" |
|
2710 // Critically, this must happen before the CAS |
|
2711 // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty. |
|
2712 st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); |
|
2713 andcc(Rmark, 2, G0); |
|
2714 brx(Assembler::notZero, false, Assembler::pn, IsInflated); |
|
2715 delayed()-> |
|
2716 |
|
2717 // Try stack-lock acquisition. |
|
2718 // Beware: the 1st instruction is in a delay slot |
|
2719 mov(Rbox, Rscratch); |
|
2720 or3(Rmark, markOopDesc::unlocked_value, Rmark); |
|
2721 assert(mark_addr.disp() == 0, "cas must take a zero displacement"); |
|
2722 cas_ptr(mark_addr.base(), Rmark, Rscratch); |
|
2723 cmp(Rmark, Rscratch); |
|
2724 brx(Assembler::equal, false, Assembler::pt, done); |
|
2725 delayed()->sub(Rscratch, SP, Rscratch); |
|
2726 |
|
2727 // Stack-lock attempt failed - check for recursive stack-lock. |
|
2728 // See the comments below about how we might remove this case. |
|
2729 sub(Rscratch, STACK_BIAS, Rscratch); |
|
2730 assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); |
|
2731 andcc(Rscratch, 0xfffff003, Rscratch); |
|
2732 br(Assembler::always, false, Assembler::pt, done); |
|
2733 delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); |
|
2734 |
|
2735 bind(IsInflated); |
|
2736 if (EmitSync & 64) { |
|
2737 // If m->owner != null goto IsLocked |
|
2738 // Pessimistic form: Test-and-CAS vs CAS |
|
2739 // The optimistic form avoids RTS->RTO cache line upgrades. |
|
2740 ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rscratch); |
|
2741 andcc(Rscratch, Rscratch, G0); |
|
2742 brx(Assembler::notZero, false, Assembler::pn, done); |
|
2743 delayed()->nop(); |
|
2744 // m->owner == null : it's unlocked. |
|
2745 } |
|
2746 |
|
2747 // Try to CAS m->owner from null to Self |
|
2748 // Invariant: if we acquire the lock then _recursions should be 0. |
|
2749 add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark); |
|
2750 mov(G2_thread, Rscratch); |
|
2751 cas_ptr(Rmark, G0, Rscratch); |
|
2752 cmp(Rscratch, G0); |
|
2753 // Intentional fall-through into done |
|
2754 } else { |
|
2755 // Aggressively avoid the Store-before-CAS penalty |
|
2756 // Defer the store into box->dhw until after the CAS |
|
2757 Label IsInflated, Recursive ; |
|
2758 |
2656 |
2759 // Anticipate CAS -- Avoid RTS->RTO upgrade |
2657 // Anticipate CAS -- Avoid RTS->RTO upgrade |
2760 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads); |
2658 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads); |
2761 |
2659 |
2762 ld_ptr(mark_addr, Rmark); // fetch obj->mark |
2660 ld_ptr(mark_addr, Rmark); // fetch obj->mark |
2763 // Triage: biased, stack-locked, neutral, inflated |
2661 // Triage: biased, stack-locked, neutral, inflated |
2764 |
2662 |
2765 if (try_bias) { |
2663 if (try_bias) { |
2766 biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); |
2664 biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); |
2767 // Invariant: if control reaches this point in the emitted stream |
2665 // Invariant: if control reaches this point in the emitted stream |
2768 // then Rmark has not been modified. |
2666 // then Rmark has not been modified. |
2769 } |
2667 } |
2770 andcc(Rmark, 2, G0); |
2668 andcc(Rmark, 2, G0); |
2771 brx(Assembler::notZero, false, Assembler::pn, IsInflated); |
2669 brx(Assembler::notZero, false, Assembler::pn, IsInflated); |
2772 delayed()-> // Beware - dangling delay-slot |
2670 delayed()-> // Beware - dangling delay-slot |
2773 |
2671 |
2774 // Try stack-lock acquisition. |
2672 // Try stack-lock acquisition. |
2775 // Transiently install BUSY (0) encoding in the mark word. |
2673 // Transiently install BUSY (0) encoding in the mark word. |
2776 // if the CAS of 0 into the mark was successful then we execute: |
2674 // if the CAS of 0 into the mark was successful then we execute: |
2777 // ST box->dhw = mark -- save fetched mark in on-stack basiclock box |
2675 // ST box->dhw = mark -- save fetched mark in on-stack basiclock box |
2778 // ST obj->mark = box -- overwrite transient 0 value |
2676 // ST obj->mark = box -- overwrite transient 0 value |
2779 // This presumes TSO, of course. |
2677 // This presumes TSO, of course. |
2780 |
2678 |
2781 mov(0, Rscratch); |
2679 mov(0, Rscratch); |
2782 or3(Rmark, markOopDesc::unlocked_value, Rmark); |
2680 or3(Rmark, markOopDesc::unlocked_value, Rmark); |
2783 assert(mark_addr.disp() == 0, "cas must take a zero displacement"); |
2681 assert(mark_addr.disp() == 0, "cas must take a zero displacement"); |
2784 cas_ptr(mark_addr.base(), Rmark, Rscratch); |
2682 cas_ptr(mark_addr.base(), Rmark, Rscratch); |
2785 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads); |
2683 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads); |
2786 cmp(Rscratch, Rmark); |
2684 cmp(Rscratch, Rmark); |
2787 brx(Assembler::notZero, false, Assembler::pn, Recursive); |
2685 brx(Assembler::notZero, false, Assembler::pn, Recursive); |
2788 delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); |
2686 delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); |
2789 if (counters != NULL) { |
2687 if (counters != NULL) { |
2790 cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch); |
2688 cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch); |
2791 } |
|
2792 ba(done); |
|
2793 delayed()->st_ptr(Rbox, mark_addr); |
|
2794 |
|
2795 bind(Recursive); |
|
2796 // Stack-lock attempt failed - check for recursive stack-lock. |
|
2797 // Tests show that we can remove the recursive case with no impact |
|
2798 // on refworkload 0.83. If we need to reduce the size of the code |
|
2799 // emitted by compiler_lock_object() the recursive case is perfect |
|
2800 // candidate. |
|
2801 // |
|
2802 // A more extreme idea is to always inflate on stack-lock recursion. |
|
2803 // This lets us eliminate the recursive checks in compiler_lock_object |
|
2804 // and compiler_unlock_object and the (box->dhw == 0) encoding. |
|
2805 // A brief experiment - requiring changes to synchronizer.cpp, interpreter, |
|
2806 // and showed a performance *increase*. In the same experiment I eliminated |
|
2807 // the fast-path stack-lock code from the interpreter and always passed |
|
2808 // control to the "slow" operators in synchronizer.cpp. |
|
2809 |
|
2810 // RScratch contains the fetched obj->mark value from the failed CAS. |
|
2811 sub(Rscratch, STACK_BIAS, Rscratch); |
|
2812 sub(Rscratch, SP, Rscratch); |
|
2813 assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); |
|
2814 andcc(Rscratch, 0xfffff003, Rscratch); |
|
2815 if (counters != NULL) { |
|
2816 // Accounting needs the Rscratch register |
|
2817 st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); |
|
2818 cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch); |
|
2819 ba_short(done); |
|
2820 } else { |
|
2821 ba(done); |
|
2822 delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); |
|
2823 } |
|
2824 |
|
2825 bind (IsInflated); |
|
2826 |
|
2827 // Try to CAS m->owner from null to Self |
|
2828 // Invariant: if we acquire the lock then _recursions should be 0. |
|
2829 add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark); |
|
2830 mov(G2_thread, Rscratch); |
|
2831 cas_ptr(Rmark, G0, Rscratch); |
|
2832 andcc(Rscratch, Rscratch, G0); // set ICCs for done: icc.zf iff success |
|
2833 // set icc.zf : 1=success 0=failure |
|
2834 // ST box->displaced_header = NonZero. |
|
2835 // Any non-zero value suffices: |
|
2836 // markOopDesc::unused_mark(), G2_thread, RBox, RScratch, rsp, etc. |
|
2837 st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes()); |
|
2838 // Intentional fall-through into done |
|
2839 } |
2689 } |
|
2690 ba(done); |
|
2691 delayed()->st_ptr(Rbox, mark_addr); |
|
2692 |
|
2693 bind(Recursive); |
|
2694 // Stack-lock attempt failed - check for recursive stack-lock. |
|
2695 // Tests show that we can remove the recursive case with no impact |
|
2696 // on refworkload 0.83. If we need to reduce the size of the code |
|
2697 // emitted by compiler_lock_object() the recursive case is perfect |
|
2698 // candidate. |
|
2699 // |
|
2700 // A more extreme idea is to always inflate on stack-lock recursion. |
|
2701 // This lets us eliminate the recursive checks in compiler_lock_object |
|
2702 // and compiler_unlock_object and the (box->dhw == 0) encoding. |
|
2703 // A brief experiment - requiring changes to synchronizer.cpp, interpreter, |
|
2704 // and showed a performance *increase*. In the same experiment I eliminated |
|
2705 // the fast-path stack-lock code from the interpreter and always passed |
|
2706 // control to the "slow" operators in synchronizer.cpp. |
|
2707 |
|
2708 // RScratch contains the fetched obj->mark value from the failed CAS. |
|
2709 sub(Rscratch, STACK_BIAS, Rscratch); |
|
2710 sub(Rscratch, SP, Rscratch); |
|
2711 assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); |
|
2712 andcc(Rscratch, 0xfffff003, Rscratch); |
|
2713 if (counters != NULL) { |
|
2714 // Accounting needs the Rscratch register |
|
2715 st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); |
|
2716 cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch); |
|
2717 ba_short(done); |
|
2718 } else { |
|
2719 ba(done); |
|
2720 delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); |
|
2721 } |
|
2722 |
|
2723 bind (IsInflated); |
|
2724 |
|
2725 // Try to CAS m->owner from null to Self |
|
2726 // Invariant: if we acquire the lock then _recursions should be 0. |
|
2727 add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark); |
|
2728 mov(G2_thread, Rscratch); |
|
2729 cas_ptr(Rmark, G0, Rscratch); |
|
2730 andcc(Rscratch, Rscratch, G0); // set ICCs for done: icc.zf iff success |
|
2731 // set icc.zf : 1=success 0=failure |
|
2732 // ST box->displaced_header = NonZero. |
|
2733 // Any non-zero value suffices: |
|
2734 // markOopDesc::unused_mark(), G2_thread, RBox, RScratch, rsp, etc. |
|
2735 st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes()); |
|
2736 // Intentional fall-through into done |
2840 |
2737 |
2841 bind (done); |
2738 bind (done); |
2842 } |
2739 } |
2843 |
2740 |
2844 void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark, |
2741 void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark, |
2845 Register Rbox, Register Rscratch, |
2742 Register Rbox, Register Rscratch, |
2846 bool try_bias) { |
2743 bool try_bias) { |
2847 Address mark_addr(Roop, oopDesc::mark_offset_in_bytes()); |
2744 Address mark_addr(Roop, oopDesc::mark_offset_in_bytes()); |
2848 |
2745 |
2849 Label done ; |
2746 Label done ; |
2850 |
|
2851 if (EmitSync & 4) { |
|
2852 cmp(SP, G0); |
|
2853 return ; |
|
2854 } |
|
2855 |
|
2856 if (EmitSync & 8) { |
|
2857 if (try_bias) { |
|
2858 biased_locking_exit(mark_addr, Rscratch, done); |
|
2859 } |
|
2860 |
|
2861 // Test first if it is a fast recursive unlock |
|
2862 ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark); |
|
2863 br_null_short(Rmark, Assembler::pt, done); |
|
2864 |
|
2865 // Check if it is still a light weight lock, this is true if we see |
|
2866 // the stack address of the basicLock in the markOop of the object |
|
2867 assert(mark_addr.disp() == 0, "cas must take a zero displacement"); |
|
2868 cas_ptr(mark_addr.base(), Rbox, Rmark); |
|
2869 ba(done); |
|
2870 delayed()->cmp(Rbox, Rmark); |
|
2871 bind(done); |
|
2872 return ; |
|
2873 } |
|
2874 |
2747 |
2875 // Beware ... If the aggregate size of the code emitted by CLO and CUO is |
2748 // Beware ... If the aggregate size of the code emitted by CLO and CUO is |
2876 // too large performance rolls abruptly off a cliff. |
2749 // too large performance rolls abruptly off a cliff. |
2877 // This could be related to inlining policies, code cache management, or |
2750 // This could be related to inlining policies, code cache management, or |
2878 // I$ effects. |
2751 // I$ effects. |
2900 // and that particular flavor of barrier is a noop, so we can safely elide it. |
2773 // and that particular flavor of barrier is a noop, so we can safely elide it. |
2901 // Note that we use 1-0 locking by default for the inflated case. We |
2774 // Note that we use 1-0 locking by default for the inflated case. We |
2902 // close the resultant (and rare) race by having contended threads in |
2775 // close the resultant (and rare) race by having contended threads in |
2903 // monitorenter periodically poll _owner. |
2776 // monitorenter periodically poll _owner. |
2904 |
2777 |
2905 if (EmitSync & 1024) { |
2778 // 1-0 form : avoids CAS and MEMBAR in the common case |
2906 // Emit code to check that _owner == Self |
2779 // Do not bother to ratify that m->Owner == Self. |
2907 // We could fold the _owner test into subsequent code more efficiently |
2780 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox); |
2908 // than using a stand-alone check, but since _owner checking is off by |
2781 orcc(Rbox, G0, G0); |
2909 // default we don't bother. We also might consider predicating the |
2782 brx(Assembler::notZero, false, Assembler::pn, done); |
2910 // _owner==Self check on Xcheck:jni or running on a debug build. |
2783 delayed()-> |
2911 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), Rscratch); |
2784 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch); |
2912 orcc(Rscratch, G0, G0); |
2785 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox); |
2913 brx(Assembler::notZero, false, Assembler::pn, done); |
2786 orcc(Rbox, Rscratch, G0); |
2914 delayed()->nop(); |
2787 brx(Assembler::zero, false, Assembler::pt, done); |
2915 } |
2788 delayed()-> |
2916 |
2789 st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); |
2917 if (EmitSync & 512) { |
2790 |
2918 // classic lock release code absent 1-0 locking |
2791 if (os::is_MP()) { membar(StoreLoad); } |
2919 // m->Owner = null; |
2792 // Check that _succ is (or remains) non-zero |
2920 // membar #storeload |
2793 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch); |
2921 // if (m->cxq|m->EntryList) == null goto Success |
2794 andcc(Rscratch, Rscratch, G0); |
2922 // if (m->succ != null) goto Success |
2795 brx(Assembler::notZero, false, Assembler::pt, done); |
2923 // if CAS (&m->Owner,0,Self) != 0 goto Success |
2796 delayed()->andcc(G0, G0, G0); |
2924 // goto SlowPath |
2797 add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark); |
2925 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox); |
2798 mov(G2_thread, Rscratch); |
2926 orcc(Rbox, G0, G0); |
2799 cas_ptr(Rmark, G0, Rscratch); |
2927 brx(Assembler::notZero, false, Assembler::pn, done); |
2800 cmp(Rscratch, G0); |
2928 delayed()->nop(); |
2801 // invert icc.zf and goto done |
2929 st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); |
2802 // A slightly better v8+/v9 idiom would be the following: |
2930 if (os::is_MP()) { membar(StoreLoad); } |
2803 // movrnz Rscratch,1,Rscratch |
2931 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch); |
2804 // ba done |
2932 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox); |
2805 // xorcc Rscratch,1,G0 |
2933 orcc(Rbox, Rscratch, G0); |
2806 // In v8+ mode the idiom would be valid IFF Rscratch was a G or O register |
2934 brx(Assembler::zero, false, Assembler::pt, done); |
2807 brx(Assembler::notZero, false, Assembler::pt, done); |
2935 delayed()-> |
2808 delayed()->cmp(G0, G0); |
2936 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch); |
2809 br(Assembler::always, false, Assembler::pt, done); |
2937 andcc(Rscratch, Rscratch, G0); |
2810 delayed()->cmp(G0, 1); |
2938 brx(Assembler::notZero, false, Assembler::pt, done); |
|
2939 delayed()->andcc(G0, G0, G0); |
|
2940 add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark); |
|
2941 mov(G2_thread, Rscratch); |
|
2942 cas_ptr(Rmark, G0, Rscratch); |
|
2943 cmp(Rscratch, G0); |
|
2944 // invert icc.zf and goto done |
|
2945 brx(Assembler::notZero, false, Assembler::pt, done); |
|
2946 delayed()->cmp(G0, G0); |
|
2947 br(Assembler::always, false, Assembler::pt, done); |
|
2948 delayed()->cmp(G0, 1); |
|
2949 } else { |
|
2950 // 1-0 form : avoids CAS and MEMBAR in the common case |
|
2951 // Do not bother to ratify that m->Owner == Self. |
|
2952 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox); |
|
2953 orcc(Rbox, G0, G0); |
|
2954 brx(Assembler::notZero, false, Assembler::pn, done); |
|
2955 delayed()-> |
|
2956 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch); |
|
2957 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox); |
|
2958 orcc(Rbox, Rscratch, G0); |
|
2959 if (EmitSync & 16384) { |
|
2960 // As an optional optimization, if (EntryList|cxq) != null and _succ is null then |
|
2961 // we should transfer control directly to the slow-path. |
|
2962 // This test makes the reacquire operation below very infrequent. |
|
2963 // The logic is equivalent to : |
|
2964 // if (cxq|EntryList) == null : Owner=null; goto Success |
|
2965 // if succ == null : goto SlowPath |
|
2966 // Owner=null; membar #storeload |
|
2967 // if succ != null : goto Success |
|
2968 // if CAS(&Owner,null,Self) != null goto Success |
|
2969 // goto SlowPath |
|
2970 brx(Assembler::zero, true, Assembler::pt, done); |
|
2971 delayed()-> |
|
2972 st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); |
|
2973 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch); |
|
2974 andcc(Rscratch, Rscratch, G0) ; |
|
2975 brx(Assembler::zero, false, Assembler::pt, done); |
|
2976 delayed()->orcc(G0, 1, G0); |
|
2977 st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); |
|
2978 } else { |
|
2979 brx(Assembler::zero, false, Assembler::pt, done); |
|
2980 delayed()-> |
|
2981 st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); |
|
2982 } |
|
2983 if (os::is_MP()) { membar(StoreLoad); } |
|
2984 // Check that _succ is (or remains) non-zero |
|
2985 ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch); |
|
2986 andcc(Rscratch, Rscratch, G0); |
|
2987 brx(Assembler::notZero, false, Assembler::pt, done); |
|
2988 delayed()->andcc(G0, G0, G0); |
|
2989 add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark); |
|
2990 mov(G2_thread, Rscratch); |
|
2991 cas_ptr(Rmark, G0, Rscratch); |
|
2992 cmp(Rscratch, G0); |
|
2993 // invert icc.zf and goto done |
|
2994 // A slightly better v8+/v9 idiom would be the following: |
|
2995 // movrnz Rscratch,1,Rscratch |
|
2996 // ba done |
|
2997 // xorcc Rscratch,1,G0 |
|
2998 // In v8+ mode the idiom would be valid IFF Rscratch was a G or O register |
|
2999 brx(Assembler::notZero, false, Assembler::pt, done); |
|
3000 delayed()->cmp(G0, G0); |
|
3001 br(Assembler::always, false, Assembler::pt, done); |
|
3002 delayed()->cmp(G0, 1); |
|
3003 } |
|
3004 |
2811 |
3005 bind (LStacked); |
2812 bind (LStacked); |
3006 // Consider: we could replace the expensive CAS in the exit |
2813 // Consider: we could replace the expensive CAS in the exit |
3007 // path with a simple ST of the displaced mark value fetched from |
2814 // path with a simple ST of the displaced mark value fetched from |
3008 // the on-stack basiclock box. That admits a race where a thread T2 |
2815 // the on-stack basiclock box. That admits a race where a thread T2 |