src/hotspot/cpu/sparc/macroAssembler_sparc.cpp
changeset 51663 a65d8a6fa424
parent 50803 45c1fde86050
child 51756 4bd35a5ec694
equal deleted inserted replaced
51662:fe4349d27282 51663:a65d8a6fa424
  2646 
  2646 
  2647    if (counters != NULL) {
  2647    if (counters != NULL) {
  2648      inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch);
  2648      inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch);
  2649    }
  2649    }
  2650 
  2650 
  2651    if (EmitSync & 1) {
       
  2652      mov(3, Rscratch);
       
  2653      st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
       
  2654      cmp(SP, G0);
       
  2655      return ;
       
  2656    }
       
  2657 
       
  2658    if (EmitSync & 2) {
       
  2659 
       
  2660      // Fetch object's markword
       
  2661      ld_ptr(mark_addr, Rmark);
       
  2662 
       
  2663      if (try_bias) {
       
  2664         biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
       
  2665      }
       
  2666 
       
  2667      // Save Rbox in Rscratch to be used for the cas operation
       
  2668      mov(Rbox, Rscratch);
       
  2669 
       
  2670      // set Rmark to markOop | markOopDesc::unlocked_value
       
  2671      or3(Rmark, markOopDesc::unlocked_value, Rmark);
       
  2672 
       
  2673      // Initialize the box.  (Must happen before we update the object mark!)
       
  2674      st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
       
  2675 
       
  2676      // compare object markOop with Rmark and if equal exchange Rscratch with object markOop
       
  2677      assert(mark_addr.disp() == 0, "cas must take a zero displacement");
       
  2678      cas_ptr(mark_addr.base(), Rmark, Rscratch);
       
  2679 
       
  2680      // if compare/exchange succeeded we found an unlocked object and we now have locked it
       
  2681      // hence we are done
       
  2682      cmp(Rmark, Rscratch);
       
  2683      sub(Rscratch, STACK_BIAS, Rscratch);
       
  2684      brx(Assembler::equal, false, Assembler::pt, done);
       
  2685      delayed()->sub(Rscratch, SP, Rscratch);  //pull next instruction into delay slot
       
  2686 
       
  2687      // we did not find an unlocked object so see if this is a recursive case
       
  2688      // sub(Rscratch, SP, Rscratch);
       
  2689      assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
       
  2690      andcc(Rscratch, 0xfffff003, Rscratch);
       
  2691      st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
       
  2692      bind (done);
       
  2693      return ;
       
  2694    }
       
  2695 
       
  2696    Label Egress ;
  2651    Label Egress ;
  2697 
  2652 
  2698    if (EmitSync & 256) {
  2653    // Aggressively avoid the Store-before-CAS penalty
  2699       Label IsInflated ;
  2654    // Defer the store into box->dhw until after the CAS
  2700 
  2655    Label IsInflated, Recursive ;
  2701       ld_ptr(mark_addr, Rmark);           // fetch obj->mark
       
  2702       // Triage: biased, stack-locked, neutral, inflated
       
  2703       if (try_bias) {
       
  2704         biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
       
  2705         // Invariant: if control reaches this point in the emitted stream
       
  2706         // then Rmark has not been modified.
       
  2707       }
       
  2708 
       
  2709       // Store mark into displaced mark field in the on-stack basic-lock "box"
       
  2710       // Critically, this must happen before the CAS
       
  2711       // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
       
  2712       st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
       
  2713       andcc(Rmark, 2, G0);
       
  2714       brx(Assembler::notZero, false, Assembler::pn, IsInflated);
       
  2715       delayed()->
       
  2716 
       
  2717       // Try stack-lock acquisition.
       
  2718       // Beware: the 1st instruction is in a delay slot
       
  2719       mov(Rbox,  Rscratch);
       
  2720       or3(Rmark, markOopDesc::unlocked_value, Rmark);
       
  2721       assert(mark_addr.disp() == 0, "cas must take a zero displacement");
       
  2722       cas_ptr(mark_addr.base(), Rmark, Rscratch);
       
  2723       cmp(Rmark, Rscratch);
       
  2724       brx(Assembler::equal, false, Assembler::pt, done);
       
  2725       delayed()->sub(Rscratch, SP, Rscratch);
       
  2726 
       
  2727       // Stack-lock attempt failed - check for recursive stack-lock.
       
  2728       // See the comments below about how we might remove this case.
       
  2729       sub(Rscratch, STACK_BIAS, Rscratch);
       
  2730       assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
       
  2731       andcc(Rscratch, 0xfffff003, Rscratch);
       
  2732       br(Assembler::always, false, Assembler::pt, done);
       
  2733       delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
       
  2734 
       
  2735       bind(IsInflated);
       
  2736       if (EmitSync & 64) {
       
  2737          // If m->owner != null goto IsLocked
       
  2738          // Pessimistic form: Test-and-CAS vs CAS
       
  2739          // The optimistic form avoids RTS->RTO cache line upgrades.
       
  2740          ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rscratch);
       
  2741          andcc(Rscratch, Rscratch, G0);
       
  2742          brx(Assembler::notZero, false, Assembler::pn, done);
       
  2743          delayed()->nop();
       
  2744          // m->owner == null : it's unlocked.
       
  2745       }
       
  2746 
       
  2747       // Try to CAS m->owner from null to Self
       
  2748       // Invariant: if we acquire the lock then _recursions should be 0.
       
  2749       add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
       
  2750       mov(G2_thread, Rscratch);
       
  2751       cas_ptr(Rmark, G0, Rscratch);
       
  2752       cmp(Rscratch, G0);
       
  2753       // Intentional fall-through into done
       
  2754    } else {
       
  2755       // Aggressively avoid the Store-before-CAS penalty
       
  2756       // Defer the store into box->dhw until after the CAS
       
  2757       Label IsInflated, Recursive ;
       
  2758 
  2656 
  2759 // Anticipate CAS -- Avoid RTS->RTO upgrade
  2657 // Anticipate CAS -- Avoid RTS->RTO upgrade
  2760 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
  2658 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
  2761 
  2659 
  2762       ld_ptr(mark_addr, Rmark);           // fetch obj->mark
  2660    ld_ptr(mark_addr, Rmark);           // fetch obj->mark
  2763       // Triage: biased, stack-locked, neutral, inflated
  2661    // Triage: biased, stack-locked, neutral, inflated
  2764 
  2662 
  2765       if (try_bias) {
  2663    if (try_bias) {
  2766         biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
  2664      biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
  2767         // Invariant: if control reaches this point in the emitted stream
  2665      // Invariant: if control reaches this point in the emitted stream
  2768         // then Rmark has not been modified.
  2666      // then Rmark has not been modified.
  2769       }
  2667    }
  2770       andcc(Rmark, 2, G0);
  2668    andcc(Rmark, 2, G0);
  2771       brx(Assembler::notZero, false, Assembler::pn, IsInflated);
  2669    brx(Assembler::notZero, false, Assembler::pn, IsInflated);
  2772       delayed()->                         // Beware - dangling delay-slot
  2670    delayed()->                         // Beware - dangling delay-slot
  2773 
  2671 
  2774       // Try stack-lock acquisition.
  2672    // Try stack-lock acquisition.
  2775       // Transiently install BUSY (0) encoding in the mark word.
  2673    // Transiently install BUSY (0) encoding in the mark word.
  2776       // if the CAS of 0 into the mark was successful then we execute:
  2674    // if the CAS of 0 into the mark was successful then we execute:
  2777       //   ST box->dhw  = mark   -- save fetched mark in on-stack basiclock box
  2675    //   ST box->dhw  = mark   -- save fetched mark in on-stack basiclock box
  2778       //   ST obj->mark = box    -- overwrite transient 0 value
  2676    //   ST obj->mark = box    -- overwrite transient 0 value
  2779       // This presumes TSO, of course.
  2677    // This presumes TSO, of course.
  2780 
  2678 
  2781       mov(0, Rscratch);
  2679    mov(0, Rscratch);
  2782       or3(Rmark, markOopDesc::unlocked_value, Rmark);
  2680    or3(Rmark, markOopDesc::unlocked_value, Rmark);
  2783       assert(mark_addr.disp() == 0, "cas must take a zero displacement");
  2681    assert(mark_addr.disp() == 0, "cas must take a zero displacement");
  2784       cas_ptr(mark_addr.base(), Rmark, Rscratch);
  2682    cas_ptr(mark_addr.base(), Rmark, Rscratch);
  2785 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
  2683 // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
  2786       cmp(Rscratch, Rmark);
  2684    cmp(Rscratch, Rmark);
  2787       brx(Assembler::notZero, false, Assembler::pn, Recursive);
  2685    brx(Assembler::notZero, false, Assembler::pn, Recursive);
  2788       delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
  2686    delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
  2789       if (counters != NULL) {
  2687    if (counters != NULL) {
  2790         cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
  2688      cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
  2791       }
       
  2792       ba(done);
       
  2793       delayed()->st_ptr(Rbox, mark_addr);
       
  2794 
       
  2795       bind(Recursive);
       
  2796       // Stack-lock attempt failed - check for recursive stack-lock.
       
  2797       // Tests show that we can remove the recursive case with no impact
       
  2798       // on refworkload 0.83.  If we need to reduce the size of the code
       
  2799       // emitted by compiler_lock_object() the recursive case is a perfect
       
  2800       // candidate.
       
  2801       //
       
  2802       // A more extreme idea is to always inflate on stack-lock recursion.
       
  2803       // This lets us eliminate the recursive checks in compiler_lock_object
       
  2804       // and compiler_unlock_object and the (box->dhw == 0) encoding.
       
  2805       // A brief experiment - requiring changes to synchronizer.cpp, interpreter,
       
  2806       // and showed a performance *increase*.  In the same experiment I eliminated
       
  2807       // the fast-path stack-lock code from the interpreter and always passed
       
  2808       // control to the "slow" operators in synchronizer.cpp.
       
  2809 
       
  2810       // RScratch contains the fetched obj->mark value from the failed CAS.
       
  2811       sub(Rscratch, STACK_BIAS, Rscratch);
       
  2812       sub(Rscratch, SP, Rscratch);
       
  2813       assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
       
  2814       andcc(Rscratch, 0xfffff003, Rscratch);
       
  2815       if (counters != NULL) {
       
  2816         // Accounting needs the Rscratch register
       
  2817         st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
       
  2818         cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
       
  2819         ba_short(done);
       
  2820       } else {
       
  2821         ba(done);
       
  2822         delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
       
  2823       }
       
  2824 
       
  2825       bind   (IsInflated);
       
  2826 
       
  2827       // Try to CAS m->owner from null to Self
       
  2828       // Invariant: if we acquire the lock then _recursions should be 0.
       
  2829       add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
       
  2830       mov(G2_thread, Rscratch);
       
  2831       cas_ptr(Rmark, G0, Rscratch);
       
  2832       andcc(Rscratch, Rscratch, G0);             // set ICCs for done: icc.zf iff success
       
  2833       // set icc.zf : 1=success 0=failure
       
  2834       // ST box->displaced_header = NonZero.
       
  2835       // Any non-zero value suffices:
       
  2836       //    markOopDesc::unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
       
  2837       st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
       
  2838       // Intentional fall-through into done
       
  2839    }
  2689    }
       
  2690    ba(done);
       
  2691    delayed()->st_ptr(Rbox, mark_addr);
       
  2692 
       
  2693    bind(Recursive);
       
  2694    // Stack-lock attempt failed - check for recursive stack-lock.
       
  2695    // Tests show that we can remove the recursive case with no impact
       
  2696    // on refworkload 0.83.  If we need to reduce the size of the code
       
  2697    // emitted by compiler_lock_object() the recursive case is a perfect
       
  2698    // candidate.
       
  2699    //
       
  2700    // A more extreme idea is to always inflate on stack-lock recursion.
       
  2701    // This lets us eliminate the recursive checks in compiler_lock_object
       
  2702    // and compiler_unlock_object and the (box->dhw == 0) encoding.
       
  2703    // A brief experiment - requiring changes to synchronizer.cpp, interpreter,
       
  2704    // and showed a performance *increase*.  In the same experiment I eliminated
       
  2705    // the fast-path stack-lock code from the interpreter and always passed
       
  2706    // control to the "slow" operators in synchronizer.cpp.
       
  2707 
       
  2708    // RScratch contains the fetched obj->mark value from the failed CAS.
       
  2709    sub(Rscratch, STACK_BIAS, Rscratch);
       
  2710    sub(Rscratch, SP, Rscratch);
       
  2711    assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
       
  2712    andcc(Rscratch, 0xfffff003, Rscratch);
       
  2713    if (counters != NULL) {
       
  2714      // Accounting needs the Rscratch register
       
  2715      st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
       
  2716      cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
       
  2717      ba_short(done);
       
  2718    } else {
       
  2719      ba(done);
       
  2720      delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
       
  2721    }
       
  2722 
       
  2723    bind   (IsInflated);
       
  2724 
       
  2725    // Try to CAS m->owner from null to Self
       
  2726    // Invariant: if we acquire the lock then _recursions should be 0.
       
  2727    add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
       
  2728    mov(G2_thread, Rscratch);
       
  2729    cas_ptr(Rmark, G0, Rscratch);
       
  2730    andcc(Rscratch, Rscratch, G0);             // set ICCs for done: icc.zf iff success
       
  2731    // set icc.zf : 1=success 0=failure
       
  2732    // ST box->displaced_header = NonZero.
       
  2733    // Any non-zero value suffices:
       
  2734    //    markOopDesc::unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
       
  2735    st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
       
  2736    // Intentional fall-through into done
  2840 
  2737 
  2841    bind   (done);
  2738    bind   (done);
  2842 }
  2739 }
  2843 
  2740 
  2844 void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
  2741 void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
  2845                                             Register Rbox, Register Rscratch,
  2742                                             Register Rbox, Register Rscratch,
  2846                                             bool try_bias) {
  2743                                             bool try_bias) {
  2847    Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
  2744    Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
  2848 
  2745 
  2849    Label done ;
  2746    Label done ;
  2850 
       
  2851    if (EmitSync & 4) {
       
  2852      cmp(SP, G0);
       
  2853      return ;
       
  2854    }
       
  2855 
       
  2856    if (EmitSync & 8) {
       
  2857      if (try_bias) {
       
  2858         biased_locking_exit(mark_addr, Rscratch, done);
       
  2859      }
       
  2860 
       
  2861      // Test first if it is a fast recursive unlock
       
  2862      ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
       
  2863      br_null_short(Rmark, Assembler::pt, done);
       
  2864 
       
  2865      // Check if it is still a light weight lock, this is true if we see
       
  2866      // the stack address of the basicLock in the markOop of the object
       
  2867      assert(mark_addr.disp() == 0, "cas must take a zero displacement");
       
  2868      cas_ptr(mark_addr.base(), Rbox, Rmark);
       
  2869      ba(done);
       
  2870      delayed()->cmp(Rbox, Rmark);
       
  2871      bind(done);
       
  2872      return ;
       
  2873    }
       
  2874 
  2747 
  2875    // Beware ... If the aggregate size of the code emitted by CLO and CUO is
  2748    // Beware ... If the aggregate size of the code emitted by CLO and CUO is
  2876    // too large, performance rolls abruptly off a cliff.
  2749    // too large, performance rolls abruptly off a cliff.
  2877    // This could be related to inlining policies, code cache management, or
  2750    // This could be related to inlining policies, code cache management, or
  2878    // I$ effects.
  2751    // I$ effects.
  2900    // and that particular flavor of barrier is a noop, so we can safely elide it.
  2773    // and that particular flavor of barrier is a noop, so we can safely elide it.
  2901    // Note that we use 1-0 locking by default for the inflated case.  We
  2774    // Note that we use 1-0 locking by default for the inflated case.  We
  2902    // close the resultant (and rare) race by having contended threads in
  2775    // close the resultant (and rare) race by having contended threads in
  2903    // monitorenter periodically poll _owner.
  2776    // monitorenter periodically poll _owner.
  2904 
  2777 
  2905    if (EmitSync & 1024) {
  2778    // 1-0 form : avoids CAS and MEMBAR in the common case
  2906      // Emit code to check that _owner == Self
  2779    // Do not bother to ratify that m->Owner == Self.
  2907      // We could fold the _owner test into subsequent code more efficiently
  2780    ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox);
  2908      // than using a stand-alone check, but since _owner checking is off by
  2781    orcc(Rbox, G0, G0);
  2909      // default we don't bother. We also might consider predicating the
  2782    brx(Assembler::notZero, false, Assembler::pn, done);
  2910      // _owner==Self check on Xcheck:jni or running on a debug build.
  2783    delayed()->
  2911      ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), Rscratch);
  2784    ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch);
  2912      orcc(Rscratch, G0, G0);
  2785    ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox);
  2913      brx(Assembler::notZero, false, Assembler::pn, done);
  2786    orcc(Rbox, Rscratch, G0);
  2914      delayed()->nop();
  2787    brx(Assembler::zero, false, Assembler::pt, done);
  2915    }
  2788    delayed()->
  2916 
  2789    st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
  2917    if (EmitSync & 512) {
  2790 
  2918      // classic lock release code absent 1-0 locking
  2791    if (os::is_MP()) { membar(StoreLoad); }
  2919      //   m->Owner = null;
  2792    // Check that _succ is (or remains) non-zero
  2920      //   membar #storeload
  2793    ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
  2921      //   if (m->cxq|m->EntryList) == null goto Success
  2794    andcc(Rscratch, Rscratch, G0);
  2922      //   if (m->succ != null) goto Success
  2795    brx(Assembler::notZero, false, Assembler::pt, done);
  2923      //   if CAS (&m->Owner,0,Self) != 0 goto Success
  2796    delayed()->andcc(G0, G0, G0);
  2924      //   goto SlowPath
  2797    add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
  2925      ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox);
  2798    mov(G2_thread, Rscratch);
  2926      orcc(Rbox, G0, G0);
  2799    cas_ptr(Rmark, G0, Rscratch);
  2927      brx(Assembler::notZero, false, Assembler::pn, done);
  2800    cmp(Rscratch, G0);
  2928      delayed()->nop();
  2801    // invert icc.zf and goto done
  2929      st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
  2802    // A slightly better v8+/v9 idiom would be the following:
  2930      if (os::is_MP()) { membar(StoreLoad); }
  2803    //   movrnz Rscratch,1,Rscratch
  2931      ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch);
  2804    //   ba done
  2932      ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox);
  2805    //   xorcc Rscratch,1,G0
  2933      orcc(Rbox, Rscratch, G0);
  2806    // In v8+ mode the idiom would be valid IFF Rscratch was a G or O register
  2934      brx(Assembler::zero, false, Assembler::pt, done);
  2807    brx(Assembler::notZero, false, Assembler::pt, done);
  2935      delayed()->
  2808    delayed()->cmp(G0, G0);
  2936      ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
  2809    br(Assembler::always, false, Assembler::pt, done);
  2937      andcc(Rscratch, Rscratch, G0);
  2810    delayed()->cmp(G0, 1);
  2938      brx(Assembler::notZero, false, Assembler::pt, done);
       
  2939      delayed()->andcc(G0, G0, G0);
       
  2940      add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
       
  2941      mov(G2_thread, Rscratch);
       
  2942      cas_ptr(Rmark, G0, Rscratch);
       
  2943      cmp(Rscratch, G0);
       
  2944      // invert icc.zf and goto done
       
  2945      brx(Assembler::notZero, false, Assembler::pt, done);
       
  2946      delayed()->cmp(G0, G0);
       
  2947      br(Assembler::always, false, Assembler::pt, done);
       
  2948      delayed()->cmp(G0, 1);
       
  2949    } else {
       
  2950      // 1-0 form : avoids CAS and MEMBAR in the common case
       
  2951      // Do not bother to ratify that m->Owner == Self.
       
  2952      ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox);
       
  2953      orcc(Rbox, G0, G0);
       
  2954      brx(Assembler::notZero, false, Assembler::pn, done);
       
  2955      delayed()->
       
  2956      ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch);
       
  2957      ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox);
       
  2958      orcc(Rbox, Rscratch, G0);
       
  2959      if (EmitSync & 16384) {
       
  2960        // As an optional optimization, if (EntryList|cxq) != null and _succ is null then
       
  2961        // we should transfer control directly to the slow-path.
       
  2962        // This test makes the reacquire operation below very infrequent.
       
  2963        // The logic is equivalent to :
       
  2964        //   if (cxq|EntryList) == null : Owner=null; goto Success
       
  2965        //   if succ == null : goto SlowPath
       
  2966        //   Owner=null; membar #storeload
       
  2967        //   if succ != null : goto Success
       
  2968        //   if CAS(&Owner,null,Self) != null goto Success
       
  2969        //   goto SlowPath
       
  2970        brx(Assembler::zero, true, Assembler::pt, done);
       
  2971        delayed()->
       
  2972        st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
       
  2973        ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
       
  2974        andcc(Rscratch, Rscratch, G0) ;
       
  2975        brx(Assembler::zero, false, Assembler::pt, done);
       
  2976        delayed()->orcc(G0, 1, G0);
       
  2977        st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
       
  2978      } else {
       
  2979        brx(Assembler::zero, false, Assembler::pt, done);
       
  2980        delayed()->
       
  2981        st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
       
  2982      }
       
  2983      if (os::is_MP()) { membar(StoreLoad); }
       
  2984      // Check that _succ is (or remains) non-zero
       
  2985      ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
       
  2986      andcc(Rscratch, Rscratch, G0);
       
  2987      brx(Assembler::notZero, false, Assembler::pt, done);
       
  2988      delayed()->andcc(G0, G0, G0);
       
  2989      add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
       
  2990      mov(G2_thread, Rscratch);
       
  2991      cas_ptr(Rmark, G0, Rscratch);
       
  2992      cmp(Rscratch, G0);
       
  2993      // invert icc.zf and goto done
       
  2994      // A slightly better v8+/v9 idiom would be the following:
       
  2995      //   movrnz Rscratch,1,Rscratch
       
  2996      //   ba done
       
  2997      //   xorcc Rscratch,1,G0
       
  2998      // In v8+ mode the idiom would be valid IFF Rscratch was a G or O register
       
  2999      brx(Assembler::notZero, false, Assembler::pt, done);
       
  3000      delayed()->cmp(G0, G0);
       
  3001      br(Assembler::always, false, Assembler::pt, done);
       
  3002      delayed()->cmp(G0, 1);
       
  3003    }
       
  3004 
  2811 
  3005    bind   (LStacked);
  2812    bind   (LStacked);
  3006    // Consider: we could replace the expensive CAS in the exit
  2813    // Consider: we could replace the expensive CAS in the exit
  3007    // path with a simple ST of the displaced mark value fetched from
  2814    // path with a simple ST of the displaced mark value fetched from
  3008    // the on-stack basiclock box.  That admits a race where a thread T2
  2815    // the on-stack basiclock box.  That admits a race where a thread T2