src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
changeset 55076 785a12e0f89b
parent 54766 1321f8cf9de5
child 55146 aa5eeb1a9871
child 58678 9cf78a70fa4f
equal deleted inserted replaced
55075:044f2ca6ce22 55076:785a12e0f89b
   315   if(tosca_live) __ pop(rax);
   315   if(tosca_live) __ pop(rax);
   316 
   316 
   317   __ bind(done);
   317   __ bind(done);
   318 }
   318 }
   319 
   319 
   320 void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst) {
   320 void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) {
   321   assert(ShenandoahCASBarrier, "should be enabled");
   321   assert(ShenandoahCASBarrier, "should be enabled");
   322   Label is_null;
   322   Label is_null;
   323   __ testptr(dst, dst);
   323   __ testptr(dst, dst);
   324   __ jcc(Assembler::zero, is_null);
   324   __ jcc(Assembler::zero, is_null);
   325   resolve_forward_pointer_not_null(masm, dst);
   325   resolve_forward_pointer_not_null(masm, dst, tmp);
   326   __ bind(is_null);
   326   __ bind(is_null);
   327 }
   327 }
   328 
   328 
   329 void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst) {
   329 void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) {
   330   assert(ShenandoahCASBarrier || ShenandoahLoadRefBarrier, "should be enabled");
   330   assert(ShenandoahCASBarrier || ShenandoahLoadRefBarrier, "should be enabled");
   331   __ movptr(dst, Address(dst, ShenandoahForwarding::byte_offset()));
   331   // The code below loads the mark word, checks if the lowest two bits are
       
   332   // set, and if so, clears the lowest two bits and copies the result
       
   333   // to dst. Otherwise it leaves dst alone.
       
   334   // Implementing this is surprisingly awkward. I do it here by:
       
   335   // - Inverting the mark word
       
   336   // - Test lowest two bits == 0
       
   337   // - If so, set the lowest two bits
       
   338   // - Invert the result back, and copy to dst
       
   339 
       
   340   bool borrow_reg = (tmp == noreg);
       
   341   if (borrow_reg) {
       
   342     // No temp register was supplied. Borrow rscratch1; its value is pushed here and popped afterwards.
       
   343     tmp = rscratch1;
       
   344     __ push(tmp);
       
   345   }
       
   346 
       
   347   Label done;
       
   348   __ movptr(tmp, Address(dst, oopDesc::mark_offset_in_bytes()));
       
   349   __ notptr(tmp);
       
   350   __ testb(tmp, markOopDesc::marked_value);
       
   351   __ jccb(Assembler::notZero, done);
       
   352   __ orptr(tmp, markOopDesc::marked_value);
       
   353   __ notptr(tmp);
       
   354   __ mov(dst, tmp);
       
   355   __ bind(done);
       
   356 
       
   357   if (borrow_reg) {
       
   358     __ pop(tmp);
       
   359   }
   332 }
   360 }
   333 
   361 
   334 
   362 
   335 void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst) {
   363 void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst) {
   336   assert(ShenandoahLoadRefBarrier, "Should be enabled");
   364   assert(ShenandoahLoadRefBarrier, "Should be enabled");
   337 #ifdef _LP64
   365 #ifdef _LP64
   338   Label done;
   366   Label done;
   339 
   367 
   340   Address gc_state(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
   368   Address gc_state(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
   341   __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::EVACUATION | ShenandoahHeap::TRAVERSAL);
   369   __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
   342   __ jccb(Assembler::zero, done);
       
   343 
       
   344   // Heap is unstable, need to perform the resolve even if LRB is inactive
       
   345   resolve_forward_pointer_not_null(masm, dst);
       
   346 
       
   347   __ testb(gc_state, ShenandoahHeap::EVACUATION | ShenandoahHeap::TRAVERSAL);
       
   348   __ jccb(Assembler::zero, done);
   370   __ jccb(Assembler::zero, done);
   349 
   371 
   350    if (dst != rax) {
   372    if (dst != rax) {
   351      __ xchgptr(dst, rax); // Move obj into rax and save rax into obj.
   373      __ xchgptr(dst, rax); // Move obj into rax and save rax into obj.
   352    }
   374    }
   477   } else {
   499   } else {
   478     BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
   500     BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);
   479   }
   501   }
   480 }
   502 }
   481 
   503 
   482 void ShenandoahBarrierSetAssembler::tlab_allocate(MacroAssembler* masm,
       
   483                                                   Register thread, Register obj,
       
   484                                                   Register var_size_in_bytes,
       
   485                                                   int con_size_in_bytes,
       
   486                                                   Register t1, Register t2,
       
   487                                                   Label& slow_case) {
       
   488   assert_different_registers(obj, t1, t2);
       
   489   assert_different_registers(obj, var_size_in_bytes, t1);
       
   490   Register end = t2;
       
   491   if (!thread->is_valid()) {
       
   492 #ifdef _LP64
       
   493     thread = r15_thread;
       
   494 #else
       
   495     assert(t1->is_valid(), "need temp reg");
       
   496     thread = t1;
       
   497     __ get_thread(thread);
       
   498 #endif
       
   499   }
       
   500 
       
   501   __ verify_tlab();
       
   502 
       
   503   __ movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
       
   504   if (var_size_in_bytes == noreg) {
       
   505     __ lea(end, Address(obj, con_size_in_bytes + ShenandoahForwarding::byte_size()));
       
   506   } else {
       
   507     __ addptr(var_size_in_bytes, ShenandoahForwarding::byte_size());
       
   508     __ lea(end, Address(obj, var_size_in_bytes, Address::times_1));
       
   509   }
       
   510   __ cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
       
   511   __ jcc(Assembler::above, slow_case);
       
   512 
       
   513   // update the tlab top pointer
       
   514   __ movptr(Address(thread, JavaThread::tlab_top_offset()), end);
       
   515 
       
   516   // Initialize brooks pointer
       
   517 #ifdef _LP64
       
   518   __ incrementq(obj, ShenandoahForwarding::byte_size());
       
   519 #else
       
   520   __ incrementl(obj, ShenandoahForwarding::byte_size());
       
   521 #endif
       
   522   __ movptr(Address(obj, ShenandoahForwarding::byte_offset()), obj);
       
   523 
       
   524   // recover var_size_in_bytes if necessary
       
   525   if (var_size_in_bytes == end) {
       
   526     __ subptr(var_size_in_bytes, obj);
       
   527   }
       
   528   __ verify_tlab();
       
   529 }
       
   530 
       
   531 // Special Shenandoah CAS implementation that handles false negatives
   504 // Special Shenandoah CAS implementation that handles false negatives
   532 // due to concurrent evacuation.
   505 // due to concurrent evacuation.
   533 #ifndef _LP64
   506 #ifndef _LP64
   534 void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
   507 void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
   535                                                 Register res, Address addr, Register oldval, Register newval,
   508                                                 Register res, Address addr, Register oldval, Register newval,
   854   __ align(CodeEntryAlignment);
   827   __ align(CodeEntryAlignment);
   855   StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
   828   StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
   856   address start = __ pc();
   829   address start = __ pc();
   857 
   830 
   858 #ifdef _LP64
   831 #ifdef _LP64
   859   Label not_done;
   832   Label resolve_oop, slow_path;
   860 
   833 
   861   // We use RDI, which also serves as argument register for slow call.
   834   // We use RDI, which also serves as argument register for slow call.
   862   // RAX always holds the src object ptr, except after the slow call and
   835   // RAX always holds the src object ptr, except after the slow call and
   863   // the cmpxchg, then it holds the result.
   836   // the cmpxchg, then it holds the result.
   864   // R8 and RCX are used as temporary registers.
   837   // R8 and RCX are used as temporary registers.
   876   __ movptr(r8, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
   849   __ movptr(r8, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
   877   __ movbool(r8, Address(r8, rdi, Address::times_1));
   850   __ movbool(r8, Address(r8, rdi, Address::times_1));
   878   // unlive: rdi
   851   // unlive: rdi
   879   __ testbool(r8);
   852   __ testbool(r8);
   880   // unlive: r8
   853   // unlive: r8
   881   __ jccb(Assembler::notZero, not_done);
   854   __ jccb(Assembler::notZero, resolve_oop);
   882 
   855 
   883   __ pop(r8);
   856   __ pop(r8);
   884   __ pop(rdi);
   857   __ pop(rdi);
   885   __ ret(0);
   858   __ ret(0);
   886 
   859 
   887   __ bind(not_done);
   860   __ bind(resolve_oop);
       
   861 
       
   862   __ movptr(r8, Address(rax, oopDesc::mark_offset_in_bytes()));
       
   863   // Test if both lowest bits are set. We do this by inverting the bits
       
   864   // and then testing that both bits are clear.
       
   865   __ notptr(r8);
       
   866   __ testb(r8, markOopDesc::marked_value);
       
   867   __ jccb(Assembler::notZero, slow_path);
       
   868   // Clear both lower bits. It's still inverted, so set them, and then invert back.
       
   869   __ orptr(r8, markOopDesc::marked_value);
       
   870   __ notptr(r8);
       
   871   // At this point, r8 contains the decoded forwarding pointer.
       
   872   __ mov(rax, r8);
       
   873 
       
   874   __ pop(r8);
       
   875   __ pop(rdi);
       
   876   __ ret(0);
       
   877 
       
   878   __ bind(slow_path);
   888 
   879 
   889   __ push(rcx);
   880   __ push(rcx);
   890   __ push(rdx);
   881   __ push(rdx);
   891   __ push(rdi);
   882   __ push(rdi);
   892   __ push(rsi);
   883   __ push(rsi);