315 if(tosca_live) __ pop(rax); |
315 if(tosca_live) __ pop(rax); |
316 |
316 |
317 __ bind(done); |
317 __ bind(done); |
318 } |
318 } |
319 |
319 |
// resolve_forward_pointer: null-tolerant wrapper around
// resolve_forward_pointer_not_null().
//
// Each statement below appears twice: first as it reads in the old column of
// this side-by-side diff, then as it reads in the new column. The only
// difference between the two versions is the extra `Register tmp` parameter,
// which the new version forwards unchanged to the not-null helper.
//
// Emitted code: test `dst` against itself; if it is zero, branch straight to
// `is_null` and leave `dst` untouched; otherwise fall through into the
// not-null resolve sequence, which rewrites `dst` in place.
// Asserts that ShenandoahCASBarrier is enabled.
320 void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst) { |
320 void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) { |
321 assert(ShenandoahCASBarrier, "should be enabled"); |
321 assert(ShenandoahCASBarrier, "should be enabled"); |
322 Label is_null; |
322 Label is_null; |
// Null check: testptr sets ZF when dst == 0, and the jcc then skips the resolve.
323 __ testptr(dst, dst); |
323 __ testptr(dst, dst); |
324 __ jcc(Assembler::zero, is_null); |
324 __ jcc(Assembler::zero, is_null); |
325 resolve_forward_pointer_not_null(masm, dst); |
325 resolve_forward_pointer_not_null(masm, dst, tmp); |
326 __ bind(is_null); |
326 __ bind(is_null); |
327 } |
327 } |
328 |
328 |
// resolve_forward_pointer_not_null: emit code that replaces the object pointer
// in `dst` with its forwardee. The caller guarantees `dst` is non-null; no null
// check is emitted here.
//
// This is a side-by-side diff, so two implementations are interleaved:
//  - Old column (old line 331): a single load of the word at
//    ShenandoahForwarding::byte_offset() relative to `dst`, back into `dst`.
//  - New column (new lines 331-359): load the word at
//    oopDesc::mark_offset_in_bytes() into `tmp`; if the bits selected by
//    markOopDesc::marked_value are all set, strip those bits and move the
//    result into `dst`; otherwise leave `dst` unchanged.
//
// New-version parameters:
//   dst - in/out register holding the object pointer.
//   tmp - scratch register; when it is `noreg`, rscratch1 is borrowed and
//         preserved around the sequence with push/pop.
//
// NOTE(review): in the borrow path nothing visible here checks that `dst` is a
// different register from rscratch1. If a caller passes dst == rscratch1 with
// tmp == noreg, the load into `tmp` would overwrite `dst` and the final pop
// would discard the result - confirm callers never do this, or add a guard.
329 void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst) { |
329 void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) { |
330 assert(ShenandoahCASBarrier || ShenandoahLoadRefBarrier, "should be enabled"); |
330 assert(ShenandoahCASBarrier || ShenandoahLoadRefBarrier, "should be enabled"); |
// Old column: the entire old body is this one load.
331 __ movptr(dst, Address(dst, ShenandoahForwarding::byte_offset())); |
331 // The below loads the mark word, checks if the lowest two bits are |

332 // set, and if so, clear the lowest two bits and copy the result |

333 // to dst. Otherwise it leaves dst alone. |

334 // Implementing this is surprisingly awkward. I do it here by: |

335 // - Inverting the mark word |

336 // - Test lowest two bits == 0 |

337 // - If so, set the lowest two bits |

338 // - Invert the result back, and copy to dst |

339 |

// Record whether the scratch register had to be borrowed so that the matching
// pop at the end is emitted only in that case.
340 bool borrow_reg = (tmp == noreg); |

341 if (borrow_reg) { |

342 // No free registers available. Make one useful. |

343 tmp = rscratch1; |

344 __ push(tmp); |

345 } |

346 |

347 Label done; |

348 __ movptr(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); |

// After the inversion, "all marked_value bits were set" becomes "all
// marked_value bits are clear", which a single testb can detect.
349 __ notptr(tmp); |

350 __ testb(tmp, markOopDesc::marked_value); |

// Non-zero means at least one of the tested bits was clear in the original
// mark word: skip the decode and keep dst as it is.
351 __ jccb(Assembler::notZero, done); |

// Setting the bits in the inverted value and inverting back clears them in the
// original value.
352 __ orptr(tmp, markOopDesc::marked_value); |

353 __ notptr(tmp); |

354 __ mov(dst, tmp); |

355 __ bind(done); |

356 |

// Restore the borrowed scratch register on both paths (the pop sits after `done`).
357 if (borrow_reg) { |

358 __ pop(tmp); |

359 } |
332 } |
360 } |
333 |
361 |
334 |
362 |
335 void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst) { |
363 void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst) { |
336 assert(ShenandoahLoadRefBarrier, "Should be enabled"); |
364 assert(ShenandoahLoadRefBarrier, "Should be enabled"); |
337 #ifdef _LP64 |
365 #ifdef _LP64 |
338 Label done; |
366 Label done; |
339 |
367 |
340 Address gc_state(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); |
368 Address gc_state(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); |
341 __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::EVACUATION | ShenandoahHeap::TRAVERSAL); |
369 __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED); |
342 __ jccb(Assembler::zero, done); |
|
343 |
|
344 // Heap is unstable, need to perform the resolve even if LRB is inactive |
|
345 resolve_forward_pointer_not_null(masm, dst); |
|
346 |
|
347 __ testb(gc_state, ShenandoahHeap::EVACUATION | ShenandoahHeap::TRAVERSAL); |
|
348 __ jccb(Assembler::zero, done); |
370 __ jccb(Assembler::zero, done); |
349 |
371 |
350 if (dst != rax) { |
372 if (dst != rax) { |
351 __ xchgptr(dst, rax); // Move obj into rax and save rax into obj. |
373 __ xchgptr(dst, rax); // Move obj into rax and save rax into obj. |
352 } |
374 } |
477 } else { |
499 } else { |
478 BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); |
500 BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); |
479 } |
501 } |
480 } |
502 } |
481 |
503 |
// tlab_allocate: emit a TLAB bump-pointer allocation that reserves an extra
// ShenandoahForwarding::byte_size() bytes in front of the object and stores the
// object's own address into that slot.
//
// Every line of this function is followed by an empty line in this
// side-by-side diff, i.e. it exists only in the old column and has no
// counterpart in the new column.
//
// Parameters:
//   thread            - register holding the current thread; if not valid,
//                       r15_thread is used on _LP64, otherwise the thread is
//                       loaded into t1 via get_thread().
//   obj               - out: address of the new object (already advanced past
//                       the forwarding slot).
//   var_size_in_bytes - register holding the size, or noreg to use the constant.
//   con_size_in_bytes - constant size, used only when var_size_in_bytes == noreg.
//   t1, t2            - temporaries; t2 serves as `end` (the new TLAB top).
//   slow_case         - branch target taken when the request does not fit.
//
// NOTE(review): in the variable-size path var_size_in_bytes is incremented in
// place by the forwarding size and is only restored when it aliases `end`;
// otherwise it is left enlarged on exit - confirm callers expected that.
482 void ShenandoahBarrierSetAssembler::tlab_allocate(MacroAssembler* masm, |

483 Register thread, Register obj, |

484 Register var_size_in_bytes, |

485 int con_size_in_bytes, |

486 Register t1, Register t2, |

487 Label& slow_case) { |

488 assert_different_registers(obj, t1, t2); |

489 assert_different_registers(obj, var_size_in_bytes, t1); |

490 Register end = t2; |

491 if (!thread->is_valid()) { |

492 #ifdef _LP64 |

493 thread = r15_thread; |

494 #else |

495 assert(t1->is_valid(), "need temp reg"); |

496 thread = t1; |

497 __ get_thread(thread); |

498 #endif |

499 } |

500 |

501 __ verify_tlab(); |

502 |

// obj <- current TLAB top; end <- obj + requested size + forwarding slot size.
503 __ movptr(obj, Address(thread, JavaThread::tlab_top_offset())); |

504 if (var_size_in_bytes == noreg) { |

505 __ lea(end, Address(obj, con_size_in_bytes + ShenandoahForwarding::byte_size())); |

506 } else { |

507 __ addptr(var_size_in_bytes, ShenandoahForwarding::byte_size()); |

508 __ lea(end, Address(obj, var_size_in_bytes, Address::times_1)); |

509 } |

// Unsigned compare against the TLAB end: take the slow path when end is above it.
510 __ cmpptr(end, Address(thread, JavaThread::tlab_end_offset())); |

511 __ jcc(Assembler::above, slow_case); |

512 |

513 // update the tlab top pointer |

514 __ movptr(Address(thread, JavaThread::tlab_top_offset()), end); |

515 |

// Step obj past the reserved slot, then store obj itself at
// ShenandoahForwarding::byte_offset() relative to the object.
516 // Initialize brooks pointer |

517 #ifdef _LP64 |

518 __ incrementq(obj, ShenandoahForwarding::byte_size()); |

519 #else |

520 __ incrementl(obj, ShenandoahForwarding::byte_size()); |

521 #endif |

522 __ movptr(Address(obj, ShenandoahForwarding::byte_offset()), obj); |

523 |

// When the size register doubles as `end`, it now holds the new top; subtracting
// the (already advanced) obj yields the original requested size again.
524 // recover var_size_in_bytes if necessary |

525 if (var_size_in_bytes == end) { |

526 __ subptr(var_size_in_bytes, obj); |

527 } |

528 __ verify_tlab(); |

529 } |
|
530 |
|
531 // Special Shenandoah CAS implementation that handles false negatives |
504 // Special Shenandoah CAS implementation that handles false negatives |
532 // due to concurrent evacuation. |
505 // due to concurrent evacuation. |
533 #ifndef _LP64 |
506 #ifndef _LP64 |
534 void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, |
507 void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, |
535 Register res, Address addr, Register oldval, Register newval, |
508 Register res, Address addr, Register oldval, Register newval, |
854 __ align(CodeEntryAlignment); |
827 __ align(CodeEntryAlignment); |
855 StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb"); |
828 StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb"); |
856 address start = __ pc(); |
829 address start = __ pc(); |
857 |
830 |
858 #ifdef _LP64 |
831 #ifdef _LP64 |
859 Label not_done; |
832 Label resolve_oop, slow_path; |
860 |
833 |
861 // We use RDI, which also serves as argument register for slow call. |
834 // We use RDI, which also serves as argument register for slow call. |
862 // RAX always holds the src object ptr, except after the slow call and |
835 // RAX always holds the src object ptr, except after the slow call and |
863 // the cmpxchg, then it holds the result. |
836 // the cmpxchg, then it holds the result. |
864 // R8 and RCX are used as temporary registers. |
837 // R8 and RCX are used as temporary registers. |
876 __ movptr(r8, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr()); |
849 __ movptr(r8, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr()); |
877 __ movbool(r8, Address(r8, rdi, Address::times_1)); |
850 __ movbool(r8, Address(r8, rdi, Address::times_1)); |
878 // unlive: rdi |
851 // unlive: rdi |
879 __ testbool(r8); |
852 __ testbool(r8); |
880 // unlive: r8 |
853 // unlive: r8 |
881 __ jccb(Assembler::notZero, not_done); |
854 __ jccb(Assembler::notZero, resolve_oop); |
882 |
855 |
883 __ pop(r8); |
856 __ pop(r8); |
884 __ pop(rdi); |
857 __ pop(rdi); |
885 __ ret(0); |
858 __ ret(0); |
886 |
859 |
887 __ bind(not_done); |
860 __ bind(resolve_oop); |
|
861 |
|
862 __ movptr(r8, Address(rax, oopDesc::mark_offset_in_bytes())); |
|
863 // Test if both lowest bits are set. We trick it by negating the bits |
|
864 // then test for both bits clear. |
|
865 __ notptr(r8); |
|
866 __ testb(r8, markOopDesc::marked_value); |
|
867 __ jccb(Assembler::notZero, slow_path); |
|
868 // Clear both lower bits. It's still inverted, so set them, and then invert back. |
|
869 __ orptr(r8, markOopDesc::marked_value); |
|
870 __ notptr(r8); |
|
871 // At this point, r8 contains the decoded forwarding pointer. |
|
872 __ mov(rax, r8); |
|
873 |
|
874 __ pop(r8); |
|
875 __ pop(rdi); |
|
876 __ ret(0); |
|
877 |
|
878 __ bind(slow_path); |
888 |
879 |
889 __ push(rcx); |
880 __ push(rcx); |
890 __ push(rdx); |
881 __ push(rdx); |
891 __ push(rdi); |
882 __ push(rdi); |
892 __ push(rsi); |
883 __ push(rsi); |