3224 // make sure arguments make sense |
3257 // make sure arguments make sense |
3225 assert_different_registers(obj, var_size_in_bytes, t1, t2); |
3258 assert_different_registers(obj, var_size_in_bytes, t1, t2); |
3226 assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size"); |
3259 assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size"); |
3227 assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment"); |
3260 assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment"); |
3228 |
3261 |
3229 // get eden boundaries |
3262 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { |
3230 // note: we need both top & top_addr! |
3263 // No allocation in the shared eden. |
3231 const Register top_addr = t1; |
3264 br(Assembler::always, false, Assembler::pt, slow_case); |
3232 const Register end = t2; |
3265 delayed()->nop(); |
3233 |
3266 } else { |
3234 CollectedHeap* ch = Universe::heap(); |
3267 // get eden boundaries |
3235 set((intx)ch->top_addr(), top_addr); |
3268 // note: we need both top & top_addr! |
3236 intx delta = (intx)ch->end_addr() - (intx)ch->top_addr(); |
3269 const Register top_addr = t1; |
3237 ld_ptr(top_addr, delta, end); |
3270 const Register end = t2; |
3238 ld_ptr(top_addr, 0, obj); |
3271 |
3239 |
3272 CollectedHeap* ch = Universe::heap(); |
3240 // try to allocate |
3273 set((intx)ch->top_addr(), top_addr); |
3241 Label retry; |
3274 intx delta = (intx)ch->end_addr() - (intx)ch->top_addr(); |
3242 bind(retry); |
3275 ld_ptr(top_addr, delta, end); |
|
3276 ld_ptr(top_addr, 0, obj); |
|
3277 |
|
3278 // try to allocate |
|
3279 Label retry; |
|
3280 bind(retry); |
3243 #ifdef ASSERT |
3281 #ifdef ASSERT |
3244 // make sure eden top is properly aligned |
3282 // make sure eden top is properly aligned |
3245 { |
3283 { |
3246 Label L; |
3284 Label L; |
3247 btst(MinObjAlignmentInBytesMask, obj); |
3285 btst(MinObjAlignmentInBytesMask, obj); |
3248 br(Assembler::zero, false, Assembler::pt, L); |
3286 br(Assembler::zero, false, Assembler::pt, L); |
3249 delayed()->nop(); |
3287 delayed()->nop(); |
3250 stop("eden top is not properly aligned"); |
3288 stop("eden top is not properly aligned"); |
3251 bind(L); |
3289 bind(L); |
3252 } |
3290 } |
3253 #endif // ASSERT |
3291 #endif // ASSERT |
3254 const Register free = end; |
3292 const Register free = end; |
3255 sub(end, obj, free); // compute amount of free space |
3293 sub(end, obj, free); // compute amount of free space |
3256 if (var_size_in_bytes->is_valid()) { |
3294 if (var_size_in_bytes->is_valid()) { |
3257 // size is unknown at compile time |
3295 // size is unknown at compile time |
3258 cmp(free, var_size_in_bytes); |
3296 cmp(free, var_size_in_bytes); |
3259 br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case |
3297 br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case |
3260 delayed()->add(obj, var_size_in_bytes, end); |
3298 delayed()->add(obj, var_size_in_bytes, end); |
3261 } else { |
3299 } else { |
3262 // size is known at compile time |
3300 // size is known at compile time |
3263 cmp(free, con_size_in_bytes); |
3301 cmp(free, con_size_in_bytes); |
3264 br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case |
3302 br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case |
3265 delayed()->add(obj, con_size_in_bytes, end); |
3303 delayed()->add(obj, con_size_in_bytes, end); |
3266 } |
3304 } |
3267 // Compare obj with the value at top_addr; if still equal, swap the value of |
3305 // Compare obj with the value at top_addr; if still equal, swap the value of |
3268 // end with the value at top_addr. If not equal, read the value at top_addr |
3306 // end with the value at top_addr. If not equal, read the value at top_addr |
3269 // into end. |
3307 // into end. |
3270 casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); |
3308 casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); |
3271 // if someone beat us on the allocation, try again, otherwise continue |
3309 // if someone beat us on the allocation, try again, otherwise continue |
3272 cmp(obj, end); |
3310 cmp(obj, end); |
3273 brx(Assembler::notEqual, false, Assembler::pn, retry); |
3311 brx(Assembler::notEqual, false, Assembler::pn, retry); |
3274 delayed()->mov(end, obj); // nop if successfull since obj == end |
3312 delayed()->mov(end, obj); // nop if successfull since obj == end |
3275 |
3313 |
3276 #ifdef ASSERT |
3314 #ifdef ASSERT |
3277 // make sure eden top is properly aligned |
3315 // make sure eden top is properly aligned |
3278 { |
3316 { |
3279 Label L; |
3317 Label L; |
3280 const Register top_addr = t1; |
3318 const Register top_addr = t1; |
3281 |
3319 |
3282 set((intx)ch->top_addr(), top_addr); |
3320 set((intx)ch->top_addr(), top_addr); |
3283 ld_ptr(top_addr, 0, top_addr); |
3321 ld_ptr(top_addr, 0, top_addr); |
3284 btst(MinObjAlignmentInBytesMask, top_addr); |
3322 btst(MinObjAlignmentInBytesMask, top_addr); |
3285 br(Assembler::zero, false, Assembler::pt, L); |
3323 br(Assembler::zero, false, Assembler::pt, L); |
3286 delayed()->nop(); |
3324 delayed()->nop(); |
3287 stop("eden top is not properly aligned"); |
3325 stop("eden top is not properly aligned"); |
3288 bind(L); |
3326 bind(L); |
3289 } |
3327 } |
3290 #endif // ASSERT |
3328 #endif // ASSERT |
|
3329 } |
3291 } |
3330 } |
3292 |
3331 |
3293 |
3332 |
3294 void MacroAssembler::tlab_allocate( |
3333 void MacroAssembler::tlab_allocate( |
3295 Register obj, // result: pointer to object after successful allocation |
3334 Register obj, // result: pointer to object after successful allocation |
3535 set((-i*offset)+STACK_BIAS, Rscratch); |
3574 set((-i*offset)+STACK_BIAS, Rscratch); |
3536 st(G0, Rtsp, Rscratch); |
3575 st(G0, Rtsp, Rscratch); |
3537 } |
3576 } |
3538 } |
3577 } |
3539 |
3578 |
|
3579 /////////////////////////////////////////////////////////////////////////////////// |
|
3580 #ifndef SERIALGC |
|
3581 |
|
// Debug-only instrumentation: running totals of SATB pre-barrier stores and
// how many of them saw a NULL previous value.  Updated without any
// synchronization, so the counts are best-effort approximations only.
static uint num_stores = 0;
static uint num_null_pre_stores = 0;

// Called (via a generated call in g1_write_barrier_pre when
// G1SATBBarrierPrintNullPreVals is set) to tally one pre-barrier store.
// Every millionth store, prints the fraction of stores whose pre-value
// was NULL.  Not thread-safe; intended purely for diagnostics.
static void count_null_pre_vals(void* pre_val) {
  num_stores++;
  if (pre_val == NULL) num_null_pre_stores++;
  if ((num_stores % 1000000) == 0) {
    tty->print_cr(UINT32_FORMAT " stores, " UINT32_FORMAT " (%5.2f%%) with null pre-vals.",
                  num_stores, num_null_pre_stores,
                  100.0*(float)num_null_pre_stores/(float)num_stores);
  }
}
|
3594 |
|
// Lazily-generated SATB enqueue stubs (see generate_satb_log_enqueue).
// The "_with_frame" flavor builds its own register window; the
// "_frameless" flavor runs as a leaf.  The *_end pointers record where
// each generated blob finishes, for disassembly/debug printing.
static address satb_log_enqueue_with_frame = 0;
static u_char* satb_log_enqueue_with_frame_end = 0;

static address satb_log_enqueue_frameless = 0;
static u_char* satb_log_enqueue_frameless_end = 0;

// Size budget (in bytes) for each generated enqueue stub; debug builds get
// extra headroom.  The original author's "Instructions?" note suggests the
// unit was never pinned down — NOTE(review): confirm bytes vs. instructions.
static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions?
|
3602 |
|
3603 // The calls to this don't work. We'd need to do a fair amount of work to |
|
3604 // make it work. |
|
3605 static void check_index(int ind) { |
|
3606 assert(0 <= ind && ind <= 64*K && ((ind % oopSize) == 0), |
|
3607 "Invariants.") |
|
3608 } |
|
3609 |
|
// Generates the out-of-line SATB enqueue stub into a fresh BufferBlob.
// The stub pushes a pre-value onto the current thread's SATB mark queue,
// calling into the runtime to refill when the queue index reaches zero.
// with_frame == true  : stub saves its own register window; pre-value
//                       arrives in I0 (O0 before the save); returns via ret.
// with_frame == false : leaf stub; pre-value arrives in O0; returns via retl.
// On completion, records the stub's start/end in the matching
// satb_log_enqueue_* statics.  Statement order matters throughout: every
// branch's delay slot is deliberately filled.
static void generate_satb_log_enqueue(bool with_frame) {
  BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
  CodeBuffer buf(bb->instructions_begin(), bb->instructions_size());
  MacroAssembler masm(&buf);
  address start = masm.pc();
  Register pre_val;

  Label refill, restart;
  if (with_frame) {
    masm.save_frame(0);
    pre_val = I0;  // Was O0 before the save.
  } else {
    pre_val = O0;
  }
  // Thread-local SATB queue fields: current index and buffer base.
  int satb_q_index_byte_offset =
    in_bytes(JavaThread::satb_mark_queue_offset() +
             PtrQueue::byte_offset_of_index());
  int satb_q_buf_byte_offset =
    in_bytes(JavaThread::satb_mark_queue_offset() +
             PtrQueue::byte_offset_of_buf());
  // The ld_ptr/st_ptr below assume pointer-width queue fields.
  assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) &&
         in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t),
         "check sizes in assembly below");

  masm.bind(restart);
  // L0 := queue index; zero means the buffer is full and needs a refill.
  masm.ld_ptr(G2_thread, satb_q_index_byte_offset, L0);

  masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill);
  // If the branch is taken, no harm in executing this in the delay slot.
  masm.delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
  // Queue fills downward: decrement index before storing.
  masm.sub(L0, oopSize, L0);

  masm.st_ptr(pre_val, L1, L0);  // [_buf + index] := I0
  if (!with_frame) {
    // Use return-from-leaf
    masm.retl();
    masm.delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
  } else {
    // Not delayed.
    masm.st_ptr(L0, G2_thread, satb_q_index_byte_offset);
  }
  if (with_frame) {
    masm.ret();
    masm.delayed()->restore();
  }
  masm.bind(refill);

  // Slow path: hand the full buffer to the runtime and loop back.
  address handle_zero =
    CAST_FROM_FN_PTR(address,
                     &SATBMarkQueueSet::handle_zero_index_for_thread);
  // This should be rare enough that we can afford to save all the
  // scratch registers that the calling context might be using.
  masm.mov(G1_scratch, L0);
  masm.mov(G3_scratch, L1);
  masm.mov(G4, L2);
  // We need the value of O0 above (for the write into the buffer), so we
  // save and restore it.
  masm.mov(O0, L3);
  // Since the call will overwrite O7, we save and restore that, as well.
  masm.mov(O7, L4);
  masm.call_VM_leaf(L5, handle_zero, G2_thread);
  masm.mov(L0, G1_scratch);
  masm.mov(L1, G3_scratch);
  masm.mov(L2, G4);
  masm.mov(L3, O0);
  masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart);
  masm.delayed()->mov(L4, O7);

  // Publish the generated stub's bounds.
  if (with_frame) {
    satb_log_enqueue_with_frame = start;
    satb_log_enqueue_with_frame_end = masm.pc();
  } else {
    satb_log_enqueue_frameless = start;
    satb_log_enqueue_frameless_end = masm.pc();
  }
}
|
3686 |
|
3687 static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) { |
|
3688 if (with_frame) { |
|
3689 if (satb_log_enqueue_with_frame == 0) { |
|
3690 generate_satb_log_enqueue(with_frame); |
|
3691 assert(satb_log_enqueue_with_frame != 0, "postcondition."); |
|
3692 if (G1SATBPrintStubs) { |
|
3693 tty->print_cr("Generated with-frame satb enqueue:"); |
|
3694 Disassembler::decode((u_char*)satb_log_enqueue_with_frame, |
|
3695 satb_log_enqueue_with_frame_end, |
|
3696 tty); |
|
3697 } |
|
3698 } |
|
3699 } else { |
|
3700 if (satb_log_enqueue_frameless == 0) { |
|
3701 generate_satb_log_enqueue(with_frame); |
|
3702 assert(satb_log_enqueue_frameless != 0, "postcondition."); |
|
3703 if (G1SATBPrintStubs) { |
|
3704 tty->print_cr("Generated frameless satb enqueue:"); |
|
3705 Disassembler::decode((u_char*)satb_log_enqueue_frameless, |
|
3706 satb_log_enqueue_frameless_end, |
|
3707 tty); |
|
3708 } |
|
3709 } |
|
3710 } |
|
3711 } |
|
3712 |
|
// Emits the G1 SATB pre-barrier for a reference store into obj at either
// a constant byte offset or a register index (exactly one must be used —
// see the assert).  The emitted code:
//   1. skips everything if the thread's SATB queue is inactive,
//   2. loads the field's previous value into tmp,
//   3. skips the enqueue if that previous value is NULL,
//   4. otherwise calls a generated enqueue stub with the pre-value in O0.
// tmp is clobbered (it becomes pre_val).  preserve_o_regs forces the
// frameless enqueue variant behind a save/restore so O-registers survive.
void MacroAssembler::g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs) {
  assert(offset == 0 || index == noreg, "choose one");

  if (G1DisablePreBarrier) return;
  // satb_log_barrier(tmp, obj, offset, preserve_o_regs);
  Label filtered;
  // satb_log_barrier_work0(tmp, filtered);
  // Load the queue's "active" flag using a load width that matches the
  // field's declared width.
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    ld(G2,
       in_bytes(JavaThread::satb_mark_queue_offset() +
                PtrQueue::byte_offset_of_active()),
       tmp);
  } else {
    guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1,
              "Assumption");
    ldsb(G2,
         in_bytes(JavaThread::satb_mark_queue_offset() +
                  PtrQueue::byte_offset_of_active()),
         tmp);
  }
  // Check on whether to annul.
  // Barrier is inactive (flag == 0): skip to the end.
  br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
  delayed() -> nop();

  // satb_log_barrier_work1(tmp, offset);
  // Load the field's previous value into tmp.
  if (index == noreg) {
    if (Assembler::is_simm13(offset)) {
      ld_ptr(obj, offset, tmp);
    } else {
      // Offset too large for an immediate: materialize it first.
      set(offset, tmp);
      ld_ptr(obj, tmp, tmp);
    }
  } else {
    ld_ptr(obj, index, tmp);
  }

  // satb_log_barrier_work2(obj, tmp, offset);

  // satb_log_barrier_work3(tmp, filtered, preserve_o_regs);

  const Register pre_val = tmp;

  // Optional debug accounting of NULL pre-values (see count_null_pre_vals).
  if (G1SATBBarrierPrintNullPreVals) {
    save_frame(0);
    mov(pre_val, O0);
    // Save G-regs that target may use.
    mov(G1, L1);
    mov(G2, L2);
    mov(G3, L3);
    mov(G4, L4);
    mov(G5, L5);
    call(CAST_FROM_FN_PTR(address, &count_null_pre_vals));
    delayed()->nop();
    // Restore G-regs that target may have used.
    mov(L1, G1);
    mov(L2, G2);
    mov(L3, G3);
    mov(L4, G4);
    mov(L5, G5);
    restore(G0, G0, G0);
  }

  // Check on whether to annul.
  // A NULL previous value never needs enqueueing.
  br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered);
  delayed() -> nop();

  // OK, it's not filtered, so we'll need to call enqueue.  In the normal
  // case, pre_val will be a scratch G-reg, but there's some cases in which
  // it's an O-reg.  In the first case, do a normal call.  In the latter,
  // do a save here and call the frameless version.

  guarantee(pre_val->is_global() || pre_val->is_out(),
            "Or we need to think harder.");
  if (pre_val->is_global() && !preserve_o_regs) {
    generate_satb_log_enqueue_if_necessary(true); // with frame.
    call(satb_log_enqueue_with_frame);
    delayed()->mov(pre_val, O0);   // argument set up in the delay slot
  } else {
    generate_satb_log_enqueue_if_necessary(false); // with frameless.
    save_frame(0);
    call(satb_log_enqueue_frameless);
    // after_save(): pre_val's name as seen from inside the new window.
    delayed()->mov(pre_val->after_save(), O0);
    restore();
  }

  bind(filtered);
}
|
3800 |
|
// Debug-only card-table write statistics (see count_ct_writes): total
// candidate writes, plus how many were filtered as intra-region, NULL
// new-value, or popular-object stores.  Incremented with Atomic::inc but
// read unsynchronized when printing, so totals are approximate.
static jint num_ct_writes = 0;
static jint num_ct_writes_filtered_in_hr = 0;
static jint num_ct_writes_filtered_null = 0;
static jint num_ct_writes_filtered_pop = 0;
// Cached G1 heap pointer, lazily initialized on first use in count_ct_writes.
static G1CollectedHeap* g1 = NULL;
|
3806 |
|
// Debug counter for the G1 post-barrier (called from generated code when
// G1PrintCTFilterStats is set).  Classifies one candidate card-table write:
// filter_val == NULL  => store was within a single heap region (filtered),
// new_val == NULL     => NULL store (filtered),
// otherwise counts stores below the popular-object boundary.
// Every millionth write, prints the filtering breakdown.
// Returns Thread::current() so the generated caller can restore G2_thread
// from O0 (see the mov(O0, G2) in g1_write_barrier_post).
static Thread* count_ct_writes(void* filter_val, void* new_val) {
  Atomic::inc(&num_ct_writes);
  if (filter_val == NULL) {
    Atomic::inc(&num_ct_writes_filtered_in_hr);
  } else if (new_val == NULL) {
    Atomic::inc(&num_ct_writes_filtered_null);
  } else {
    // Lazily cache the G1 heap; safe because the value never changes.
    if (g1 == NULL) {
      g1 = G1CollectedHeap::heap();
    }
    if ((HeapWord*)new_val < g1->popular_object_boundary()) {
      Atomic::inc(&num_ct_writes_filtered_pop);
    }
  }
  // Periodic report; num_ct_writes > 0 here since it was just incremented.
  if ((num_ct_writes % 1000000) == 0) {
    jint num_ct_writes_filtered =
      num_ct_writes_filtered_in_hr +
      num_ct_writes_filtered_null +
      num_ct_writes_filtered_pop;

    tty->print_cr("%d potential CT writes: %5.2f%% filtered\n"
                  "   (%5.2f%% intra-HR, %5.2f%% null, %5.2f%% popular).",
                  num_ct_writes,
                  100.0*(float)num_ct_writes_filtered/(float)num_ct_writes,
                  100.0*(float)num_ct_writes_filtered_in_hr/
                  (float)num_ct_writes,
                  100.0*(float)num_ct_writes_filtered_null/
                  (float)num_ct_writes,
                  100.0*(float)num_ct_writes_filtered_pop/
                  (float)num_ct_writes);
  }
  return Thread::current();
}
|
3840 |
|
// Lazily-generated dirty-card enqueue stub bounds (see
// generate_dirty_card_log_enqueue / generate_dirty_card_log_enqueue_if_necessary).
static address dirty_card_log_enqueue = 0;
static u_char* dirty_card_log_enqueue_end = 0;

// This gets to assume that o0 contains the object address.
// Generates a leaf stub that dirties the card for the address in O0 and,
// if the card was not already dirty, pushes the card pointer onto the
// thread's dirty-card queue (refilling via the runtime when full).
// Clobbers O0-O3 and several L-registers; returns with retl.
static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
  BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
  CodeBuffer buf(bb->instructions_begin(), bb->instructions_size());
  MacroAssembler masm(&buf);
  address start = masm.pc();

  Label not_already_dirty, restart, refill;

  // O0 := card index for the stored-into address.
#ifdef _LP64
  masm.srlx(O0, CardTableModRefBS::card_shift, O0);
#else
  masm.srl(O0, CardTableModRefBS::card_shift, O0);
#endif
  Address rs(O1, (address)byte_map_base);
  masm.load_address(rs); // O1 := <card table base>
  masm.ldub(O0, O1, O2); // O2 := [O0 + O1]

  // Nonzero card byte means "not yet dirty": branch to dirty-and-enqueue.
  masm.br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt,
                      O2, not_already_dirty);
  // Get O1 + O2 into a reg by itself -- useful in the take-the-branch
  // case, harmless if not.
  masm.delayed()->add(O0, O1, O3);

  // We didn't take the branch, so we're already dirty: return.
  // Use return-from-leaf
  masm.retl();
  masm.delayed()->nop();

  // Not dirty.
  masm.bind(not_already_dirty);
  // First, dirty it.
  masm.stb(G0, O3, G0);  // [cardPtr] := 0  (i.e., dirty).
  // Thread-local dirty-card queue fields: current index and buffer base.
  int dirty_card_q_index_byte_offset =
    in_bytes(JavaThread::dirty_card_queue_offset() +
             PtrQueue::byte_offset_of_index());
  int dirty_card_q_buf_byte_offset =
    in_bytes(JavaThread::dirty_card_queue_offset() +
             PtrQueue::byte_offset_of_buf());
  masm.bind(restart);
  masm.ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);

  // Index of zero means the queue buffer is full: go refill.
  masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn,
                      L0, refill);
  // If the branch is taken, no harm in executing this in the delay slot.
  masm.delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
  // Queue fills downward: decrement index before storing.
  masm.sub(L0, oopSize, L0);

  masm.st_ptr(O3, L1, L0);  // [_buf + index] := I0
  // Use return-from-leaf
  masm.retl();
  masm.delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);

  masm.bind(refill);
  address handle_zero =
    CAST_FROM_FN_PTR(address,
                     &DirtyCardQueueSet::handle_zero_index_for_thread);
  // This should be rare enough that we can afford to save all the
  // scratch registers that the calling context might be using.
  masm.mov(G1_scratch, L3);
  masm.mov(G3_scratch, L5);
  // We need the value of O3 above (for the write into the buffer), so we
  // save and restore it.
  masm.mov(O3, L6);
  // Since the call will overwrite O7, we save and restore that, as well.
  masm.mov(O7, L4);

  masm.call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
  masm.mov(L3, G1_scratch);
  masm.mov(L5, G3_scratch);
  masm.mov(L6, O3);
  masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart);
  masm.delayed()->mov(L4, O7);

  // Publish the generated stub's bounds.
  dirty_card_log_enqueue = start;
  dirty_card_log_enqueue_end = masm.pc();
  // XXX Should have a guarantee here about not going off the end!
  // Does it already do so?  Do an experiment...
}
|
3923 |
|
3924 static inline void |
|
3925 generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) { |
|
3926 if (dirty_card_log_enqueue == 0) { |
|
3927 generate_dirty_card_log_enqueue(byte_map_base); |
|
3928 assert(dirty_card_log_enqueue != 0, "postcondition."); |
|
3929 if (G1SATBPrintStubs) { |
|
3930 tty->print_cr("Generated dirty_card enqueue:"); |
|
3931 Disassembler::decode((u_char*)dirty_card_log_enqueue, |
|
3932 dirty_card_log_enqueue_end, |
|
3933 tty); |
|
3934 } |
|
3935 } |
|
3936 } |
|
3937 |
|
3938 |
|
// Emits the G1 post-barrier for a reference store of new_val into
// store_addr.  Optionally filters out stores within a single heap region
// (G1RSBarrierRegionFilter), then either calls the generated dirty-card
// enqueue stub (G1RSBarrierUseQueue) or dirties the card inline.
// tmp is clobbered; store_addr is clobbered in the inline (non-queue) path.
void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) {

  Label filtered;
  MacroAssembler* post_filter_masm = this;

  // Storing constant NULL needs no barrier.
  if (new_val == G0) return;
  if (G1DisablePostBarrier) return;

  G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::G1SATBCT ||
         bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier");
  if (G1RSBarrierRegionFilter) {
    // tmp := 0 iff store_addr and new_val lie in the same heap region.
    xor3(store_addr, new_val, tmp);
#ifdef _LP64
    srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
#else
    srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
#endif
    // Optional debug accounting of filtered writes (see count_ct_writes).
    if (G1PrintCTFilterStats) {
      guarantee(tmp->is_global(), "Or stats won't work...");
      // This is a sleazy hack: I'm temporarily hijacking G2, which I
      // promise to restore.
      mov(new_val, G2);
      save_frame(0);
      mov(tmp, O0);
      mov(G2, O1);
      // Save G-regs that target may use.
      mov(G1, L1);
      mov(G2, L2);
      mov(G3, L3);
      mov(G4, L4);
      mov(G5, L5);
      call(CAST_FROM_FN_PTR(address, &count_ct_writes));
      delayed()->nop();
      // count_ct_writes returns Thread::current() in O0, which restores G2.
      mov(O0, G2);
      // Restore G-regs that target may have used.
      mov(L1, G1);
      mov(L3, G3);
      mov(L4, G4);
      mov(L5, G5);
      restore(G0, G0, G0);
    }
    // XXX Should I predict this taken or not?  Does it mattern?
    // Intra-region store: skip the card-table work entirely.
    br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
    delayed()->nop();
  }

  // Now we decide how to generate the card table write.  If we're
  // enqueueing, we call out to a generated function.  Otherwise, we do it
  // inline here.

  if (G1RSBarrierUseQueue) {
    // If the "store_addr" register is an "in" or "local" register, move it to
    // a scratch reg so we can pass it as an argument.
    bool use_scr = !(store_addr->is_global() || store_addr->is_out());
    // Pick a scratch register different from "tmp".
    Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
    // Make sure we use up the delay slot!
    // (post_filter_masm fills the delay slot of the filter branch above.)
    if (use_scr) {
      post_filter_masm->mov(store_addr, scr);
    } else {
      post_filter_masm->nop();
    }
    generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
    save_frame(0);
    call(dirty_card_log_enqueue);
    // Stub argument (the stored-into address) is set up in the delay slot.
    if (use_scr) {
      delayed()->mov(scr, O0);
    } else {
      delayed()->mov(store_addr->after_save(), O0);
    }
    restore();

  } else {

    // Inline card dirtying: store_addr := card index, then [base+index] := 0.
#ifdef _LP64
    post_filter_masm->srlx(store_addr, CardTableModRefBS::card_shift, store_addr);
#else
    post_filter_masm->srl(store_addr, CardTableModRefBS::card_shift, store_addr);
#endif
    assert( tmp != store_addr, "need separate temp reg");
    Address rs(tmp, (address)bs->byte_map_base);
    load_address(rs);
    stb(G0, rs.base(), store_addr);
  }

  bind(filtered);

}
|
4028 |
|
4029 #endif // SERIALGC |
|
4030 /////////////////////////////////////////////////////////////////////////////////// |
|
4031 |
|
4032 void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_val, Register tmp) { |
|
4033 // If we're writing constant NULL, we can skip the write barrier. |
|
4034 if (new_val == G0) return; |
|
4035 CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set(); |
|
4036 assert(bs->kind() == BarrierSet::CardTableModRef || |
|
4037 bs->kind() == BarrierSet::CardTableExtension, "wrong barrier"); |
|
4038 card_table_write(bs->byte_map_base, tmp, store_addr); |
|
4039 } |
|
4040 |
3540 void MacroAssembler::load_klass(Register s, Register d) { |
4041 void MacroAssembler::load_klass(Register s, Register d) { |
3541 // The number of bytes in this code is used by |
4042 // The number of bytes in this code is used by |
3542 // MachCallDynamicJavaNode::ret_addr_offset() |
4043 // MachCallDynamicJavaNode::ret_addr_offset() |
3543 // if this changes, change that. |
4044 // if this changes, change that. |
3544 if (UseCompressedOops) { |
4045 if (UseCompressedOops) { |