hotspot/src/cpu/sparc/vm/assembler_sparc.cpp
changeset 1374 4c24294029a9
parent 371 1aacedc9db7c
child 1376 f7fc7a708b63
equal deleted inserted replaced
615:570062d730b2 1374:4c24294029a9
   126   return r;
   126   return r;
   127 }
   127 }
   128 
   128 
   129 int AbstractAssembler::code_fill_byte() {
   129 int AbstractAssembler::code_fill_byte() {
   130   return 0x00;                  // illegal instruction 0x00000000
   130   return 0x00;                  // illegal instruction 0x00000000
       
   131 }
       
   132 
       
   133 Assembler::Condition Assembler::reg_cond_to_cc_cond(Assembler::RCondition in) {
       
   134   switch (in) {
       
   135   case rc_z:   return equal;
       
   136   case rc_lez: return lessEqual;
       
   137   case rc_lz:  return less;
       
   138   case rc_nz:  return notEqual;
       
   139   case rc_gz:  return greater;
       
   140   case rc_gez: return greaterEqual;
       
   141   default:
       
   142     ShouldNotReachHere();
       
   143   }
       
   144   return equal;
   131 }
   145 }
   132 
   146 
   133 // Generate a bunch 'o stuff (including v9's
   147 // Generate a bunch 'o stuff (including v9's
   134 #ifndef PRODUCT
   148 #ifndef PRODUCT
   135 void Assembler::test_v9() {
   149 void Assembler::test_v9() {
  1211 
  1225 
  1212   st_ptr(oop_result, vm_result_addr);
  1226   st_ptr(oop_result, vm_result_addr);
  1213 }
  1227 }
  1214 
  1228 
  1215 
  1229 
  1216 void MacroAssembler::store_check(Register tmp, Register obj) {
  1230 void MacroAssembler::card_table_write(jbyte* byte_map_base,
  1217   // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.)
  1231                                       Register tmp, Register obj) {
  1218 
       
  1219   /* $$$ This stuff needs to go into one of the BarrierSet generator
       
  1220      functions.  (The particular barrier sets will have to be friends of
       
  1221      MacroAssembler, I guess.) */
       
  1222   BarrierSet* bs = Universe::heap()->barrier_set();
       
  1223   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
       
  1224   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
       
  1225   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
       
  1226 #ifdef _LP64
  1232 #ifdef _LP64
  1227   srlx(obj, CardTableModRefBS::card_shift, obj);
  1233   srlx(obj, CardTableModRefBS::card_shift, obj);
  1228 #else
  1234 #else
  1229   srl(obj, CardTableModRefBS::card_shift, obj);
  1235   srl(obj, CardTableModRefBS::card_shift, obj);
  1230 #endif
  1236 #endif
  1231   assert( tmp != obj, "need separate temp reg");
  1237   assert( tmp != obj, "need separate temp reg");
  1232   Address rs(tmp, (address)ct->byte_map_base);
  1238   Address rs(tmp, (address)byte_map_base);
  1233   load_address(rs);
  1239   load_address(rs);
  1234   stb(G0, rs.base(), obj);
  1240   stb(G0, rs.base(), obj);
  1235 }
       
  1236 
       
  1237 void MacroAssembler::store_check(Register tmp, Register obj, Register offset) {
       
  1238   store_check(tmp, obj);
       
  1239 }
  1241 }
  1240 
  1242 
  1241 // %%% Note:  The following six instructions have been moved,
  1243 // %%% Note:  The following six instructions have been moved,
  1242 //            unchanged, from assembler_sparc.inline.hpp.
  1244 //            unchanged, from assembler_sparc.inline.hpp.
  1243 //            They will be refactored at a later date.
  1245 //            They will be refactored at a later date.
  1646   // plausibility check for oops
  1648   // plausibility check for oops
  1647   if (!VerifyOops) return;
  1649   if (!VerifyOops) return;
  1648 
  1650 
  1649   if (reg == G0)  return;       // always NULL, which is always an oop
  1651   if (reg == G0)  return;       // always NULL, which is always an oop
  1650 
  1652 
  1651   char buffer[16];
  1653   char buffer[64];
       
  1654 #ifdef COMPILER1
       
  1655   if (CommentedAssembly) {
       
  1656     snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
       
  1657     block_comment(buffer);
       
  1658   }
       
  1659 #endif
       
  1660 
       
  1661   int len = strlen(file) + strlen(msg) + 1 + 4;
  1652   sprintf(buffer, "%d", line);
  1662   sprintf(buffer, "%d", line);
  1653   int len = strlen(file) + strlen(msg) + 1 + 4 + strlen(buffer);
  1663   len += strlen(buffer);
       
  1664   sprintf(buffer, " at offset %d ", offset());
       
  1665   len += strlen(buffer);
  1654   char * real_msg = new char[len];
  1666   char * real_msg = new char[len];
  1655   sprintf(real_msg, "%s (%s:%d)", msg, file, line);
  1667   sprintf(real_msg, "%s%s(%s:%d)", msg, buffer, file, line);
  1656 
  1668 
  1657   // Call indirectly to solve generation ordering problem
  1669   // Call indirectly to solve generation ordering problem
  1658   Address a(O7, (address)StubRoutines::verify_oop_subroutine_entry_address());
  1670   Address a(O7, (address)StubRoutines::verify_oop_subroutine_entry_address());
  1659 
  1671 
  1660   // Make some space on stack above the current register window.
  1672   // Make some space on stack above the current register window.
  2040   bpr( rc_nz, a, p, s1, L );
  2052   bpr( rc_nz, a, p, s1, L );
  2041 #else
  2053 #else
  2042   tst(s1);
  2054   tst(s1);
  2043   br ( notZero, a, p, L );
  2055   br ( notZero, a, p, L );
  2044 #endif
  2056 #endif
       
  2057 }
       
  2058 
       
  2059 void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
       
  2060                                      Register s1, address d,
       
  2061                                      relocInfo::relocType rt ) {
       
  2062   if (VM_Version::v9_instructions_work()) {
       
  2063     bpr(rc, a, p, s1, d, rt);
       
  2064   } else {
       
  2065     tst(s1);
       
  2066     br(reg_cond_to_cc_cond(rc), a, p, d, rt);
       
  2067   }
       
  2068 }
       
  2069 
       
  2070 void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
       
  2071                                      Register s1, Label& L ) {
       
  2072   if (VM_Version::v9_instructions_work()) {
       
  2073     bpr(rc, a, p, s1, L);
       
  2074   } else {
       
  2075     tst(s1);
       
  2076     br(reg_cond_to_cc_cond(rc), a, p, L);
       
  2077   }
  2045 }
  2078 }
  2046 
  2079 
  2047 
  2080 
  2048 // instruction sequences factored across compiler & interpreter
  2081 // instruction sequences factored across compiler & interpreter
  2049 
  2082 
  3224   // make sure arguments make sense
  3257   // make sure arguments make sense
  3225   assert_different_registers(obj, var_size_in_bytes, t1, t2);
  3258   assert_different_registers(obj, var_size_in_bytes, t1, t2);
  3226   assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size");
  3259   assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size");
  3227   assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
  3260   assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
  3228 
  3261 
  3229   // get eden boundaries
  3262   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
  3230   // note: we need both top & top_addr!
  3263     // No allocation in the shared eden.
  3231   const Register top_addr = t1;
  3264     br(Assembler::always, false, Assembler::pt, slow_case);
  3232   const Register end      = t2;
  3265     delayed()->nop();
  3233 
  3266   } else {
  3234   CollectedHeap* ch = Universe::heap();
  3267     // get eden boundaries
  3235   set((intx)ch->top_addr(), top_addr);
  3268     // note: we need both top & top_addr!
  3236   intx delta = (intx)ch->end_addr() - (intx)ch->top_addr();
  3269     const Register top_addr = t1;
  3237   ld_ptr(top_addr, delta, end);
  3270     const Register end      = t2;
  3238   ld_ptr(top_addr, 0, obj);
  3271 
  3239 
  3272     CollectedHeap* ch = Universe::heap();
  3240   // try to allocate
  3273     set((intx)ch->top_addr(), top_addr);
  3241   Label retry;
  3274     intx delta = (intx)ch->end_addr() - (intx)ch->top_addr();
  3242   bind(retry);
  3275     ld_ptr(top_addr, delta, end);
       
  3276     ld_ptr(top_addr, 0, obj);
       
  3277 
       
  3278     // try to allocate
       
  3279     Label retry;
       
  3280     bind(retry);
  3243 #ifdef ASSERT
  3281 #ifdef ASSERT
  3244   // make sure eden top is properly aligned
  3282     // make sure eden top is properly aligned
  3245   {
  3283     {
  3246     Label L;
  3284       Label L;
  3247     btst(MinObjAlignmentInBytesMask, obj);
  3285       btst(MinObjAlignmentInBytesMask, obj);
  3248     br(Assembler::zero, false, Assembler::pt, L);
  3286       br(Assembler::zero, false, Assembler::pt, L);
  3249     delayed()->nop();
  3287       delayed()->nop();
  3250     stop("eden top is not properly aligned");
  3288       stop("eden top is not properly aligned");
  3251     bind(L);
  3289       bind(L);
  3252   }
  3290     }
  3253 #endif // ASSERT
  3291 #endif // ASSERT
  3254   const Register free = end;
  3292     const Register free = end;
  3255   sub(end, obj, free);                                   // compute amount of free space
  3293     sub(end, obj, free);                                   // compute amount of free space
  3256   if (var_size_in_bytes->is_valid()) {
  3294     if (var_size_in_bytes->is_valid()) {
  3257     // size is unknown at compile time
  3295       // size is unknown at compile time
  3258     cmp(free, var_size_in_bytes);
  3296       cmp(free, var_size_in_bytes);
  3259     br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
  3297       br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
  3260     delayed()->add(obj, var_size_in_bytes, end);
  3298       delayed()->add(obj, var_size_in_bytes, end);
  3261   } else {
  3299     } else {
  3262     // size is known at compile time
  3300       // size is known at compile time
  3263     cmp(free, con_size_in_bytes);
  3301       cmp(free, con_size_in_bytes);
  3264     br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
  3302       br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
  3265     delayed()->add(obj, con_size_in_bytes, end);
  3303       delayed()->add(obj, con_size_in_bytes, end);
  3266   }
  3304     }
  3267   // Compare obj with the value at top_addr; if still equal, swap the value of
  3305     // Compare obj with the value at top_addr; if still equal, swap the value of
  3268   // end with the value at top_addr. If not equal, read the value at top_addr
  3306     // end with the value at top_addr. If not equal, read the value at top_addr
  3269   // into end.
  3307     // into end.
  3270   casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
  3308     casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
  3271   // if someone beat us on the allocation, try again, otherwise continue
  3309     // if someone beat us on the allocation, try again, otherwise continue
  3272   cmp(obj, end);
  3310     cmp(obj, end);
  3273   brx(Assembler::notEqual, false, Assembler::pn, retry);
  3311     brx(Assembler::notEqual, false, Assembler::pn, retry);
  3274   delayed()->mov(end, obj);                              // nop if successfull since obj == end
  3312     delayed()->mov(end, obj);                              // nop if successfull since obj == end
  3275 
  3313 
  3276 #ifdef ASSERT
  3314 #ifdef ASSERT
  3277   // make sure eden top is properly aligned
  3315     // make sure eden top is properly aligned
  3278   {
  3316     {
  3279     Label L;
  3317       Label L;
  3280     const Register top_addr = t1;
  3318       const Register top_addr = t1;
  3281 
  3319 
  3282     set((intx)ch->top_addr(), top_addr);
  3320       set((intx)ch->top_addr(), top_addr);
  3283     ld_ptr(top_addr, 0, top_addr);
  3321       ld_ptr(top_addr, 0, top_addr);
  3284     btst(MinObjAlignmentInBytesMask, top_addr);
  3322       btst(MinObjAlignmentInBytesMask, top_addr);
  3285     br(Assembler::zero, false, Assembler::pt, L);
  3323       br(Assembler::zero, false, Assembler::pt, L);
  3286     delayed()->nop();
  3324       delayed()->nop();
  3287     stop("eden top is not properly aligned");
  3325       stop("eden top is not properly aligned");
  3288     bind(L);
  3326       bind(L);
  3289   }
  3327     }
  3290 #endif // ASSERT
  3328 #endif // ASSERT
       
  3329   }
  3291 }
  3330 }
  3292 
  3331 
  3293 
  3332 
  3294 void MacroAssembler::tlab_allocate(
  3333 void MacroAssembler::tlab_allocate(
  3295   Register obj,                        // result: pointer to object after successful allocation
  3334   Register obj,                        // result: pointer to object after successful allocation
  3535     set((-i*offset)+STACK_BIAS, Rscratch);
  3574     set((-i*offset)+STACK_BIAS, Rscratch);
  3536     st(G0, Rtsp, Rscratch);
  3575     st(G0, Rtsp, Rscratch);
  3537   }
  3576   }
  3538 }
  3577 }
  3539 
  3578 
       
  3579 ///////////////////////////////////////////////////////////////////////////////////
       
  3580 #ifndef SERIALGC
       
  3581 
       
  3582 static uint num_stores = 0;
       
  3583 static uint num_null_pre_stores = 0;
       
  3584 
       
  3585 static void count_null_pre_vals(void* pre_val) {
       
  3586   num_stores++;
       
  3587   if (pre_val == NULL) num_null_pre_stores++;
       
  3588   if ((num_stores % 1000000) == 0) {
       
  3589     tty->print_cr(UINT32_FORMAT " stores, " UINT32_FORMAT " (%5.2f%%) with null pre-vals.",
       
  3590                   num_stores, num_null_pre_stores,
       
  3591                   100.0*(float)num_null_pre_stores/(float)num_stores);
       
  3592   }
       
  3593 }
       
  3594 
       
  3595 static address satb_log_enqueue_with_frame = 0;
       
  3596 static u_char* satb_log_enqueue_with_frame_end = 0;
       
  3597 
       
  3598 static address satb_log_enqueue_frameless = 0;
       
  3599 static u_char* satb_log_enqueue_frameless_end = 0;
       
  3600 
       
  3601 static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions?
       
  3602 
       
  3603 // The calls to this don't work.  We'd need to do a fair amount of work to
       
  3604 // make it work.
       
  3605 static void check_index(int ind) {
       
  3606   assert(0 <= ind && ind <= 64*K && ((ind % oopSize) == 0),
       
  3607          "Invariants.")
       
  3608 }
       
  3609 
       
  3610 static void generate_satb_log_enqueue(bool with_frame) {
       
  3611   BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
       
  3612   CodeBuffer buf(bb->instructions_begin(), bb->instructions_size());
       
  3613   MacroAssembler masm(&buf);
       
  3614   address start = masm.pc();
       
  3615   Register pre_val;
       
  3616 
       
  3617   Label refill, restart;
       
  3618   if (with_frame) {
       
  3619     masm.save_frame(0);
       
  3620     pre_val = I0;  // Was O0 before the save.
       
  3621   } else {
       
  3622     pre_val = O0;
       
  3623   }
       
  3624   int satb_q_index_byte_offset =
       
  3625     in_bytes(JavaThread::satb_mark_queue_offset() +
       
  3626              PtrQueue::byte_offset_of_index());
       
  3627   int satb_q_buf_byte_offset =
       
  3628     in_bytes(JavaThread::satb_mark_queue_offset() +
       
  3629              PtrQueue::byte_offset_of_buf());
       
  3630   assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) &&
       
  3631          in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t),
       
  3632          "check sizes in assembly below");
       
  3633 
       
  3634   masm.bind(restart);
       
  3635   masm.ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
       
  3636 
       
  3637   masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill);
       
  3638   // If the branch is taken, no harm in executing this in the delay slot.
       
  3639   masm.delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
       
  3640   masm.sub(L0, oopSize, L0);
       
  3641 
       
  3642   masm.st_ptr(pre_val, L1, L0);  // [_buf + index] := I0
       
  3643   if (!with_frame) {
       
  3644     // Use return-from-leaf
       
  3645     masm.retl();
       
  3646     masm.delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
       
  3647   } else {
       
  3648     // Not delayed.
       
  3649     masm.st_ptr(L0, G2_thread, satb_q_index_byte_offset);
       
  3650   }
       
  3651   if (with_frame) {
       
  3652     masm.ret();
       
  3653     masm.delayed()->restore();
       
  3654   }
       
  3655   masm.bind(refill);
       
  3656 
       
  3657   address handle_zero =
       
  3658     CAST_FROM_FN_PTR(address,
       
  3659                      &SATBMarkQueueSet::handle_zero_index_for_thread);
       
  3660   // This should be rare enough that we can afford to save all the
       
  3661   // scratch registers that the calling context might be using.
       
  3662   masm.mov(G1_scratch, L0);
       
  3663   masm.mov(G3_scratch, L1);
       
  3664   masm.mov(G4, L2);
       
  3665   // We need the value of O0 above (for the write into the buffer), so we
       
  3666   // save and restore it.
       
  3667   masm.mov(O0, L3);
       
  3668   // Since the call will overwrite O7, we save and restore that, as well.
       
  3669   masm.mov(O7, L4);
       
  3670   masm.call_VM_leaf(L5, handle_zero, G2_thread);
       
  3671   masm.mov(L0, G1_scratch);
       
  3672   masm.mov(L1, G3_scratch);
       
  3673   masm.mov(L2, G4);
       
  3674   masm.mov(L3, O0);
       
  3675   masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart);
       
  3676   masm.delayed()->mov(L4, O7);
       
  3677 
       
  3678   if (with_frame) {
       
  3679     satb_log_enqueue_with_frame = start;
       
  3680     satb_log_enqueue_with_frame_end = masm.pc();
       
  3681   } else {
       
  3682     satb_log_enqueue_frameless = start;
       
  3683     satb_log_enqueue_frameless_end = masm.pc();
       
  3684   }
       
  3685 }
       
  3686 
       
  3687 static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) {
       
  3688   if (with_frame) {
       
  3689     if (satb_log_enqueue_with_frame == 0) {
       
  3690       generate_satb_log_enqueue(with_frame);
       
  3691       assert(satb_log_enqueue_with_frame != 0, "postcondition.");
       
  3692       if (G1SATBPrintStubs) {
       
  3693         tty->print_cr("Generated with-frame satb enqueue:");
       
  3694         Disassembler::decode((u_char*)satb_log_enqueue_with_frame,
       
  3695                              satb_log_enqueue_with_frame_end,
       
  3696                              tty);
       
  3697       }
       
  3698     }
       
  3699   } else {
       
  3700     if (satb_log_enqueue_frameless == 0) {
       
  3701       generate_satb_log_enqueue(with_frame);
       
  3702       assert(satb_log_enqueue_frameless != 0, "postcondition.");
       
  3703       if (G1SATBPrintStubs) {
       
  3704         tty->print_cr("Generated frameless satb enqueue:");
       
  3705         Disassembler::decode((u_char*)satb_log_enqueue_frameless,
       
  3706                              satb_log_enqueue_frameless_end,
       
  3707                              tty);
       
  3708       }
       
  3709     }
       
  3710   }
       
  3711 }
       
  3712 
       
  3713 void MacroAssembler::g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs) {
       
  3714   assert(offset == 0 || index == noreg, "choose one");
       
  3715 
       
  3716   if (G1DisablePreBarrier) return;
       
  3717   // satb_log_barrier(tmp, obj, offset, preserve_o_regs);
       
  3718   Label filtered;
       
  3719   // satb_log_barrier_work0(tmp, filtered);
       
  3720   if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
       
  3721     ld(G2,
       
  3722        in_bytes(JavaThread::satb_mark_queue_offset() +
       
  3723                 PtrQueue::byte_offset_of_active()),
       
  3724        tmp);
       
  3725   } else {
       
  3726     guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1,
       
  3727               "Assumption");
       
  3728     ldsb(G2,
       
  3729          in_bytes(JavaThread::satb_mark_queue_offset() +
       
  3730                   PtrQueue::byte_offset_of_active()),
       
  3731          tmp);
       
  3732   }
       
  3733   // Check on whether to annul.
       
  3734   br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
       
  3735   delayed() -> nop();
       
  3736 
       
  3737   // satb_log_barrier_work1(tmp, offset);
       
  3738   if (index == noreg) {
       
  3739     if (Assembler::is_simm13(offset)) {
       
  3740       ld_ptr(obj, offset, tmp);
       
  3741     } else {
       
  3742       set(offset, tmp);
       
  3743       ld_ptr(obj, tmp, tmp);
       
  3744     }
       
  3745   } else {
       
  3746     ld_ptr(obj, index, tmp);
       
  3747   }
       
  3748 
       
  3749   // satb_log_barrier_work2(obj, tmp, offset);
       
  3750 
       
  3751   // satb_log_barrier_work3(tmp, filtered, preserve_o_regs);
       
  3752 
       
  3753   const Register pre_val = tmp;
       
  3754 
       
  3755   if (G1SATBBarrierPrintNullPreVals) {
       
  3756     save_frame(0);
       
  3757     mov(pre_val, O0);
       
  3758     // Save G-regs that target may use.
       
  3759     mov(G1, L1);
       
  3760     mov(G2, L2);
       
  3761     mov(G3, L3);
       
  3762     mov(G4, L4);
       
  3763     mov(G5, L5);
       
  3764     call(CAST_FROM_FN_PTR(address, &count_null_pre_vals));
       
  3765     delayed()->nop();
       
  3766     // Restore G-regs that target may have used.
       
  3767     mov(L1, G1);
       
  3768     mov(L2, G2);
       
  3769     mov(L3, G3);
       
  3770     mov(L4, G4);
       
  3771     mov(L5, G5);
       
  3772     restore(G0, G0, G0);
       
  3773   }
       
  3774 
       
  3775   // Check on whether to annul.
       
  3776   br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered);
       
  3777   delayed() -> nop();
       
  3778 
       
  3779   // OK, it's not filtered, so we'll need to call enqueue.  In the normal
       
  3780   // case, pre_val will be a scratch G-reg, but there's some cases in which
       
  3781   // it's an O-reg.  In the first case, do a normal call.  In the latter,
       
  3782   // do a save here and call the frameless version.
       
  3783 
       
  3784   guarantee(pre_val->is_global() || pre_val->is_out(),
       
  3785             "Or we need to think harder.");
       
  3786   if (pre_val->is_global() && !preserve_o_regs) {
       
  3787     generate_satb_log_enqueue_if_necessary(true); // with frame.
       
  3788     call(satb_log_enqueue_with_frame);
       
  3789     delayed()->mov(pre_val, O0);
       
  3790   } else {
       
  3791     generate_satb_log_enqueue_if_necessary(false); // with frameless.
       
  3792     save_frame(0);
       
  3793     call(satb_log_enqueue_frameless);
       
  3794     delayed()->mov(pre_val->after_save(), O0);
       
  3795     restore();
       
  3796   }
       
  3797 
       
  3798   bind(filtered);
       
  3799 }
       
  3800 
       
  3801 static jint num_ct_writes = 0;
       
  3802 static jint num_ct_writes_filtered_in_hr = 0;
       
  3803 static jint num_ct_writes_filtered_null = 0;
       
  3804 static jint num_ct_writes_filtered_pop = 0;
       
  3805 static G1CollectedHeap* g1 = NULL;
       
  3806 
       
  3807 static Thread* count_ct_writes(void* filter_val, void* new_val) {
       
  3808   Atomic::inc(&num_ct_writes);
       
  3809   if (filter_val == NULL) {
       
  3810     Atomic::inc(&num_ct_writes_filtered_in_hr);
       
  3811   } else if (new_val == NULL) {
       
  3812     Atomic::inc(&num_ct_writes_filtered_null);
       
  3813   } else {
       
  3814     if (g1 == NULL) {
       
  3815       g1 = G1CollectedHeap::heap();
       
  3816     }
       
  3817     if ((HeapWord*)new_val < g1->popular_object_boundary()) {
       
  3818       Atomic::inc(&num_ct_writes_filtered_pop);
       
  3819     }
       
  3820   }
       
  3821   if ((num_ct_writes % 1000000) == 0) {
       
  3822     jint num_ct_writes_filtered =
       
  3823       num_ct_writes_filtered_in_hr +
       
  3824       num_ct_writes_filtered_null +
       
  3825       num_ct_writes_filtered_pop;
       
  3826 
       
  3827     tty->print_cr("%d potential CT writes: %5.2f%% filtered\n"
       
  3828                   "   (%5.2f%% intra-HR, %5.2f%% null, %5.2f%% popular).",
       
  3829                   num_ct_writes,
       
  3830                   100.0*(float)num_ct_writes_filtered/(float)num_ct_writes,
       
  3831                   100.0*(float)num_ct_writes_filtered_in_hr/
       
  3832                   (float)num_ct_writes,
       
  3833                   100.0*(float)num_ct_writes_filtered_null/
       
  3834                   (float)num_ct_writes,
       
  3835                   100.0*(float)num_ct_writes_filtered_pop/
       
  3836                   (float)num_ct_writes);
       
  3837   }
       
  3838   return Thread::current();
       
  3839 }
       
  3840 
       
  3841 static address dirty_card_log_enqueue = 0;
       
  3842 static u_char* dirty_card_log_enqueue_end = 0;
       
  3843 
       
  3844 // This gets to assume that o0 contains the object address.
       
  3845 static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
       
  3846   BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
       
  3847   CodeBuffer buf(bb->instructions_begin(), bb->instructions_size());
       
  3848   MacroAssembler masm(&buf);
       
  3849   address start = masm.pc();
       
  3850 
       
  3851   Label not_already_dirty, restart, refill;
       
  3852 
       
  3853 #ifdef _LP64
       
  3854   masm.srlx(O0, CardTableModRefBS::card_shift, O0);
       
  3855 #else
       
  3856   masm.srl(O0, CardTableModRefBS::card_shift, O0);
       
  3857 #endif
       
  3858   Address rs(O1, (address)byte_map_base);
       
  3859   masm.load_address(rs); // O1 := <card table base>
       
  3860   masm.ldub(O0, O1, O2); // O2 := [O0 + O1]
       
  3861 
       
  3862   masm.br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt,
       
  3863                       O2, not_already_dirty);
       
  3864   // Get O1 + O2 into a reg by itself -- useful in the take-the-branch
       
  3865   // case, harmless if not.
       
  3866   masm.delayed()->add(O0, O1, O3);
       
  3867 
       
  3868   // We didn't take the branch, so we're already dirty: return.
       
  3869   // Use return-from-leaf
       
  3870   masm.retl();
       
  3871   masm.delayed()->nop();
       
  3872 
       
  3873   // Not dirty.
       
  3874   masm.bind(not_already_dirty);
       
  3875   // First, dirty it.
       
  3876   masm.stb(G0, O3, G0);  // [cardPtr] := 0  (i.e., dirty).
       
  3877   int dirty_card_q_index_byte_offset =
       
  3878     in_bytes(JavaThread::dirty_card_queue_offset() +
       
  3879              PtrQueue::byte_offset_of_index());
       
  3880   int dirty_card_q_buf_byte_offset =
       
  3881     in_bytes(JavaThread::dirty_card_queue_offset() +
       
  3882              PtrQueue::byte_offset_of_buf());
       
  3883   masm.bind(restart);
       
  3884   masm.ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);
       
  3885 
       
  3886   masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn,
       
  3887                       L0, refill);
       
  3888   // If the branch is taken, no harm in executing this in the delay slot.
       
  3889   masm.delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
       
  3890   masm.sub(L0, oopSize, L0);
       
  3891 
       
  3892   masm.st_ptr(O3, L1, L0);  // [_buf + index] := I0
       
  3893   // Use return-from-leaf
       
  3894   masm.retl();
       
  3895   masm.delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);
       
  3896 
       
  3897   masm.bind(refill);
       
  3898   address handle_zero =
       
  3899     CAST_FROM_FN_PTR(address,
       
  3900                      &DirtyCardQueueSet::handle_zero_index_for_thread);
       
  3901   // This should be rare enough that we can afford to save all the
       
  3902   // scratch registers that the calling context might be using.
       
  3903   masm.mov(G1_scratch, L3);
       
  3904   masm.mov(G3_scratch, L5);
       
  3905   // We need the value of O3 above (for the write into the buffer), so we
       
  3906   // save and restore it.
       
  3907   masm.mov(O3, L6);
       
  3908   // Since the call will overwrite O7, we save and restore that, as well.
       
  3909   masm.mov(O7, L4);
       
  3910 
       
  3911   masm.call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
       
  3912   masm.mov(L3, G1_scratch);
       
  3913   masm.mov(L5, G3_scratch);
       
  3914   masm.mov(L6, O3);
       
  3915   masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart);
       
  3916   masm.delayed()->mov(L4, O7);
       
  3917 
       
  3918   dirty_card_log_enqueue = start;
       
  3919   dirty_card_log_enqueue_end = masm.pc();
       
  3920   // XXX Should have a guarantee here about not going off the end!
       
  3921   // Does it already do so?  Do an experiment...
       
  3922 }
       
  3923 
       
  3924 static inline void
       
  3925 generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) {
       
  3926   if (dirty_card_log_enqueue == 0) {
       
  3927     generate_dirty_card_log_enqueue(byte_map_base);
       
  3928     assert(dirty_card_log_enqueue != 0, "postcondition.");
       
  3929     if (G1SATBPrintStubs) {
       
  3930       tty->print_cr("Generated dirty_card enqueue:");
       
  3931       Disassembler::decode((u_char*)dirty_card_log_enqueue,
       
  3932                            dirty_card_log_enqueue_end,
       
  3933                            tty);
       
  3934     }
       
  3935   }
       
  3936 }
       
  3937 
       
  3938 
       
// Emit the G1 post-write barrier for a store "*store_addr = new_val".
//
// store_addr - register holding the address just stored into
// new_val    - register holding the stored value; G0 means a constant
//              NULL store, which needs no barrier
// tmp        - scratch register, clobbered; must differ from store_addr
//              in the inline (non-queue) path
//
// Emits (a) an optional cross-region filter, then (b) the card-table
// write, either by calling the generated dirty-card enqueue stub or
// by dirtying the card inline.
void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) {

  Label filtered;
  MacroAssembler* post_filter_masm = this;

  // Storing NULL never needs a post barrier.
  if (new_val == G0) return;
  if (G1DisablePostBarrier) return;

  G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::G1SATBCT ||
         bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier");
  if (G1RSBarrierRegionFilter) {
    // Cross-region filter: if store_addr and new_val lie in the same
    // heap region, (store_addr ^ new_val) >> LogOfHRGrainBytes is zero
    // and the card write can be skipped entirely.
    xor3(store_addr, new_val, tmp);
#ifdef _LP64
    srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
#else
    srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
#endif
    if (G1PrintCTFilterStats) {
      // Statistics hook: count_ct_writes(tmp, new_val).  tmp must be a
      // global so it survives the save_frame below.
      guarantee(tmp->is_global(), "Or stats won't work...");
      // This is a sleazy hack: I'm temporarily hijacking G2, which I
      // promise to restore.
      mov(new_val, G2);
      save_frame(0);
      mov(tmp, O0);
      mov(G2, O1);
      // Save G-regs that target may use.
      mov(G1, L1);
      mov(G2, L2);
      mov(G3, L3);
      mov(G4, L4);
      mov(G5, L5);
      call(CAST_FROM_FN_PTR(address, &count_ct_writes));
      delayed()->nop();
      // NOTE(review): G2 is reloaded from the call's O0 result rather
      // than from the saved L2 — presumably count_ct_writes returns its
      // second argument; confirm against its definition.
      mov(O0, G2);
      // Restore G-regs that target may have used.
      mov(L1, G1);
      mov(L3, G3);
      mov(L4, G4);
      mov(L5, G5);
      restore(G0, G0, G0);
    }
    // XXX Should I predict this taken or not?  Does it matter?
    // Branch to "filtered" when tmp == 0, i.e. same-region store.
    br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
    delayed()->nop();
  }

  // Now we decide how to generate the card table write.  If we're
  // enqueueing, we call out to a generated function.  Otherwise, we do it
  // inline here.

  if (G1RSBarrierUseQueue) {
    // If the "store_addr" register is an "in" or "local" register, move it to
    // a scratch reg so we can pass it as an argument.
    bool use_scr = !(store_addr->is_global() || store_addr->is_out());
    // Pick a scratch register different from "tmp".
    Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
    // Make sure we use up the delay slot!
    if (use_scr) {
      post_filter_masm->mov(store_addr, scr);
    } else {
      post_filter_masm->nop();
    }
    generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
    // Call the stub from a fresh register window; the argument goes in
    // O0 via the call's delay slot.
    save_frame(0);
    call(dirty_card_log_enqueue);
    if (use_scr) {
      delayed()->mov(scr, O0);
    } else {
      // In/local registers are renamed across save_frame; after_save()
      // names the register as seen from the new window.
      delayed()->mov(store_addr->after_save(), O0);
    }
    restore();

  } else {

    // Inline card dirtying: card index = store_addr >> card_shift,
    // then store a zero byte at byte_map_base + index.
    // NB: store_addr is clobbered by the shift.
#ifdef _LP64
    post_filter_masm->srlx(store_addr, CardTableModRefBS::card_shift, store_addr);
#else
    post_filter_masm->srl(store_addr, CardTableModRefBS::card_shift, store_addr);
#endif
    assert( tmp != store_addr, "need separate temp reg");
    Address rs(tmp, (address)bs->byte_map_base);
    load_address(rs);
    stb(G0, rs.base(), store_addr);
  }

  bind(filtered);

}
       
  4028 
       
  4029 #endif  // SERIALGC
       
  4030 ///////////////////////////////////////////////////////////////////////////////////
       
  4031 
       
  4032 void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
       
  4033   // If we're writing constant NULL, we can skip the write barrier.
       
  4034   if (new_val == G0) return;
       
  4035   CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set();
       
  4036   assert(bs->kind() == BarrierSet::CardTableModRef ||
       
  4037          bs->kind() == BarrierSet::CardTableExtension, "wrong barrier");
       
  4038   card_table_write(bs->byte_map_base, tmp, store_addr);
       
  4039 }
       
  4040 
  3540 void MacroAssembler::load_klass(Register s, Register d) {
  4041 void MacroAssembler::load_klass(Register s, Register d) {
  3541   // The number of bytes in this code is used by
  4042   // The number of bytes in this code is used by
  3542   // MachCallDynamicJavaNode::ret_addr_offset()
  4043   // MachCallDynamicJavaNode::ret_addr_offset()
  3543   // if this changes, change that.
  4044   // if this changes, change that.
  3544   if (UseCompressedOops) {
  4045   if (UseCompressedOops) {