hotspot/src/cpu/sparc/vm/assembler_sparc.cpp
changeset 10497 1bcff72a4b82
parent 10252 0981ce1c3eef
child 10519 fb373fa38321
equal deleted inserted replaced
10496:b209db6147cf 10497:1bcff72a4b82
  2157   bpr( rc_nz, a, p, s1, L );
  2157   bpr( rc_nz, a, p, s1, L );
  2158 #else
  2158 #else
  2159   tst(s1);
  2159   tst(s1);
  2160   br ( notZero, a, p, L );
  2160   br ( notZero, a, p, L );
  2161 #endif
  2161 #endif
  2162 }
       
  2163 
       
  2164 void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
       
  2165                                      Register s1, address d,
       
  2166                                      relocInfo::relocType rt ) {
       
  2167   assert_not_delayed();
       
  2168   if (VM_Version::v9_instructions_work()) {
       
  2169     bpr(rc, a, p, s1, d, rt);
       
  2170   } else {
       
  2171     tst(s1);
       
  2172     br(reg_cond_to_cc_cond(rc), a, p, d, rt);
       
  2173   }
       
  2174 }
       
  2175 
       
  2176 void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
       
  2177                                      Register s1, Label& L ) {
       
  2178   assert_not_delayed();
       
  2179   if (VM_Version::v9_instructions_work()) {
       
  2180     bpr(rc, a, p, s1, L);
       
  2181   } else {
       
  2182     tst(s1);
       
  2183     br(reg_cond_to_cc_cond(rc), a, p, L);
       
  2184   }
       
  2185 }
  2162 }
  2186 
  2163 
  2187 // Compare registers and branch with nop in delay slot or cbcond without delay slot.
  2164 // Compare registers and branch with nop in delay slot or cbcond without delay slot.
  2188 
  2165 
  2189 // Compare integer (32 bit) values (icc only).
  2166 // Compare integer (32 bit) values (icc only).
  4338     __ save_frame(0);
  4315     __ save_frame(0);
  4339     pre_val = I0;  // Was O0 before the save.
  4316     pre_val = I0;  // Was O0 before the save.
  4340   } else {
  4317   } else {
  4341     pre_val = O0;
  4318     pre_val = O0;
  4342   }
  4319   }
       
  4320 
  4343   int satb_q_index_byte_offset =
  4321   int satb_q_index_byte_offset =
  4344     in_bytes(JavaThread::satb_mark_queue_offset() +
  4322     in_bytes(JavaThread::satb_mark_queue_offset() +
  4345              PtrQueue::byte_offset_of_index());
  4323              PtrQueue::byte_offset_of_index());
       
  4324 
  4346   int satb_q_buf_byte_offset =
  4325   int satb_q_buf_byte_offset =
  4347     in_bytes(JavaThread::satb_mark_queue_offset() +
  4326     in_bytes(JavaThread::satb_mark_queue_offset() +
  4348              PtrQueue::byte_offset_of_buf());
  4327              PtrQueue::byte_offset_of_buf());
       
  4328 
  4349   assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) &&
  4329   assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) &&
  4350          in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t),
  4330          in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t),
  4351          "check sizes in assembly below");
  4331          "check sizes in assembly below");
  4352 
  4332 
  4353   __ bind(restart);
  4333   __ bind(restart);
       
  4334 
       
  4335   // Load the index into the SATB buffer. PtrQueue::_index is a size_t
       
  4336   // so ld_ptr is appropriate.
  4354   __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
  4337   __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
  4355 
  4338 
  4356   __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill);
  4339   // index == 0?
  4357   // If the branch is taken, no harm in executing this in the delay slot.
  4340   __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);
  4358   __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
  4341 
       
  4342   __ ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
  4359   __ sub(L0, oopSize, L0);
  4343   __ sub(L0, oopSize, L0);
  4360 
  4344 
  4361   __ st_ptr(pre_val, L1, L0);  // [_buf + index] := pre_val
  4345   __ st_ptr(pre_val, L1, L0);  // [_buf + index] := pre_val
  4362   if (!with_frame) {
  4346   if (!with_frame) {
  4363     // Use return-from-leaf
  4347     // Use return-from-leaf
  4464          in_bytes(JavaThread::satb_mark_queue_offset() +
  4448          in_bytes(JavaThread::satb_mark_queue_offset() +
  4465                   PtrQueue::byte_offset_of_active()),
  4449                   PtrQueue::byte_offset_of_active()),
  4466          tmp);
  4450          tmp);
  4467   }
  4451   }
  4468 
  4452 
  4469   // Check on whether to annul.
  4453   // Is marking active?
  4470   br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
  4454   cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
  4471   delayed()->nop();
       
  4472 
  4455 
  4473   // Do we need to load the previous value?
  4456   // Do we need to load the previous value?
  4474   if (obj != noreg) {
  4457   if (obj != noreg) {
  4475     // Load the previous value...
  4458     // Load the previous value...
  4476     if (index == noreg) {
  4459     if (index == noreg) {
  4488   }
  4471   }
  4489 
  4472 
  4490   assert(pre_val != noreg, "must have a real register");
  4473   assert(pre_val != noreg, "must have a real register");
  4491 
  4474 
  4492   // Is the previous value null?
  4475   // Is the previous value null?
  4493   // Check on whether to annul.
  4476   cmp_and_brx_short(pre_val, G0, Assembler::equal, Assembler::pt, filtered);
  4494   br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered);
       
  4495   delayed()->nop();
       
  4496 
  4477 
  4497   // OK, it's not filtered, so we'll need to call enqueue.  In the normal
  4478   // OK, it's not filtered, so we'll need to call enqueue.  In the normal
  4498   // case, pre_val will be a scratch G-reg, but there are some cases in
  4479   // case, pre_val will be a scratch G-reg, but there are some cases in
  4499   // which it's an O-reg.  In the first case, do a normal call.  In the
  4480   // which it's an O-reg.  In the first case, do a normal call.  In the
  4500   // latter, do a save here and call the frameless version.
  4481   // latter, do a save here and call the frameless version.
  4515     delayed()->mov(pre_val->after_save(), O0);
  4496     delayed()->mov(pre_val->after_save(), O0);
  4516     restore();
  4497     restore();
  4517   }
  4498   }
  4518 
  4499 
  4519   bind(filtered);
  4500   bind(filtered);
  4520 }
       
  4521 
       
  4522 static jint num_ct_writes = 0;
       
  4523 static jint num_ct_writes_filtered_in_hr = 0;
       
  4524 static jint num_ct_writes_filtered_null = 0;
       
  4525 static G1CollectedHeap* g1 = NULL;
       
  4526 
       
  4527 static Thread* count_ct_writes(void* filter_val, void* new_val) {
       
  4528   Atomic::inc(&num_ct_writes);
       
  4529   if (filter_val == NULL) {
       
  4530     Atomic::inc(&num_ct_writes_filtered_in_hr);
       
  4531   } else if (new_val == NULL) {
       
  4532     Atomic::inc(&num_ct_writes_filtered_null);
       
  4533   } else {
       
  4534     if (g1 == NULL) {
       
  4535       g1 = G1CollectedHeap::heap();
       
  4536     }
       
  4537   }
       
  4538   if ((num_ct_writes % 1000000) == 0) {
       
  4539     jint num_ct_writes_filtered =
       
  4540       num_ct_writes_filtered_in_hr +
       
  4541       num_ct_writes_filtered_null;
       
  4542 
       
  4543     tty->print_cr("%d potential CT writes: %5.2f%% filtered\n"
       
  4544                   "   (%5.2f%% intra-HR, %5.2f%% null).",
       
  4545                   num_ct_writes,
       
  4546                   100.0*(float)num_ct_writes_filtered/(float)num_ct_writes,
       
  4547                   100.0*(float)num_ct_writes_filtered_in_hr/
       
  4548                   (float)num_ct_writes,
       
  4549                   100.0*(float)num_ct_writes_filtered_null/
       
  4550                   (float)num_ct_writes);
       
  4551   }
       
  4552   return Thread::current();
       
  4553 }
  4501 }
  4554 
  4502 
  4555 static address dirty_card_log_enqueue = 0;
  4503 static address dirty_card_log_enqueue = 0;
  4556 static u_char* dirty_card_log_enqueue_end = 0;
  4504 static u_char* dirty_card_log_enqueue_end = 0;
  4557 
  4505 
  4572 #endif
  4520 #endif
  4573   AddressLiteral addrlit(byte_map_base);
  4521   AddressLiteral addrlit(byte_map_base);
  4574   __ set(addrlit, O1); // O1 := <card table base>
  4522   __ set(addrlit, O1); // O1 := <card table base>
  4575   __ ldub(O0, O1, O2); // O2 := [O0 + O1]
  4523   __ ldub(O0, O1, O2); // O2 := [O0 + O1]
  4576 
  4524 
  4577   __ br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt,
  4525   assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code");
  4578                       O2, not_already_dirty);
  4526   __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty);
  4579   // Get O0 + O1 into a reg by itself -- useful in the take-the-branch
       
  4580   // case, harmless if not.
       
  4581   __ delayed()->add(O0, O1, O3);
       
  4582 
  4527 
  4583   // We didn't take the branch, so we're already dirty: return.
  4528   // We didn't take the branch, so we're already dirty: return.
  4584   // Use return-from-leaf
  4529   // Use return-from-leaf
  4585   __ retl();
  4530   __ retl();
  4586   __ delayed()->nop();
  4531   __ delayed()->nop();
  4587 
  4532 
  4588   // Not dirty.
  4533   // Not dirty.
  4589   __ bind(not_already_dirty);
  4534   __ bind(not_already_dirty);
       
  4535 
       
  4536   // Get O0 + O1 into a reg by itself
       
  4537   __ add(O0, O1, O3);
       
  4538 
  4590   // First, dirty it.
  4539   // First, dirty it.
  4591   __ stb(G0, O3, G0);  // [cardPtr] := 0  (i.e., dirty).
  4540   __ stb(G0, O3, G0);  // [cardPtr] := 0  (i.e., dirty).
       
  4541 
  4592   int dirty_card_q_index_byte_offset =
  4542   int dirty_card_q_index_byte_offset =
  4593     in_bytes(JavaThread::dirty_card_queue_offset() +
  4543     in_bytes(JavaThread::dirty_card_queue_offset() +
  4594              PtrQueue::byte_offset_of_index());
  4544              PtrQueue::byte_offset_of_index());
  4595   int dirty_card_q_buf_byte_offset =
  4545   int dirty_card_q_buf_byte_offset =
  4596     in_bytes(JavaThread::dirty_card_queue_offset() +
  4546     in_bytes(JavaThread::dirty_card_queue_offset() +
  4597              PtrQueue::byte_offset_of_buf());
  4547              PtrQueue::byte_offset_of_buf());
  4598   __ bind(restart);
  4548   __ bind(restart);
       
  4549 
       
  4550   // Load the index into the update buffer. PtrQueue::_index is
       
  4551   // a size_t so ld_ptr is appropriate here.
  4599   __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);
  4552   __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);
  4600 
  4553 
  4601   __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn,
  4554   // index == 0?
  4602                       L0, refill);
  4555   __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill);
  4603   // If the branch is taken, no harm in executing this in the delay slot.
  4556 
  4604   __ delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
  4557   __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
  4605   __ sub(L0, oopSize, L0);
  4558   __ sub(L0, oopSize, L0);
  4606 
  4559 
  4607   __ st_ptr(O3, L1, L0);  // [_buf + index] := O3 (cardPtr)
  4560   __ st_ptr(O3, L1, L0);  // [_buf + index] := O3 (cardPtr)
  4608   // Use return-from-leaf
  4561   // Use return-from-leaf
  4609   __ retl();
  4562   __ retl();
  4662   if (new_val == G0) return;
  4615   if (new_val == G0) return;
  4663 
  4616 
  4664   G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set();
  4617   G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set();
  4665   assert(bs->kind() == BarrierSet::G1SATBCT ||
  4618   assert(bs->kind() == BarrierSet::G1SATBCT ||
  4666          bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier");
  4619          bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier");
       
  4620 
  4667   if (G1RSBarrierRegionFilter) {
  4621   if (G1RSBarrierRegionFilter) {
  4668     xor3(store_addr, new_val, tmp);
  4622     xor3(store_addr, new_val, tmp);
  4669 #ifdef _LP64
  4623 #ifdef _LP64
  4670     srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
  4624     srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
  4671 #else
  4625 #else
  4672     srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
  4626     srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
  4673 #endif
  4627 #endif
  4674 
  4628 
  4675     if (G1PrintCTFilterStats) {
  4629     // XXX Should I predict this taken or not?  Does it matter?
  4676       guarantee(tmp->is_global(), "Or stats won't work...");
  4630     cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered);
  4677       // This is a sleazy hack: I'm temporarily hijacking G2, which I
       
  4678       // promise to restore.
       
  4679       mov(new_val, G2);
       
  4680       save_frame(0);
       
  4681       mov(tmp, O0);
       
  4682       mov(G2, O1);
       
  4683       // Save G-regs that target may use.
       
  4684       mov(G1, L1);
       
  4685       mov(G2, L2);
       
  4686       mov(G3, L3);
       
  4687       mov(G4, L4);
       
  4688       mov(G5, L5);
       
  4689       call(CAST_FROM_FN_PTR(address, &count_ct_writes));
       
  4690       delayed()->nop();
       
  4691       mov(O0, G2);
       
  4692       // Restore G-regs that target may have used.
       
  4693       mov(L1, G1);
       
  4694       mov(L3, G3);
       
  4695       mov(L4, G4);
       
  4696       mov(L5, G5);
       
  4697       restore(G0, G0, G0);
       
  4698     }
       
  4699     // XXX Should I predict this taken or not?  Does it matter?
       
  4700     br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
       
  4701     delayed()->nop();
       
  4702   }
  4631   }
  4703 
  4632 
  4704   // If the "store_addr" register is an "in" or "local" register, move it to
  4633   // If the "store_addr" register is an "in" or "local" register, move it to
  4705   // a scratch reg so we can pass it as an argument.
  4634   // a scratch reg so we can pass it as an argument.
  4706   bool use_scr = !(store_addr->is_global() || store_addr->is_out());
  4635   bool use_scr = !(store_addr->is_global() || store_addr->is_out());
  4721     delayed()->mov(store_addr->after_save(), O0);
  4650     delayed()->mov(store_addr->after_save(), O0);
  4722   }
  4651   }
  4723   restore();
  4652   restore();
  4724 
  4653 
  4725   bind(filtered);
  4654   bind(filtered);
  4726 
       
  4727 }
  4655 }
  4728 
  4656 
  4729 #endif  // SERIALGC
  4657 #endif  // SERIALGC
  4730 ///////////////////////////////////////////////////////////////////////////////////
  4658 ///////////////////////////////////////////////////////////////////////////////////
  4731 
  4659