2157 bpr( rc_nz, a, p, s1, L ); |
2157 bpr( rc_nz, a, p, s1, L ); |
2158 #else |
2158 #else |
2159 tst(s1); |
2159 tst(s1); |
2160 br ( notZero, a, p, L ); |
2160 br ( notZero, a, p, L ); |
2161 #endif |
2161 #endif |
2162 } |
|
2163 |
|
// Emits a conditional branch that depends on the contents of register s1,
// choosing the instruction sequence according to what the CPU supports.
//
//   rc - register condition to test s1 against
//   a  - forwarded unchanged to bpr/br (call sites of the Label overload in
//        this file annotate this argument as /*annul*/)
//   p  - Predict value, forwarded unchanged to bpr/br
//   s1 - register whose value decides the branch
//   d  - address forwarded to bpr/br (presumably the branch target -- confirm)
//   rt - relocation type, forwarded unchanged to bpr/br
//
// Nothing is emitted after the branch instruction itself.
2164 void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, |

2165 Register s1, address d, |

2166 relocInfo::relocType rt ) { |

// Runs before any instruction is emitted.
// NOTE(review): presumably asserts we are not in a pending delay slot -- confirm.
2167 assert_not_delayed(); |

// When V9 instructions are usable, a single bpr on s1 does the whole job.
2168 if (VM_Version::v9_instructions_work()) { |

2169 bpr(rc, a, p, s1, d, rt); |

2170 } else { |

// Fallback: tst(s1) followed by an ordinary br on the condition that
// reg_cond_to_cc_cond() derives from rc.
// NOTE(review): presumably tst sets the condition codes that br reads -- confirm.
2171 tst(s1); |

2172 br(reg_cond_to_cc_cond(rc), a, p, d, rt); |

2173 } |

2174 } |
|
2175 |
|
// Label-target overload of br_on_reg_cond: emits a conditional branch to L
// that depends on the contents of register s1, choosing the instruction
// sequence according to what the CPU supports.
//
//   rc - register condition to test s1 against (call sites in this file
//        pass rc_z and Assembler::rc_nz)
//   a  - forwarded unchanged to bpr/br (call sites in this file annotate
//        this argument as /*annul*/ and pass false)
//   p  - Predict value (call sites in this file pass Assembler::pt or
//        Assembler::pn)
//   s1 - register whose value decides the branch
//   L  - label forwarded to bpr/br as the target
//
// Nothing is emitted after the branch instruction itself; call sites in
// this file follow the call with a delayed()->... instruction.
2176 void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p, |

2177 Register s1, Label& L ) { |

// Runs before any instruction is emitted.
// NOTE(review): presumably asserts we are not in a pending delay slot -- confirm.
2178 assert_not_delayed(); |

// When V9 instructions are usable, a single bpr on s1 does the whole job.
2179 if (VM_Version::v9_instructions_work()) { |

2180 bpr(rc, a, p, s1, L); |

2181 } else { |

// Fallback: tst(s1) followed by an ordinary br on the condition that
// reg_cond_to_cc_cond() derives from rc.
// NOTE(review): presumably tst sets the condition codes that br reads -- confirm.
2182 tst(s1); |

2183 br(reg_cond_to_cc_cond(rc), a, p, L); |

2184 } |

2185 } |
2162 } |
2186 |
2163 |
2187 // Compare registers and branch with nop in delay slot or cbcond without delay slot. |
2164 // Compare registers and branch with nop in delay slot or cbcond without delay slot. |
2188 |
2165 |
2189 // Compare integer (32 bit) values (icc only). |
2166 // Compare integer (32 bit) values (icc only). |
4338 __ save_frame(0); |
4315 __ save_frame(0); |
4339 pre_val = I0; // Was O0 before the save. |
4316 pre_val = I0; // Was O0 before the save. |
4340 } else { |
4317 } else { |
4341 pre_val = O0; |
4318 pre_val = O0; |
4342 } |
4319 } |
|
4320 |
4343 int satb_q_index_byte_offset = |
4321 int satb_q_index_byte_offset = |
4344 in_bytes(JavaThread::satb_mark_queue_offset() + |
4322 in_bytes(JavaThread::satb_mark_queue_offset() + |
4345 PtrQueue::byte_offset_of_index()); |
4323 PtrQueue::byte_offset_of_index()); |
|
4324 |
4346 int satb_q_buf_byte_offset = |
4325 int satb_q_buf_byte_offset = |
4347 in_bytes(JavaThread::satb_mark_queue_offset() + |
4326 in_bytes(JavaThread::satb_mark_queue_offset() + |
4348 PtrQueue::byte_offset_of_buf()); |
4327 PtrQueue::byte_offset_of_buf()); |
|
4328 |
4349 assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) && |
4329 assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) && |
4350 in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t), |
4330 in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t), |
4351 "check sizes in assembly below"); |
4331 "check sizes in assembly below"); |
4352 |
4332 |
4353 __ bind(restart); |
4333 __ bind(restart); |
|
4334 |
|
4335 // Load the index into the SATB buffer. PtrQueue::_index is a size_t |
|
4336 // so ld_ptr is appropriate. |
4354 __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0); |
4337 __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0); |
4355 |
4338 |
4356 __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill); |
4339 // index == 0? |
4357 // If the branch is taken, no harm in executing this in the delay slot. |
4340 __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill); |
4358 __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); |
4341 |
|
4342 __ ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); |
4359 __ sub(L0, oopSize, L0); |
4343 __ sub(L0, oopSize, L0); |
4360 |
4344 |
4361 __ st_ptr(pre_val, L1, L0); // [_buf + index] := I0 |
4345 __ st_ptr(pre_val, L1, L0); // [_buf + index] := I0 |
4362 if (!with_frame) { |
4346 if (!with_frame) { |
4363 // Use return-from-leaf |
4347 // Use return-from-leaf |
4464 in_bytes(JavaThread::satb_mark_queue_offset() + |
4448 in_bytes(JavaThread::satb_mark_queue_offset() + |
4465 PtrQueue::byte_offset_of_active()), |
4449 PtrQueue::byte_offset_of_active()), |
4466 tmp); |
4450 tmp); |
4467 } |
4451 } |
4468 |
4452 |
4469 // Check on whether to annul. |
4453 // Is marking active? |
4470 br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); |
4454 cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered); |
4471 delayed()->nop(); |
|
4472 |
4455 |
4473 // Do we need to load the previous value? |
4456 // Do we need to load the previous value? |
4474 if (obj != noreg) { |
4457 if (obj != noreg) { |
4475 // Load the previous value... |
4458 // Load the previous value... |
4476 if (index == noreg) { |
4459 if (index == noreg) { |
4488 } |
4471 } |
4489 |
4472 |
4490 assert(pre_val != noreg, "must have a real register"); |
4473 assert(pre_val != noreg, "must have a real register"); |
4491 |
4474 |
4492 // Is the previous value null? |
4475 // Is the previous value null? |
4493 // Check on whether to annul. |
4476 cmp_and_brx_short(pre_val, G0, Assembler::equal, Assembler::pt, filtered); |
4494 br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered); |
|
4495 delayed()->nop(); |
|
4496 |
4477 |
4497 // OK, it's not filtered, so we'll need to call enqueue. In the normal |
4478 // OK, it's not filtered, so we'll need to call enqueue. In the normal |
4498 // case, pre_val will be a scratch G-reg, but there are some cases in |
4479 // case, pre_val will be a scratch G-reg, but there are some cases in |
4499 // which it's an O-reg. In the first case, do a normal call. In the |
4480 // which it's an O-reg. In the first case, do a normal call. In the |
4500 // latter, do a save here and call the frameless version. |
4481 // latter, do a save here and call the frameless version. |
4515 delayed()->mov(pre_val->after_save(), O0); |
4496 delayed()->mov(pre_val->after_save(), O0); |
4516 restore(); |
4497 restore(); |
4517 } |
4498 } |
4518 |
4499 |
4519 bind(filtered); |
4500 bind(filtered); |
4520 } |
|
4521 |
|
// File-local statistics for the card-table write filter.
// num_ct_writes counts every call to count_ct_writes(); the two
// "filtered" counters record why a write was filtered.
4522 static jint num_ct_writes = 0; |

4523 static jint num_ct_writes_filtered_in_hr = 0; |

4524 static jint num_ct_writes_filtered_null = 0; |

// Set on first use inside count_ct_writes().
// NOTE(review): only written, never read, in the code visible here.
4525 static G1CollectedHeap* g1 = NULL; |

4526 |

// Statistics hook called from generated code (via
// CAST_FROM_FN_PTR(address, &count_ct_writes)) to count potential
// card-table writes and how many of them are filtered.
//
//   filter_val - NULL is counted as an "intra-HR" filtered write.
//                NOTE(review): the generated-code caller in this file passes
//                (store_addr ^ new_val) >> HeapRegion::LogOfHRGrainBytes
//                here -- confirm that is the only caller.
//   new_val    - NULL (when filter_val is non-NULL) is counted as a
//                "null" filtered write.
//
// Returns Thread::current().
// NOTE(review): the generated-code caller in this file moves the result
// into G2 after the call, which looks like it restores the register it
// temporarily hijacked -- confirm.
4527 static Thread* count_ct_writes(void* filter_val, void* new_val) { |

// Every call counts as one potential card-table write.
4528 Atomic::inc(&num_ct_writes); |

4529 if (filter_val == NULL) { |

4530 Atomic::inc(&num_ct_writes_filtered_in_hr); |

4531 } else if (new_val == NULL) { |

4532 Atomic::inc(&num_ct_writes_filtered_null); |

4533 } else { |

// Unfiltered write: nothing is counted, only g1 is initialized lazily.
4534 if (g1 == NULL) { |

4535 g1 = G1CollectedHeap::heap(); |

4536 } |

4537 } |

// Print a summary whenever the total is a multiple of one million.
// NOTE(review): this is a plain read of num_ct_writes, separate from the
// Atomic::inc above; with concurrent callers a report could presumably be
// skipped or printed more than once -- confirm whether that matters.
4538 if ((num_ct_writes % 1000000) == 0) { |

4539 jint num_ct_writes_filtered = |

4540 num_ct_writes_filtered_in_hr + |

4541 num_ct_writes_filtered_null; |

4542 |

// Arguments, in order: total writes, percent filtered overall,
// percent filtered as intra-HR, percent filtered as null.
4543 tty->print_cr("%d potential CT writes: %5.2f%% filtered\n" |

4544 " (%5.2f%% intra-HR, %5.2f%% null).", |

4545 num_ct_writes, |

4546 100.0*(float)num_ct_writes_filtered/(float)num_ct_writes, |

4547 100.0*(float)num_ct_writes_filtered_in_hr/ |

4548 (float)num_ct_writes, |

4549 100.0*(float)num_ct_writes_filtered_null/ |

4550 (float)num_ct_writes); |

4551 } |

4552 return Thread::current(); |

4553 } |
4501 } |
4554 |
4502 |
4555 static address dirty_card_log_enqueue = 0; |
4503 static address dirty_card_log_enqueue = 0; |
4556 static u_char* dirty_card_log_enqueue_end = 0; |
4504 static u_char* dirty_card_log_enqueue_end = 0; |
4557 |
4505 |
4572 #endif |
4520 #endif |
4573 AddressLiteral addrlit(byte_map_base); |
4521 AddressLiteral addrlit(byte_map_base); |
4574 __ set(addrlit, O1); // O1 := <card table base> |
4522 __ set(addrlit, O1); // O1 := <card table base> |
4575 __ ldub(O0, O1, O2); // O2 := [O0 + O1] |
4523 __ ldub(O0, O1, O2); // O2 := [O0 + O1] |
4576 |
4524 |
4577 __ br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt, |
4525 assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code"); |
4578 O2, not_already_dirty); |
4526 __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty); |
4579 // Get O1 + O2 into a reg by itself -- useful in the take-the-branch |
|
4580 // case, harmless if not. |
|
4581 __ delayed()->add(O0, O1, O3); |
|
4582 |
4527 |
4583 // We didn't take the branch, so we're already dirty: return. |
4528 // We didn't take the branch, so we're already dirty: return. |
4584 // Use return-from-leaf |
4529 // Use return-from-leaf |
4585 __ retl(); |
4530 __ retl(); |
4586 __ delayed()->nop(); |
4531 __ delayed()->nop(); |
4587 |
4532 |
4588 // Not dirty. |
4533 // Not dirty. |
4589 __ bind(not_already_dirty); |
4534 __ bind(not_already_dirty); |
|
4535 |
|
4536 // Get O0 + O1 into a reg by itself |
|
4537 __ add(O0, O1, O3); |
|
4538 |
4590 // First, dirty it. |
4539 // First, dirty it. |
4591 __ stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty). |
4540 __ stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty). |
|
4541 |
4592 int dirty_card_q_index_byte_offset = |
4542 int dirty_card_q_index_byte_offset = |
4593 in_bytes(JavaThread::dirty_card_queue_offset() + |
4543 in_bytes(JavaThread::dirty_card_queue_offset() + |
4594 PtrQueue::byte_offset_of_index()); |
4544 PtrQueue::byte_offset_of_index()); |
4595 int dirty_card_q_buf_byte_offset = |
4545 int dirty_card_q_buf_byte_offset = |
4596 in_bytes(JavaThread::dirty_card_queue_offset() + |
4546 in_bytes(JavaThread::dirty_card_queue_offset() + |
4597 PtrQueue::byte_offset_of_buf()); |
4547 PtrQueue::byte_offset_of_buf()); |
4598 __ bind(restart); |
4548 __ bind(restart); |
|
4549 |
|
4550 // Load the index into the update buffer. PtrQueue::_index is |
|
4551 // a size_t so ld_ptr is appropriate here. |
4599 __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0); |
4552 __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0); |
4600 |
4553 |
4601 __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, |
4554 // index == 0? |
4602 L0, refill); |
4555 __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill); |
4603 // If the branch is taken, no harm in executing this in the delay slot. |
4556 |
4604 __ delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1); |
4557 __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1); |
4605 __ sub(L0, oopSize, L0); |
4558 __ sub(L0, oopSize, L0); |
4606 |
4559 |
4607 __ st_ptr(O3, L1, L0); // [_buf + index] := I0 |
4560 __ st_ptr(O3, L1, L0); // [_buf + index] := I0 |
4608 // Use return-from-leaf |
4561 // Use return-from-leaf |
4609 __ retl(); |
4562 __ retl(); |
4662 if (new_val == G0) return; |
4615 if (new_val == G0) return; |
4663 |
4616 |
4664 G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set(); |
4617 G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set(); |
4665 assert(bs->kind() == BarrierSet::G1SATBCT || |
4618 assert(bs->kind() == BarrierSet::G1SATBCT || |
4666 bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier"); |
4619 bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier"); |
|
4620 |
4667 if (G1RSBarrierRegionFilter) { |
4621 if (G1RSBarrierRegionFilter) { |
4668 xor3(store_addr, new_val, tmp); |
4622 xor3(store_addr, new_val, tmp); |
4669 #ifdef _LP64 |
4623 #ifdef _LP64 |
4670 srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp); |
4624 srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp); |
4671 #else |
4625 #else |
4672 srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp); |
4626 srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp); |
4673 #endif |
4627 #endif |
4674 |
4628 |
4675 if (G1PrintCTFilterStats) { |
4629 // XXX Should I predict this taken or not? Does it matter? |
4676 guarantee(tmp->is_global(), "Or stats won't work..."); |
4630 cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered); |
4677 // This is a sleazy hack: I'm temporarily hijacking G2, which I |
|
4678 // promise to restore. |
|
4679 mov(new_val, G2); |
|
4680 save_frame(0); |
|
4681 mov(tmp, O0); |
|
4682 mov(G2, O1); |
|
4683 // Save G-regs that target may use. |
|
4684 mov(G1, L1); |
|
4685 mov(G2, L2); |
|
4686 mov(G3, L3); |
|
4687 mov(G4, L4); |
|
4688 mov(G5, L5); |
|
4689 call(CAST_FROM_FN_PTR(address, &count_ct_writes)); |
|
4690 delayed()->nop(); |
|
4691 mov(O0, G2); |
|
4692 // Restore G-regs that target may have used. |
|
4693 mov(L1, G1); |
|
4694 mov(L3, G3); |
|
4695 mov(L4, G4); |
|
4696 mov(L5, G5); |
|
4697 restore(G0, G0, G0); |
|
4698 } |
|
4699 // XXX Should I predict this taken or not? Does it mattern? |
|
4700 br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); |
|
4701 delayed()->nop(); |
|
4702 } |
4631 } |
4703 |
4632 |
4704 // If the "store_addr" register is an "in" or "local" register, move it to |
4633 // If the "store_addr" register is an "in" or "local" register, move it to |
4705 // a scratch reg so we can pass it as an argument. |
4634 // a scratch reg so we can pass it as an argument. |
4706 bool use_scr = !(store_addr->is_global() || store_addr->is_out()); |
4635 bool use_scr = !(store_addr->is_global() || store_addr->is_out()); |