--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp Wed Jun 04 13:51:09 2008 -0700
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp Thu Jun 05 15:57:56 2008 -0700
@@ -130,6 +130,20 @@
return 0x00; // illegal instruction 0x00000000
}
+Assembler::Condition Assembler::reg_cond_to_cc_cond(Assembler::RCondition in) {
+ // Translate a register-contents condition (the kind bpr takes) into the
+ // condition-code test that br_on_reg_cond uses after a tst of the register.
+ if (in == rc_z) return equal;
+ if (in == rc_lez) return lessEqual;
+ if (in == rc_lz) return less;
+ if (in == rc_nz) return notEqual;
+ if (in == rc_gz) return greater;
+ if (in == rc_gez) return greaterEqual;
+ // Any other value is not one of the six register conditions handled above.
+ ShouldNotReachHere();
+ return equal;
+}
+
// Generate a bunch 'o stuff (including v9's
#ifndef PRODUCT
void Assembler::test_v9() {
@@ -1213,31 +1227,19 @@
}
-void MacroAssembler::store_check(Register tmp, Register obj) {
- // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.)
-
- /* $$$ This stuff needs to go into one of the BarrierSet generator
- functions. (The particular barrier sets will have to be friends of
- MacroAssembler, I guess.) */
- BarrierSet* bs = Universe::heap()->barrier_set();
- assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
- CardTableModRefBS* ct = (CardTableModRefBS*)bs;
- assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+void MacroAssembler::card_table_write(jbyte* byte_map_base, // base added to (obj >> card_shift) to form the byte that gets zeroed
+ Register tmp, Register obj) { // both are clobbered: obj is shifted in place, tmp is loaded with byte_map_base
#ifdef _LP64
srlx(obj, CardTableModRefBS::card_shift, obj);
#else
srl(obj, CardTableModRefBS::card_shift, obj);
#endif
assert( tmp != obj, "need separate temp reg");
- Address rs(tmp, (address)ct->byte_map_base);
+ Address rs(tmp, (address)byte_map_base); // base is supplied by the caller rather than read from a barrier set here
load_address(rs);
stb(G0, rs.base(), obj);
}
-void MacroAssembler::store_check(Register tmp, Register obj, Register offset) {
- store_check(tmp, obj);
-}
-
// %%% Note: The following six instructions have been moved,
// unchanged, from assembler_sparc.inline.hpp.
// They will be refactored at a later date.
@@ -1648,11 +1650,21 @@
if (reg == G0) return; // always NULL, which is always an oop
- char buffer[16];
+ char buffer[64];
+#ifdef COMPILER1
+ if (CommentedAssembly) {
+ snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
+ block_comment(buffer);
+ }
+#endif
+
+ int len = strlen(file) + strlen(msg) + 1 + 4;
sprintf(buffer, "%d", line);
- int len = strlen(file) + strlen(msg) + 1 + 4 + strlen(buffer);
+ len += strlen(buffer);
+ sprintf(buffer, " at offset %d ", offset());
+ len += strlen(buffer);
char * real_msg = new char[len];
- sprintf(real_msg, "%s (%s:%d)", msg, file, line);
+ sprintf(real_msg, "%s%s(%s:%d)", msg, buffer, file, line);
// Call indirectly to solve generation ordering problem
Address a(O7, (address)StubRoutines::verify_oop_subroutine_entry_address());
@@ -2044,6 +2056,27 @@
#endif
}
+void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
+ Register s1, address d, relocInfo::relocType rt ) {
+ // Use bpr directly whenever V9 instructions work on this CPU.
+ if (VM_Version::v9_instructions_work()) {
+ bpr(rc, a, p, s1, d, rt);
+ return;
+ }
+ tst(s1); // otherwise test s1, then branch on the matching cc condition
+ br(reg_cond_to_cc_cond(rc), a, p, d, rt);
+}
+
+void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
+ Register s1, Label& L ) {
+ if (VM_Version::v9_instructions_work()) { // bpr is usable: emit it directly
+ bpr(rc, a, p, s1, L);
+ return;
+ }
+ tst(s1); // otherwise test s1, then branch on the matching cc condition
+ br(reg_cond_to_cc_cond(rc), a, p, L);
+}
+
// instruction sequences factored across compiler & interpreter
@@ -3226,68 +3259,74 @@
assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size");
assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
- // get eden boundaries
- // note: we need both top & top_addr!
- const Register top_addr = t1;
- const Register end = t2;
-
- CollectedHeap* ch = Universe::heap();
- set((intx)ch->top_addr(), top_addr);
- intx delta = (intx)ch->end_addr() - (intx)ch->top_addr();
- ld_ptr(top_addr, delta, end);
- ld_ptr(top_addr, 0, obj);
-
- // try to allocate
- Label retry;
- bind(retry);
-#ifdef ASSERT
- // make sure eden top is properly aligned
- {
- Label L;
- btst(MinObjAlignmentInBytesMask, obj);
- br(Assembler::zero, false, Assembler::pt, L);
+ if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
+ // No allocation in the shared eden.
+ br(Assembler::always, false, Assembler::pt, slow_case);
delayed()->nop();
- stop("eden top is not properly aligned");
- bind(L);
- }
+ } else {
+ // get eden boundaries
+ // note: we need both top & top_addr!
+ const Register top_addr = t1;
+ const Register end = t2;
+
+ CollectedHeap* ch = Universe::heap();
+ set((intx)ch->top_addr(), top_addr);
+ intx delta = (intx)ch->end_addr() - (intx)ch->top_addr();
+ ld_ptr(top_addr, delta, end);
+ ld_ptr(top_addr, 0, obj);
+
+ // try to allocate
+ Label retry;
+ bind(retry);
+#ifdef ASSERT
+ // make sure eden top is properly aligned
+ {
+ Label L;
+ btst(MinObjAlignmentInBytesMask, obj);
+ br(Assembler::zero, false, Assembler::pt, L);
+ delayed()->nop();
+ stop("eden top is not properly aligned");
+ bind(L);
+ }
#endif // ASSERT
- const Register free = end;
- sub(end, obj, free); // compute amount of free space
- if (var_size_in_bytes->is_valid()) {
- // size is unknown at compile time
- cmp(free, var_size_in_bytes);
- br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
- delayed()->add(obj, var_size_in_bytes, end);
- } else {
- // size is known at compile time
- cmp(free, con_size_in_bytes);
- br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
- delayed()->add(obj, con_size_in_bytes, end);
- }
- // Compare obj with the value at top_addr; if still equal, swap the value of
- // end with the value at top_addr. If not equal, read the value at top_addr
- // into end.
- casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
- // if someone beat us on the allocation, try again, otherwise continue
- cmp(obj, end);
- brx(Assembler::notEqual, false, Assembler::pn, retry);
- delayed()->mov(end, obj); // nop if successfull since obj == end
+ const Register free = end;
+ sub(end, obj, free); // compute amount of free space
+ if (var_size_in_bytes->is_valid()) {
+ // size is unknown at compile time
+ cmp(free, var_size_in_bytes);
+ br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
+ delayed()->add(obj, var_size_in_bytes, end);
+ } else {
+ // size is known at compile time
+ cmp(free, con_size_in_bytes);
+ br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
+ delayed()->add(obj, con_size_in_bytes, end);
+ }
+ // Compare obj with the value at top_addr; if still equal, swap the value of
+ // end with the value at top_addr. If not equal, read the value at top_addr
+ // into end.
+ casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
+ // if someone beat us on the allocation, try again, otherwise continue
+ cmp(obj, end);
+ brx(Assembler::notEqual, false, Assembler::pn, retry);
+ delayed()->mov(end, obj); // nop if successfull since obj == end
#ifdef ASSERT
- // make sure eden top is properly aligned
- {
- Label L;
- const Register top_addr = t1;
-
- set((intx)ch->top_addr(), top_addr);
- ld_ptr(top_addr, 0, top_addr);
- btst(MinObjAlignmentInBytesMask, top_addr);
- br(Assembler::zero, false, Assembler::pt, L);
- delayed()->nop();
- stop("eden top is not properly aligned");
- bind(L);
+ // make sure eden top is properly aligned
+ {
+ Label L;
+ const Register top_addr = t1;
+
+ set((intx)ch->top_addr(), top_addr);
+ ld_ptr(top_addr, 0, top_addr);
+ btst(MinObjAlignmentInBytesMask, top_addr);
+ br(Assembler::zero, false, Assembler::pt, L);
+ delayed()->nop();
+ stop("eden top is not properly aligned");
+ bind(L);
+ }
+#endif // ASSERT
}
-#endif // ASSERT
}
@@ -3537,6 +3576,468 @@
}
}
+///////////////////////////////////////////////////////////////////////////////////
+#ifndef SERIALGC
+
+static uint num_stores = 0; // calls to count_null_pre_vals so far
+static uint num_null_pre_stores = 0; // of those, calls that passed a NULL pre_val
+
+static void count_null_pre_vals(void* pre_val) {
+ // Statistics helper: tally this call and print a summary every millionth one.
+ ++num_stores;
+ if (pre_val == NULL) ++num_null_pre_stores;
+ if ((num_stores % 1000000) != 0) return;
+ const double null_pct = 100.0*(float)num_null_pre_stores/(float)num_stores;
+ tty->print_cr(UINT32_FORMAT " stores, " UINT32_FORMAT " (%5.2f%%) with null pre-vals.",
+ num_stores, num_null_pre_stores, null_pct);
+}
+
+static address satb_log_enqueue_with_frame = 0; // entry of the with-frame SATB enqueue stub; 0 until it has been generated
+static u_char* satb_log_enqueue_with_frame_end = 0; // masm.pc() captured right after that stub was emitted
+
+static address satb_log_enqueue_frameless = 0; // entry of the frameless SATB enqueue stub; 0 until it has been generated
+static u_char* satb_log_enqueue_frameless_end = 0; // masm.pc() captured right after that stub was emitted
+
+static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // size passed to BufferBlob::create for the stubs; units unclear (instructions or bytes?) -- TODO confirm
+
+// Asserts that ind lies in [0, 64*K] and is a multiple of oopSize.
+// The calls to this don't work. We'd need to do a fair amount of work to
+// make it work.
+static void check_index(int ind) {
+ assert(0 <= ind && ind <= 64*K && ((ind % oopSize) == 0), "Invariants.");
+}
+
+// Emits one of the two SATB enqueue stubs into a fresh BufferBlob and records
+// its entry and end pc in the matching satb_log_enqueue_* statics.
+static void generate_satb_log_enqueue(bool with_frame) {
+ BufferBlob* bb = BufferBlob::create(with_frame ? "enqueue_with_frame" : "enqueue_frameless", EnqueueCodeSize);
+ CodeBuffer buf(bb->instructions_begin(), bb->instructions_size());
+ MacroAssembler masm(&buf);
+ address start = masm.pc();
+ Register pre_val;
+
+ Label refill, restart;
+ if (with_frame) {
+ masm.save_frame(0);
+ pre_val = I0; // Was O0 before the save.
+ } else {
+ pre_val = O0; // No save here: g1_write_barrier_pre does save_frame before calling this variant.
+ }
+ int satb_q_index_byte_offset =
+ in_bytes(JavaThread::satb_mark_queue_offset() +
+ PtrQueue::byte_offset_of_index());
+ int satb_q_buf_byte_offset =
+ in_bytes(JavaThread::satb_mark_queue_offset() +
+ PtrQueue::byte_offset_of_buf());
+ assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) &&
+ in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t),
+ "check sizes in assembly below");
+
+ masm.bind(restart);
+ masm.ld_ptr(G2_thread, satb_q_index_byte_offset, L0); // L0 := queue index
+
+ masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill);
+ // If the branch is taken, no harm in executing this in the delay slot.
+ masm.delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); // L1 := queue buffer
+ masm.sub(L0, oopSize, L0); // index moves down by one oop-sized slot
+
+ masm.st_ptr(pre_val, L1, L0); // [_buf + index] := pre_val
+ if (!with_frame) {
+ // Use return-from-leaf; the index write-back rides in the delay slot.
+ masm.retl();
+ masm.delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
+ } else {
+ // Not delayed: the delay slot of ret is needed for the restore.
+ masm.st_ptr(L0, G2_thread, satb_q_index_byte_offset);
+ masm.ret();
+ masm.delayed()->restore();
+ }
+ masm.bind(refill);
+
+ address handle_zero =
+ CAST_FROM_FN_PTR(address,
+ &SATBMarkQueueSet::handle_zero_index_for_thread);
+ // This should be rare enough that we can afford to save all the
+ // scratch registers that the calling context might be using.
+ masm.mov(G1_scratch, L0);
+ masm.mov(G3_scratch, L1);
+ masm.mov(G4, L2);
+ // We need the value of O0 above (for the write into the buffer), so we
+ // save and restore it.
+ masm.mov(O0, L3);
+ // Since the call will overwrite O7, we save and restore that, as well.
+ masm.mov(O7, L4);
+ masm.call_VM_leaf(L5, handle_zero, G2_thread);
+ masm.mov(L0, G1_scratch);
+ masm.mov(L1, G3_scratch);
+ masm.mov(L2, G4);
+ masm.mov(L3, O0);
+ masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart); // retry from the index load
+ masm.delayed()->mov(L4, O7);
+
+ if (with_frame) {
+ satb_log_enqueue_with_frame = start;
+ satb_log_enqueue_with_frame_end = masm.pc();
+ } else {
+ satb_log_enqueue_frameless = start;
+ satb_log_enqueue_frameless_end = masm.pc();
+ }
+}
+
+static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) {
+ // Lazily create the requested stub variant on first use; later calls for
+ // the same variant find a non-zero entry address and do nothing.
+ const bool have_stub = with_frame ? (satb_log_enqueue_with_frame != 0)
+ : (satb_log_enqueue_frameless != 0);
+ if (have_stub) return;
+ generate_satb_log_enqueue(with_frame);
+ if (with_frame) {
+ assert(satb_log_enqueue_with_frame != 0, "postcondition.");
+ } else {
+ assert(satb_log_enqueue_frameless != 0, "postcondition.");
+ }
+ if (!G1SATBPrintStubs) return;
+
+ // Optional dump of the code that was just generated.
+ if (with_frame) {
+ tty->print_cr("Generated with-frame satb enqueue:");
+ Disassembler::decode((u_char*)satb_log_enqueue_with_frame,
+ satb_log_enqueue_with_frame_end, tty);
+ } else {
+ tty->print_cr("Generated frameless satb enqueue:");
+ Disassembler::decode((u_char*)satb_log_enqueue_frameless,
+ satb_log_enqueue_frameless_end, tty);
+ }
+}
+
+void MacroAssembler::g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs) {
+ assert(offset == 0 || index == noreg, "choose one");
+
+ if (G1DisablePreBarrier) return;
+ // Pre-write barrier: if the thread's SATB queue is active and the value currently at the store location is non-zero, enqueue it. (satb_log_barrier(tmp, obj, offset, preserve_o_regs);)
+ Label filtered;
+ // satb_log_barrier_work0(tmp, filtered);
+ if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
+ ld(G2, // tmp := 4-byte "active" field of the SATB queue, addressed off G2
+ in_bytes(JavaThread::satb_mark_queue_offset() +
+ PtrQueue::byte_offset_of_active()),
+ tmp);
+ } else {
+ guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1,
+ "Assumption");
+ ldsb(G2, // same field, 1-byte variant
+ in_bytes(JavaThread::satb_mark_queue_offset() +
+ PtrQueue::byte_offset_of_active()),
+ tmp);
+ }
+ // Active flag is zero: skip the whole barrier. Check on whether to annul.
+ br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
+ delayed() -> nop();
+
+ // satb_log_barrier_work1(tmp, offset);
+ if (index == noreg) {
+ if (Assembler::is_simm13(offset)) {
+ ld_ptr(obj, offset, tmp); // tmp := [obj + offset], the value about to be overwritten
+ } else {
+ set(offset, tmp); // offset does not fit a simm13 immediate: materialize it first
+ ld_ptr(obj, tmp, tmp);
+ }
+ } else {
+ ld_ptr(obj, index, tmp); // tmp := [obj + index]
+ }
+
+ // satb_log_barrier_work2(obj, tmp, offset);
+
+ // satb_log_barrier_work3(tmp, filtered, preserve_o_regs);
+
+ const Register pre_val = tmp;
+
+ if (G1SATBBarrierPrintNullPreVals) {
+ save_frame(0);
+ mov(pre_val, O0);
+ // Save G-regs that target may use.
+ mov(G1, L1);
+ mov(G2, L2);
+ mov(G3, L3);
+ mov(G4, L4);
+ mov(G5, L5);
+ call(CAST_FROM_FN_PTR(address, &count_null_pre_vals));
+ delayed()->nop();
+ // Restore G-regs that target may have used.
+ mov(L1, G1);
+ mov(L2, G2);
+ mov(L3, G3);
+ mov(L4, G4);
+ mov(L5, G5);
+ restore(G0, G0, G0);
+ }
+
+ // Previous value is zero (null): nothing to enqueue. Check on whether to annul.
+ br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered);
+ delayed() -> nop();
+
+ // OK, it's not filtered, so we'll need to call enqueue. In the normal
+ // case, pre_val will be a scratch G-reg, but there's some cases in which
+ // it's an O-reg. In the first case, do a normal call. In the latter,
+ // do a save here and call the frameless version.
+
+ guarantee(pre_val->is_global() || pre_val->is_out(),
+ "Or we need to think harder.");
+ if (pre_val->is_global() && !preserve_o_regs) {
+ generate_satb_log_enqueue_if_necessary(true); // with frame.
+ call(satb_log_enqueue_with_frame);
+ delayed()->mov(pre_val, O0); // argument goes in O0; the stub does its own save
+ } else {
+ generate_satb_log_enqueue_if_necessary(false); // frameless.
+ save_frame(0);
+ call(satb_log_enqueue_frameless);
+ delayed()->mov(pre_val->after_save(), O0); // pre_val as named after the save_frame above
+ restore();
+ }
+
+ bind(filtered);
+}
+
+static jint num_ct_writes = 0; // every call to count_ct_writes
+static jint num_ct_writes_filtered_in_hr = 0; // calls with filter_val == NULL
+static jint num_ct_writes_filtered_null = 0; // calls with non-NULL filter_val but NULL new_val
+static jint num_ct_writes_filtered_pop = 0; // remaining calls whose new_val is below popular_object_boundary()
+static G1CollectedHeap* g1 = NULL; // cached G1CollectedHeap::heap(), fetched the first time it is needed
+
+static Thread* count_ct_writes(void* filter_val, void* new_val) {
+ Atomic::inc(&num_ct_writes);
+ if (filter_val == NULL) {
+ Atomic::inc(&num_ct_writes_filtered_in_hr);
+ } else if (new_val == NULL) {
+ Atomic::inc(&num_ct_writes_filtered_null);
+ } else {
+ if (g1 == NULL) {
+ g1 = G1CollectedHeap::heap();
+ }
+ if ((HeapWord*)new_val < g1->popular_object_boundary()) {
+ Atomic::inc(&num_ct_writes_filtered_pop);
+ }
+ }
+ if ((num_ct_writes % 1000000) == 0) { // plain (non-atomic) re-read of the counter that was just incremented
+ jint num_ct_writes_filtered =
+ num_ct_writes_filtered_in_hr +
+ num_ct_writes_filtered_null +
+ num_ct_writes_filtered_pop;
+
+ tty->print_cr("%d potential CT writes: %5.2f%% filtered\n"
+ " (%5.2f%% intra-HR, %5.2f%% null, %5.2f%% popular).",
+ num_ct_writes,
+ 100.0*(float)num_ct_writes_filtered/(float)num_ct_writes,
+ 100.0*(float)num_ct_writes_filtered_in_hr/
+ (float)num_ct_writes,
+ 100.0*(float)num_ct_writes_filtered_null/
+ (float)num_ct_writes,
+ 100.0*(float)num_ct_writes_filtered_pop/
+ (float)num_ct_writes);
+ }
+ return Thread::current(); // g1_write_barrier_post moves this result back into G2, which it borrowed for the call
+}
+
+static address dirty_card_log_enqueue = 0; // entry of the dirty-card enqueue stub; 0 until it has been generated
+static u_char* dirty_card_log_enqueue_end = 0; // masm.pc() captured right after the stub was emitted
+
+// This gets to assume that o0 contains the object address.
+static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
+ BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
+ CodeBuffer buf(bb->instructions_begin(), bb->instructions_size());
+ MacroAssembler masm(&buf);
+ address start = masm.pc();
+
+ Label not_already_dirty, restart, refill;
+
+#ifdef _LP64
+ masm.srlx(O0, CardTableModRefBS::card_shift, O0);
+#else
+ masm.srl(O0, CardTableModRefBS::card_shift, O0);
+#endif
+ Address rs(O1, (address)byte_map_base);
+ masm.load_address(rs); // O1 := <card table base>
+ masm.ldub(O0, O1, O2); // O2 := [O0 + O1]
+
+ masm.br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt,
+ O2, not_already_dirty);
+ // Get O0 + O1 (the card's address) into O3 by itself -- useful in the
+ // take-the-branch case, harmless if not.
+ masm.delayed()->add(O0, O1, O3);
+
+ // We didn't take the branch, so we're already dirty: return.
+ // Use return-from-leaf
+ masm.retl();
+ masm.delayed()->nop();
+
+ // Not dirty.
+ masm.bind(not_already_dirty);
+ // First, dirty it.
+ masm.stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty).
+ int dirty_card_q_index_byte_offset =
+ in_bytes(JavaThread::dirty_card_queue_offset() +
+ PtrQueue::byte_offset_of_index());
+ int dirty_card_q_buf_byte_offset =
+ in_bytes(JavaThread::dirty_card_queue_offset() +
+ PtrQueue::byte_offset_of_buf());
+ masm.bind(restart);
+ masm.ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0); // L0 := queue index
+
+ masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn,
+ L0, refill);
+ // If the branch is taken, no harm in executing this in the delay slot.
+ masm.delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1); // L1 := queue buffer
+ masm.sub(L0, oopSize, L0); // index moves down by one oop-sized slot
+
+ masm.st_ptr(O3, L1, L0); // [_buf + index] := O3 (the card address)
+ // Use return-from-leaf
+ masm.retl();
+ masm.delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);
+
+ masm.bind(refill);
+ address handle_zero =
+ CAST_FROM_FN_PTR(address,
+ &DirtyCardQueueSet::handle_zero_index_for_thread);
+ // This should be rare enough that we can afford to save all the
+ // scratch registers that the calling context might be using.
+ masm.mov(G1_scratch, L3);
+ masm.mov(G3_scratch, L5); // NOTE(review): G4 is not saved here, unlike in generate_satb_log_enqueue -- confirm intentional
+ // We need the value of O3 above (for the write into the buffer), so we
+ // save and restore it.
+ masm.mov(O3, L6);
+ // Since the call will overwrite O7, we save and restore that, as well.
+ masm.mov(O7, L4);
+
+ masm.call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
+ masm.mov(L3, G1_scratch);
+ masm.mov(L5, G3_scratch);
+ masm.mov(L6, O3);
+ masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart); // retry from the index load
+ masm.delayed()->mov(L4, O7);
+
+ dirty_card_log_enqueue = start;
+ dirty_card_log_enqueue_end = masm.pc();
+ // XXX Should have a guarantee here about not going off the end!
+ // Does it already do so? Do an experiment...
+}
+
+static inline void
+generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) {
+ // The stub is built at most once: a non-zero entry address means an
+ // earlier call already generated it (with whatever base it was given).
+ if (dirty_card_log_enqueue != 0) return;
+ generate_dirty_card_log_enqueue(byte_map_base);
+ assert(dirty_card_log_enqueue != 0, "postcondition.");
+ if (!G1SATBPrintStubs) return;
+ tty->print_cr("Generated dirty_card enqueue:");
+ Disassembler::decode((u_char*)dirty_card_log_enqueue,
+ dirty_card_log_enqueue_end,
+ tty);
+}
+
+
+void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
+
+ Label filtered;
+ MacroAssembler* post_filter_masm = this; // alias of this; some of the emits below go through it
+
+ if (new_val == G0) return; // storing the constant zero register: emit no barrier
+ if (G1DisablePostBarrier) return;
+
+ G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set();
+ assert(bs->kind() == BarrierSet::G1SATBCT ||
+ bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier");
+ if (G1RSBarrierRegionFilter) {
+ xor3(store_addr, new_val, tmp); // tmp := (store_addr ^ new_val) >> LogOfHRGrainBytes; zero takes the filtered branch below
+#ifdef _LP64
+ srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
+#else
+ srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
+#endif
+ if (G1PrintCTFilterStats) {
+ guarantee(tmp->is_global(), "Or stats won't work...");
+ // This is a sleazy hack: I'm temporarily hijacking G2, which I
+ // promise to restore.
+ mov(new_val, G2);
+ save_frame(0);
+ mov(tmp, O0); // first argument: the filter value
+ mov(G2, O1); // second argument: new_val, carried across the save in G2
+ // Save G-regs that target may use.
+ mov(G1, L1);
+ mov(G2, L2);
+ mov(G3, L3);
+ mov(G4, L4);
+ mov(G5, L5);
+ call(CAST_FROM_FN_PTR(address, &count_ct_writes));
+ delayed()->nop();
+ mov(O0, G2); // G2 := return value of count_ct_writes (Thread::current())
+ // Restore G-regs that target may have used.
+ mov(L1, G1); // L2 is not copied back: G2 was just set from O0 above
+ mov(L3, G3);
+ mov(L4, G4);
+ mov(L5, G5);
+ restore(G0, G0, G0);
+ }
+ // XXX Should I predict this taken or not? Does it matter?
+ br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
+ delayed()->nop();
+ }
+
+ // Now we decide how to generate the card table write. If we're
+ // enqueueing, we call out to a generated function. Otherwise, we do it
+ // inline here.
+
+ if (G1RSBarrierUseQueue) {
+ // If the "store_addr" register is an "in" or "local" register, move it to
+ // a scratch reg so we can pass it as an argument.
+ bool use_scr = !(store_addr->is_global() || store_addr->is_out());
+ // Pick a scratch register different from "tmp".
+ Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
+ // Make sure we use up the delay slot! NOTE(review): the filter branch above already has a nop in its delay slot, so this looks stale -- confirm
+ if (use_scr) {
+ post_filter_masm->mov(store_addr, scr);
+ } else {
+ post_filter_masm->nop();
+ }
+ generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
+ save_frame(0);
+ call(dirty_card_log_enqueue);
+ if (use_scr) {
+ delayed()->mov(scr, O0); // the stub takes the address in O0; scr is a G-reg, so it is named the same after the save
+ } else {
+ delayed()->mov(store_addr->after_save(), O0); // store_addr as named after the save_frame above
+ }
+ restore();
+
+ } else {
+
+#ifdef _LP64
+ post_filter_masm->srlx(store_addr, CardTableModRefBS::card_shift, store_addr);
+#else
+ post_filter_masm->srl(store_addr, CardTableModRefBS::card_shift, store_addr);
+#endif
+ assert( tmp != store_addr, "need separate temp reg");
+ Address rs(tmp, (address)bs->byte_map_base);
+ load_address(rs);
+ stb(G0, rs.base(), store_addr); // zero the byte at byte_map_base + (store_addr >> card_shift)
+ }
+
+ bind(filtered);
+
+}
+
+#endif // SERIALGC
+///////////////////////////////////////////////////////////////////////////////////
+
+void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
+ // A store of the constant NULL (G0) needs no write barrier; emit nothing then.
+ if (new_val != G0) {
+ CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set();
+ assert(bs->kind() == BarrierSet::CardTableModRef || bs->kind() == BarrierSet::CardTableExtension, "wrong barrier");
+ card_table_write(bs->byte_map_base, tmp, store_addr);
+ }
+}
+
void MacroAssembler::load_klass(Register s, Register d) {
// The number of bytes in this code is used by
// MachCallDynamicJavaNode::ret_addr_offset()