--- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp Wed Apr 11 10:05:02 2018 -0400
+++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp Wed Apr 11 16:07:42 2018 +0200
@@ -111,3 +111,299 @@
#endif
__ popa();
}
+
+void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+ Register dst, Address src, Register tmp1, Register tmp_thread) {
+ bool on_oop = type == T_OBJECT || type == T_ARRAY;
+ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
+ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
+ bool on_reference = on_weak || on_phantom;
+ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
+ if (on_oop && on_reference) {
+ const Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
+ NOT_LP64(__ get_thread(thread));
+
+ // Generate the G1 pre-barrier code to log the value of
+ // the referent field in an SATB buffer.
+ g1_write_barrier_pre(masm /* masm */,
+ noreg /* obj */,
+ dst /* pre_val */,
+ thread /* thread */,
+ tmp1 /* tmp */,
+ true /* tosca_live */,
+ true /* expand_call */);
+ }
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
+ Register obj, // if not noreg, pre_val is loaded from Address(obj, 0)
+ Register pre_val, // value to record; must not be noreg
+ Register thread, // base register for the SATB queue fields (r15_thread on LP64)
+ Register tmp, // scratch: holds the queue index, then the buffer slot address
+ bool tosca_live, // if true, rax is pushed/popped around the runtime call
+ bool expand_call) { // if true, call_VM_leaf_base is invoked directly
+ // Emits the G1 SATB pre-barrier: if marking is active and pre_val is
+ // non-null, pre_val is stored into the thread's SATB buffer, or passed to
+ // SharedRuntime::g1_wb_pre when the queue index is 0. If expand_call is true
+ // the call_VM_leaf macro is expanded directly to skip the _last_sp check.
+#ifdef _LP64
+ assert(thread == r15_thread, "must be");
+#endif // _LP64
+
+ Label done;
+ Label runtime;
+
+ assert(pre_val != noreg, "check this code");
+
+ if (obj != noreg) {
+ assert_different_registers(obj, pre_val, tmp);
+ assert(pre_val != rax, "check this code");
+ }
+
+ Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+ SATBMarkQueue::byte_offset_of_active()));
+ Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+ SATBMarkQueue::byte_offset_of_index()));
+ Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+ SATBMarkQueue::byte_offset_of_buf()));
+
+
+ // Is marking active? (The active flag is either 4 bytes or 1 byte wide.)
+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
+ __ cmpl(in_progress, 0);
+ } else {
+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+ __ cmpb(in_progress, 0);
+ }
+ __ jcc(Assembler::equal, done);
+
+ // Do we need to load the previous value?
+ if (obj != noreg) {
+ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
+ }
+
+ // Is the previous value null?
+ __ cmpptr(pre_val, (int32_t) NULL_WORD);
+ __ jcc(Assembler::equal, done);
+
+ // Can we store original value in the thread's buffer?
+ // Is index == 0?
+ // (The index field is typed as size_t.)
+
+ __ movptr(tmp, index); // tmp := *index_adr
+ __ cmpptr(tmp, 0); // tmp == 0?
+ __ jcc(Assembler::equal, runtime); // If yes, goto runtime
+
+ __ subptr(tmp, wordSize); // tmp := tmp - wordSize
+ __ movptr(index, tmp); // *index_adr := tmp
+ __ addptr(tmp, buffer); // tmp := tmp + *buffer_adr
+
+ // Record the previous value
+ __ movptr(Address(tmp, 0), pre_val);
+ __ jmp(done);
+
+ __ bind(runtime);
+ // save the live input values
+ if(tosca_live) __ push(rax);
+
+ if (obj != noreg && obj != rax)
+ __ push(obj);
+
+ if (pre_val != rax)
+ __ push(pre_val);
+
+ // Calling the runtime using the regular call_VM_leaf mechanism generates
+ // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
+ // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
+ //
+ // If we are generating the pre-barrier without a frame (e.g. in the
+ // intrinsified Reference.get() routine) then ebp might be pointing to
+ // the caller frame and so this check will most likely fail at runtime.
+ //
+ // Expanding the call directly bypasses the generation of the check.
+ // So when we do not have a full interpreter frame on the stack
+ // expand_call should be passed true.
+
+ NOT_LP64( __ push(thread); ) // 32-bit: saved here, popped again after the call
+
+ if (expand_call) {
+ LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
+#ifdef _LP64
+ if (c_rarg1 != thread) {
+ __ mov(c_rarg1, thread);
+ }
+ if (c_rarg0 != pre_val) {
+ __ mov(c_rarg0, pre_val);
+ }
+#else
+ __ push(thread); // NOTE(review): presumably the two stack-passed call arguments - confirm
+ __ push(pre_val);
+#endif
+ __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
+ } else {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
+ }
+
+ NOT_LP64( __ pop(thread); )
+
+ // restore the live input values (reverse order of the pushes above)
+ if (pre_val != rax)
+ __ pop(pre_val);
+
+ if (obj != noreg && obj != rax)
+ __ pop(obj);
+
+ if(tosca_live) __ pop(rax);
+
+ __ bind(done);
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
+ Register store_addr, // address that was stored to; pushed/popped around the runtime call
+ Register new_val, // value that was stored; pushed/popped around the runtime call
+ Register thread, // base register for the dirty card queue fields (r15_thread on LP64)
+ Register tmp, // scratch: region-cross test, then the card address
+ Register tmp2) { // scratch: card table base, then the queue slot address
+#ifdef _LP64
+ assert(thread == r15_thread, "must be");
+#endif // _LP64
+ // Emits the G1 post-barrier: a region-crossing, non-NULL store to a clean card dirties and logs that card.
+ Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+ DirtyCardQueue::byte_offset_of_index()));
+ Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+ DirtyCardQueue::byte_offset_of_buf()));
+
+ CardTableBarrierSet* ct =
+ barrier_set_cast<CardTableBarrierSet>(Universe::heap()->barrier_set());
+ assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code");
+
+ Label done;
+ Label runtime;
+
+ // Does store cross heap regions?
+ // (tmp := (store_addr ^ new_val) >> LogOfHRGrainBytes; a zero result skips the barrier.)
+ __ movptr(tmp, store_addr);
+ __ xorptr(tmp, new_val);
+ __ shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
+ __ jcc(Assembler::equal, done);
+
+ // crosses regions, storing NULL?
+
+ __ cmpptr(new_val, (int32_t) NULL_WORD);
+ __ jcc(Assembler::equal, done);
+
+ // storing region crossing non-NULL, is card already dirty?
+
+ const Register card_addr = tmp;
+ const Register cardtable = tmp2;
+
+ __ movptr(card_addr, store_addr);
+ __ shrptr(card_addr, CardTable::card_shift);
+ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
+ // a valid address and therefore is not properly handled by the relocation code.
+ __ movptr(cardtable, (intptr_t)ct->card_table()->byte_map_base());
+ __ addptr(card_addr, cardtable);
+
+ __ cmpb(Address(card_addr, 0), (int)G1CardTable::g1_young_card_val()); // card marked young: nothing to do
+ __ jcc(Assembler::equal, done);
+
+ __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); // NOTE(review): presumably orders the oop store before the card re-read - confirm
+ __ cmpb(Address(card_addr, 0), (int)G1CardTable::dirty_card_val());
+ __ jcc(Assembler::equal, done);
+
+
+ // storing a region crossing, non-NULL oop, card is clean.
+ // dirty card and log.
+
+ __ movb(Address(card_addr, 0), (int)G1CardTable::dirty_card_val());
+ // Enqueue card_addr in the thread's dirty card queue; a queue index of 0 goes to the runtime instead.
+ __ cmpl(queue_index, 0); // NOTE(review): 32-bit index ops here; g1_write_barrier_pre uses pointer-sized ops on its queue index - confirm field width
+ __ jcc(Assembler::equal, runtime);
+ __ subl(queue_index, wordSize);
+ __ movptr(tmp2, buffer);
+#ifdef _LP64
+ __ movslq(rscratch1, queue_index);
+ __ addq(tmp2, rscratch1);
+ __ movq(Address(tmp2, 0), card_addr);
+#else
+ __ addl(tmp2, queue_index);
+ __ movl(Address(tmp2, 0), card_addr);
+#endif
+ __ jmp(done);
+
+ __ bind(runtime);
+ // save the live input values
+ __ push(store_addr);
+ __ push(new_val);
+#ifdef _LP64
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
+#else
+ __ push(thread);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
+ __ pop(thread);
+#endif
+ __ pop(new_val);
+ __ pop(store_addr);
+
+ __ bind(done);
+}
+
+void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                         Address dst, Register val, Register tmp1, Register tmp2) {
+  // Emits an oop store to dst bracketed by the G1 barriers: the pre-barrier
+  // before the store and the post-barrier after it.
+  // The decorators decide which barriers are needed; a store without a value
+  // register (val == noreg) never gets a post-barrier.
+  const bool heap_store = (decorators & IN_HEAP) != 0;
+  const bool emit_pre_barrier = heap_store || (decorators & IN_CONCURRENT_ROOT) != 0;
+  const bool has_value = (val != noreg);
+  const bool emit_post_barrier = has_value && heap_store;
+
+#ifdef _LP64
+  const Register scratch = r8;
+  const Register thread_reg = r15_thread;
+#else
+  const Register scratch = rsi;
+  const Register thread_reg = rcx;
+#endif
+
+  // Flatten the destination into tmp1 (done unconditionally because the
+  // registers are needed), so the field is addressed as Address(tmp1, 0).
+  const bool plain_base = (dst.index() == noreg) && (dst.disp() == 0);
+  if (!plain_base) {
+    __ lea(tmp1, dst);
+  } else if (dst.base() != tmp1) {
+    __ movptr(tmp1, dst.base());
+  }
+
+#ifndef _LP64
+  // 32-bit only: load the thread into rcx and save bcp (restored at the end).
+  InterpreterMacroAssembler* imasm = static_cast<InterpreterMacroAssembler*>(masm);
+  __ get_thread(rcx);
+  imasm->save_bcp();
+#endif
+
+  if (emit_pre_barrier) {
+    g1_write_barrier_pre(masm, tmp1 /* obj */, tmp2 /* pre_val */, thread_reg, scratch /* tmp */,
+                         has_value /* tosca_live */, false /* expand_call */);
+  }
+
+  // G1 barrier needs uncompressed oop for region cross check: with compressed
+  // oops, val is copied into tmp2 before the store is emitted.
+  Register stored_val = val;
+  if (emit_post_barrier && UseCompressedOops) {
+    stored_val = tmp2;
+    __ movptr(stored_val, val);
+  }
+
+  BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
+
+  if (emit_post_barrier) {
+    g1_write_barrier_post(masm, tmp1 /* store_adr */, stored_val /* new_val */, thread_reg,
+                          scratch /* tmp */, tmp2 /* tmp2 */);
+  }
+
+#ifndef _LP64
+  imasm->restore_bcp();
+#endif
+}