src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp
changeset 49748 6a880e576856
parent 49484 ee8fa73b90f9
child 49752 93d84f667d12
--- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp	Wed Apr 11 10:05:02 2018 -0400
+++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp	Wed Apr 11 16:07:42 2018 +0200
@@ -111,3 +111,299 @@
 #endif
   __ popa();
 }
+
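+// For loads of weak or phantom oop references (e.g. the referent field of a
+// java.lang.ref.Reference), the loaded value must be logged in the SATB
+// buffer: the mutator now holds a strong reference to an object that the
+// concurrent marking cycle might otherwise never visit.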
+void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                    Register dst, Address src, Register tmp1, Register tmp_thread) {
+  bool on_oop = type == T_OBJECT || type == T_ARRAY;
+  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
+  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
+  bool on_reference = on_weak || on_phantom;
+  ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
+  if (on_oop && on_reference) {
+    const Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);
+    NOT_LP64(__ get_thread(thread));
+
+    // Generate the G1 pre-barrier code to log the value of
+    // the referent field in an SATB buffer.
+    g1_write_barrier_pre(masm /* masm */,
+                         noreg /* obj */,
+                         dst /* pre_val */,
+                         thread /* thread */,
+                         tmp1 /* tmp */,
+                         true /* tosca_live */,
+                         true /* expand_call */);
+  }
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
+                                                 Register obj,
+                                                 Register pre_val,
+                                                 Register thread,
+                                                 Register tmp,
+                                                 bool tosca_live,
+                                                 bool expand_call) {
+  // If expand_call is true then we expand the call_VM_leaf macro
+  // directly to skip generating the check by
+  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
+
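+  // In essence, the code below implements the following logic
+  // (a sketch, not the exact instruction sequence):
+  //
+  //   if (marking is active) {
+  //     if (obj != noreg) pre_val = *obj;     // load the previous value
+  //     if (pre_val != NULL) {
+  //       if (index != 0) {                   // room left in the SATB buffer?
+  //         index -= wordSize;
+  //         buffer[index] = pre_val;          // log the previous value
+  //       } else {
+  //         g1_wb_pre(pre_val, thread);       // runtime slow path
+  //       }
+  //     }
+  //   }
+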
+#ifdef _LP64
+  assert(thread == r15_thread, "must be");
+#endif // _LP64
+
+  Label done;
+  Label runtime;
+
+  assert(pre_val != noreg, "check this code");
+
+  if (obj != noreg) {
+    assert_different_registers(obj, pre_val, tmp);
+    assert(pre_val != rax, "check this code");
+  }
+
+  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       SATBMarkQueue::byte_offset_of_active()));
+  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                 SATBMarkQueue::byte_offset_of_index()));
+  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                  SATBMarkQueue::byte_offset_of_buf()));
+
+  // Is marking active?
+  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) {
+    __ cmpl(in_progress, 0);
+  } else {
+    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
+    __ cmpb(in_progress, 0);
+  }
+  __ jcc(Assembler::equal, done);
+
+  // Do we need to load the previous value?
+  if (obj != noreg) {
+    __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);
+  }
+
+  // Is the previous value null?
+  __ cmpptr(pre_val, (int32_t) NULL_WORD);
+  __ jcc(Assembler::equal, done);
+
+  // Can we store the original value in the thread's buffer?
+  // Is index == 0?
+  // (The index field is typed as size_t.)
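+  // Note that the SATB buffer is filled from the end towards the start:
+  // index is the byte offset of the next free slot, so index == 0 means
+  // the buffer is full and the runtime must be called to flush it.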
+
+  __ movptr(tmp, index);                   // tmp := *index_adr
+  __ cmpptr(tmp, 0);                       // tmp == 0?
+  __ jcc(Assembler::equal, runtime);       // If yes, goto runtime
+
+  __ subptr(tmp, wordSize);                // tmp := tmp - wordSize
+  __ movptr(index, tmp);                   // *index_adr := tmp
+  __ addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr
+
+  // Record the previous value
+  __ movptr(Address(tmp, 0), pre_val);
+  __ jmp(done);
+
+  __ bind(runtime);
+  // save the live input values
+  if (tosca_live) {
+    __ push(rax);
+  }
+
+  if (obj != noreg && obj != rax) {
+    __ push(obj);
+  }
+
+  if (pre_val != rax) {
+    __ push(pre_val);
+  }
+
+  // Calling the runtime using the regular call_VM_leaf mechanism generates
+  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
+  // that checks that *(ebp+frame::interpreter_frame_last_sp) == NULL.
+  //
+  // If we are generating the pre-barrier without a frame (e.g. in the
+  // intrinsified Reference.get() routine) then ebp might be pointing to
+  // the caller frame and so this check will most likely fail at runtime.
+  //
+  // Expanding the call directly bypasses the generation of the check.
+  // So when we do not have a full interpreter frame on the stack
+  // expand_call should be passed true.
+
+  NOT_LP64( __ push(thread); )
+
+  if (expand_call) {
+    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
+#ifdef _LP64
+    if (c_rarg1 != thread) {
+      __ mov(c_rarg1, thread);
+    }
+    if (c_rarg0 != pre_val) {
+      __ mov(c_rarg0, pre_val);
+    }
+#else
+    __ push(thread);
+    __ push(pre_val);
+#endif
+    __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
+  } else {
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
+  }
+
+  NOT_LP64( __ pop(thread); )
+
+  // restore the live input values
+  if (pre_val != rax) {
+    __ pop(pre_val);
+  }
+
+  if (obj != noreg && obj != rax) {
+    __ pop(obj);
+  }
+
+  if (tosca_live) {
+    __ pop(rax);
+  }
+
+  __ bind(done);
+}
+
+void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
+                                                  Register store_addr,
+                                                  Register new_val,
+                                                  Register thread,
+                                                  Register tmp,
+                                                  Register tmp2) {
+#ifdef _LP64
+  assert(thread == r15_thread, "must be");
+#endif // _LP64
+
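+  // In essence, the code below implements the following filtering and
+  // logging logic (a sketch, not the exact instruction sequence):
+  //
+  //   if (same_region(store_addr, new_val)) goto done;  // xor/shift check
+  //   if (new_val == NULL) goto done;
+  //   card = &byte_map_base[store_addr >> card_shift];
+  //   if (*card == g1_young_card_val) goto done;
+  //   StoreLoad fence;
+  //   if (*card == dirty_card_val) goto done;
+  //   *card = dirty_card_val;
+  //   log card in the dirty card queue (runtime call if the buffer is full);
+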
+  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                       DirtyCardQueue::byte_offset_of_index()));
+  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                  DirtyCardQueue::byte_offset_of_buf()));
+
+  CardTableBarrierSet* ct =
+    barrier_set_cast<CardTableBarrierSet>(Universe::heap()->barrier_set());
+  assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code");
+
+  Label done;
+  Label runtime;
+
+  // Does the store cross heap regions?
+
+  __ movptr(tmp, store_addr);
+  __ xorptr(tmp, new_val);
+  __ shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
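+  // The result is zero iff store_addr and new_val differ only in the low
+  // LogOfHRGrainBytes bits, i.e. they lie in the same heap region.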
+  __ jcc(Assembler::equal, done);
+
+  // The store crosses regions; are we storing NULL?
+
+  __ cmpptr(new_val, (int32_t) NULL_WORD);
+  __ jcc(Assembler::equal, done);
+
+  // Storing a region-crossing, non-NULL oop; is the card already dirty?
+
+  const Register card_addr = tmp;
+  const Register cardtable = tmp2;
+
+  __ movptr(card_addr, store_addr);
+  __ shrptr(card_addr, CardTable::card_shift);
+  // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
+  // a valid address and therefore is not properly handled by the relocation code.
+  __ movptr(cardtable, (intptr_t)ct->card_table()->byte_map_base());
+  __ addptr(card_addr, cardtable);
+
+  __ cmpb(Address(card_addr, 0), (int)G1CardTable::g1_young_card_val());
+  __ jcc(Assembler::equal, done);
+
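+  // A StoreLoad fence makes the preceding oop store globally visible before
+  // the card is re-read: only then is it safe to skip the enqueue when
+  // another thread has already dirtied the card.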
+  __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
+  __ cmpb(Address(card_addr, 0), (int)G1CardTable::dirty_card_val());
+  __ jcc(Assembler::equal, done);
+
+  // Storing a region-crossing, non-NULL oop and the card is clean:
+  // dirty the card and log it.
+
+  __ movb(Address(card_addr, 0), (int)G1CardTable::dirty_card_val());
+
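+  // Try to log the card in the thread-local dirty card queue; as with the
+  // SATB queue, the index counts down and 0 means the buffer is full.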
+  __ cmpl(queue_index, 0);
+  __ jcc(Assembler::equal, runtime);
+  __ subl(queue_index, wordSize);
+  __ movptr(tmp2, buffer);
+#ifdef _LP64
+  __ movslq(rscratch1, queue_index);
+  __ addq(tmp2, rscratch1);
+  __ movq(Address(tmp2, 0), card_addr);
+#else
+  __ addl(tmp2, queue_index);
+  __ movl(Address(tmp2, 0), card_addr);
+#endif
+  __ jmp(done);
+
+  __ bind(runtime);
+  // save the live input values
+  __ push(store_addr);
+  __ push(new_val);
+#ifdef _LP64
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
+#else
+  __ push(thread);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
+  __ pop(thread);
+#endif
+  __ pop(new_val);
+  __ pop(store_addr);
+
+  __ bind(done);
+}
+
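+// oop_store_at applies the SATB pre-barrier (logging the field's previous
+// value), performs the store itself, and then applies the post-barrier
+// (dirtying and logging the card for remembered set maintenance).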
+void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                         Address dst, Register val, Register tmp1, Register tmp2) {
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool in_concurrent_root = (decorators & IN_CONCURRENT_ROOT) != 0;
+
+  bool needs_pre_barrier = in_heap || in_concurrent_root;
+  bool needs_post_barrier = val != noreg && in_heap;
+
+  Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi);
+  Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
+  // Flatten the object address if needed. We do this regardless of precise
+  // card marking because the barrier code needs the address in a register.
+  if (dst.index() == noreg && dst.disp() == 0) {
+    if (dst.base() != tmp1) {
+      __ movptr(tmp1, dst.base());
+    }
+  } else {
+    __ lea(tmp1, dst);
+  }
+
+#ifndef _LP64
+  InterpreterMacroAssembler *imasm = static_cast<InterpreterMacroAssembler*>(masm);
+#endif
+
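+  // On 32-bit, tmp3 is rsi, which the interpreter uses as the bcp register,
+  // so it must be saved and restored around the barrier code.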
+  NOT_LP64(__ get_thread(rcx));
+  NOT_LP64(imasm->save_bcp());
+
+  if (needs_pre_barrier) {
+    g1_write_barrier_pre(masm /* masm */,
+                         tmp1 /* obj */,
+                         tmp2 /* pre_val */,
+                         rthread /* thread */,
+                         tmp3 /* tmp */,
+                         val != noreg /* tosca_live */,
+                         false /* expand_call */);
+  }
+  if (val == noreg) {
+    BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
+  } else {
+    Register new_val = val;
+    if (needs_post_barrier) {
+      // G1 barrier needs uncompressed oop for region cross check.
+      if (UseCompressedOops) {
+        new_val = tmp2;
+        __ movptr(new_val, val);
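+        // Keep an uncompressed copy: the store below may encode val in place.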
+      }
+    }
+    BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);
+    if (needs_post_barrier) {
+      g1_write_barrier_post(masm /* masm */,
+                            tmp1 /* store_addr */,
+                            new_val /* new_val */,
+                            rthread /* thread */,
+                            tmp3 /* tmp */,
+                            tmp2 /* tmp2 */);
+    }
+  }
+  NOT_LP64(imasm->restore_bcp());
+}