8232992: Shenandoah: Implement self-fixing interpreter LRB
author zgu
Mon, 28 Oct 2019 11:33:28 -0400
changeset 58819 ef8be51fff48
parent 58817 7f27d70a2424
child 58821 5ec8aeda451e
8232992: Shenandoah: Implement self-fixing interpreter LRB Reviewed-by: shade
src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp
src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp
src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp
src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp
src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp
src/hotspot/share/gc/shenandoah/shenandoahRuntime.hpp
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp	Mon Oct 28 12:55:48 2019 +0000
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp	Mon Oct 28 11:33:28 2019 -0400
@@ -233,9 +233,10 @@
   }
 }
 
-void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Register tmp) {
+void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr) {
   assert(ShenandoahLoadRefBarrier, "Should be enabled");
   assert(dst != rscratch2, "need rscratch2");
+  assert_different_registers(load_addr.base(), load_addr.index(), rscratch1, rscratch2);
 
   Label done;
   __ enter();
@@ -245,17 +246,38 @@
   // Check for heap stability
   __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, done);
 
-  RegSet to_save = RegSet::of(r0);
+  // Use r1 to pass the load address to the LRB stub
+  Register result_dst = dst;
+  if (dst == r1) {
+    __ mov(rscratch1, dst);
+    dst = rscratch1;
+  }
+
+  RegSet to_save_r1 = RegSet::of(r1);
+  // If the outgoing (result) register is r1, we can clobber it without saving
+  if (result_dst != r1) {
+    __ push(to_save_r1, sp);
+  }
+  __ lea(r1, load_addr);
+
+  RegSet to_save_r0 = RegSet::of(r0);
   if (dst != r0) {
-    __ push(to_save, sp);
+    __ push(to_save_r0, sp);
     __ mov(r0, dst);
   }
 
   __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb())));
 
+  if (result_dst != r0) {
+    __ mov(result_dst, r0);
+  }
+
   if (dst != r0) {
-    __ mov(dst, r0);
-    __ pop(to_save, sp);
+    __ pop(to_save_r0, sp);
+  }
+
+  if (result_dst != r1) {
+    __ pop(to_save_r1, sp);
   }
 
   __ bind(done);
@@ -315,11 +337,11 @@
   }
 }
 
-void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Register tmp) {
+void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr) {
   if (ShenandoahLoadRefBarrier) {
     Label is_null;
     __ cbz(dst, is_null);
-    load_reference_barrier_not_null(masm, dst, tmp);
+    load_reference_barrier_not_null(masm, dst, load_addr);
     __ bind(is_null);
   }
 }
@@ -349,7 +371,7 @@
     if (not_in_heap && !is_traversal_mode) {
       load_reference_barrier_native(masm, dst, src);
     } else {
-      load_reference_barrier(masm, dst, tmp1);
+      load_reference_barrier(masm, dst, src);
     }
 
     if (dst != result_dst) {
@@ -619,9 +641,9 @@
   __ load_parameter(0, r0);
   __ load_parameter(1, r1);
   if (UseCompressedOops) {
-    __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup_narrow));
+    __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow));
   } else {
-    __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup));
+    __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier));
   }
   __ blr(lr);
   __ mov(rscratch1, r0);
@@ -646,6 +668,7 @@
 //
 // Input:
 //   r0: OOP to evacuate.  Not null.
+//   r1: load address
 //
 // Output:
 //   r0: Pointer to evacuated OOP.
@@ -681,7 +704,11 @@
 
   __ push_call_clobbered_registers();
 
-  __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier));
+  if (UseCompressedOops) {
+    __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow));
+  } else {
+    __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier));
+  }
   __ blr(lr);
   __ mov(rscratch1, r0);
   __ pop_call_clobbered_registers();
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp	Mon Oct 28 12:55:48 2019 +0000
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp	Mon Oct 28 11:33:28 2019 -0400
@@ -56,8 +56,8 @@
 
   void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg);
   void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg);
-  void load_reference_barrier(MacroAssembler* masm, Register dst, Register tmp);
-  void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Register tmp);
+  void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr);
+  void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr);
   void load_reference_barrier_native(MacroAssembler* masm, Register dst, Address load_addr);
 
   address generate_shenandoah_lrb(StubCodeGenerator* cgen);
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp	Mon Oct 28 12:55:48 2019 +0000
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp	Mon Oct 28 11:33:28 2019 -0400
@@ -247,7 +247,7 @@
   __ bind(done);
 }
 
-void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst) {
+void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address src) {
   assert(ShenandoahLoadRefBarrier, "Should be enabled");
 
   Label done;
@@ -262,26 +262,51 @@
   __ push(thread);
   __ get_thread(thread);
 #endif
-  assert_different_registers(dst, thread);
 
   Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
   __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
   __ jccb(Assembler::zero, done);
 
-   if (dst != rax) {
-     __ xchgptr(dst, rax); // Move obj into rax and save rax into obj.
-   }
+  // Use rsi for src address
+  const Register src_addr = rsi;
+  // Set up the address parameter first, if doing so does not clobber the oop in dst
+  bool need_addr_setup = (src_addr != dst);
+
+  if (need_addr_setup) {
+    __ push(src_addr);
+    __ lea(src_addr, src);
 
-   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb())));
+    if (dst != rax) {
+      // Save rax, then move obj into rax
+      __ push(rax);
+      __ movptr(rax, dst);
+    }
+  } else {
+    // dst == rsi
+    __ push(rax);
+    __ movptr(rax, dst);
 
-   if (dst != rax) {
-     __ xchgptr(rax, dst); // Swap back obj with rax.
-   }
+    // rsi is the outgoing register, so we can clobber it. NOTE(review): assumes src does not use rax (overwritten above) - confirm
+    __ lea(src_addr, src);
+  }
+
+  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb())));
+
+  if (need_addr_setup) {
+    if (dst != rax) {
+      __ movptr(dst, rax);
+      __ pop(rax);
+    }
+    __ pop(src_addr);
+  } else {
+    __ movptr(dst, rax);
+    __ pop(rax);
+  }
 
   __ bind(done);
 
 #ifndef _LP64
-  __ pop(thread);
+    __ pop(thread);
 #endif
 }
 
@@ -410,12 +435,12 @@
   }
 }
 
-void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst) {
+void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src) {
   if (ShenandoahLoadRefBarrier) {
     Label done;
     __ testptr(dst, dst);
     __ jcc(Assembler::zero, done);
-    load_reference_barrier_not_null(masm, dst);
+    load_reference_barrier_not_null(masm, dst, src);
     __ bind(done);
   }
 }
@@ -454,7 +479,7 @@
     if (not_in_heap && !is_traversal_mode) {
       load_reference_barrier_native(masm, dst, src);
     } else {
-      load_reference_barrier(masm, dst);
+      load_reference_barrier(masm, dst, src);
     }
 
     if (dst != result_dst) {
@@ -864,14 +889,14 @@
   __ load_parameter(0, c_rarg0);
   __ load_parameter(1, c_rarg1);
   if (UseCompressedOops) {
-    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup_narrow), c_rarg0, c_rarg1);
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), c_rarg0, c_rarg1);
   } else {
-    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup), c_rarg0, c_rarg1);
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), c_rarg0, c_rarg1);
   }
 #else
   __ load_parameter(0, rax);
   __ load_parameter(1, rbx);
-  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup), rax, rbx);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rbx);
 #endif
 
   __ restore_live_registers_except_rax(true);
@@ -890,6 +915,11 @@
 
 #define __ cgen->assembler()->
 
+/*
+ *  Incoming parameters:
+ *  rax: oop
+ *  rsi: load address
+ */
 address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
   __ align(CodeEntryAlignment);
   StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
@@ -941,7 +971,6 @@
   __ push(rcx);
   __ push(rdx);
   __ push(rdi);
-  __ push(rsi);
 #ifdef _LP64
   __ push(r8);
   __ push(r9);
@@ -956,7 +985,11 @@
   __ movptr(rbp, rsp);
   __ andptr(rsp, -StackAlignmentInBytes);
   __ push_FPU_state();
-  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax);
+  if (UseCompressedOops) {
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), rax, rsi);
+  } else {
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rsi);
+  }
   __ pop_FPU_state();
   __ movptr(rsp, rbp);
   __ pop(rbp);
@@ -970,7 +1003,6 @@
   __ pop(r9);
   __ pop(r8);
 #endif
-  __ pop(rsi);
   __ pop(rdi);
   __ pop(rdx);
   __ pop(rcx);
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp	Mon Oct 28 12:55:48 2019 +0000
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp	Mon Oct 28 11:33:28 2019 -0400
@@ -55,7 +55,7 @@
                                     bool tosca_live,
                                     bool expand_call);
 
-  void load_reference_barrier_not_null(MacroAssembler* masm, Register dst);
+  void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address src);
 
   void storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp);
 
@@ -72,7 +72,7 @@
   void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm);
 #endif
 
-  void load_reference_barrier(MacroAssembler* masm, Register dst);
+  void load_reference_barrier(MacroAssembler* masm, Register dst, Address src);
   void load_reference_barrier_native(MacroAssembler* masm, Register dst, Address src);
 
   void cmpxchg_oop(MacroAssembler* masm,
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp	Mon Oct 28 12:55:48 2019 +0000
+++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp	Mon Oct 28 11:33:28 2019 -0400
@@ -303,8 +303,7 @@
 
   address entry_point = call->as_CallLeaf()->entry_point();
   return (entry_point == CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)) ||
-         (entry_point == CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup)) ||
-         (entry_point == CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup_narrow)) ||
+         (entry_point == CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)) ||
          (entry_point == CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native));
 }
 
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp	Mon Oct 28 12:55:48 2019 +0000
+++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp	Mon Oct 28 11:33:28 2019 -0400
@@ -1028,8 +1028,8 @@
   phase->register_new_node(mm, ctrl);
 
   address target = LP64_ONLY(UseCompressedOops) NOT_LP64(false) ?
-          CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup_narrow) :
-          CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup);
+          CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow) :
+          CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier);
 
   address calladdr = is_native ? CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native)
                                : target;
--- a/src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp	Mon Oct 28 12:55:48 2019 +0000
+++ b/src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp	Mon Oct 28 11:33:28 2019 -0400
@@ -62,15 +62,11 @@
   ShenandoahThreadLocalData::satb_mark_queue(thread).enqueue_known_active(orig);
 JRT_END
 
-JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier(oopDesc* src))
-  return ShenandoahBarrierSet::barrier_set()->load_reference_barrier_mutator(src, (oop*)NULL);
-JRT_END
-
-JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier_fixup(oopDesc* src, oop* load_addr))
+JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier(oopDesc* src, oop* load_addr))
   return ShenandoahBarrierSet::barrier_set()->load_reference_barrier_mutator(src, load_addr);
 JRT_END
 
-JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier_fixup_narrow(oopDesc* src, narrowOop* load_addr))
+JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier_narrow(oopDesc* src, narrowOop* load_addr))
   return ShenandoahBarrierSet::barrier_set()->load_reference_barrier_mutator(src, load_addr);
 JRT_END
 
--- a/src/hotspot/share/gc/shenandoah/shenandoahRuntime.hpp	Mon Oct 28 12:55:48 2019 +0000
+++ b/src/hotspot/share/gc/shenandoah/shenandoahRuntime.hpp	Mon Oct 28 11:33:28 2019 -0400
@@ -38,9 +38,8 @@
   static void write_ref_array_pre_duinit_narrow_oop_entry(narrowOop* src, narrowOop* dst, size_t length);
   static void write_ref_field_pre_entry(oopDesc* orig, JavaThread* thread);
 
-  static oopDesc* load_reference_barrier(oopDesc* src);
-  static oopDesc* load_reference_barrier_fixup(oopDesc* src, oop* load_addr);
-  static oopDesc* load_reference_barrier_fixup_narrow(oopDesc* src, narrowOop* load_addr);
+  static oopDesc* load_reference_barrier(oopDesc* src, oop* load_addr);
+  static oopDesc* load_reference_barrier_narrow(oopDesc* src, narrowOop* load_addr);
 
   static oopDesc* load_reference_barrier_native(oopDesc* src, oop* load_addr);