8231087: Shenandoah: Self-fixing load reference barriers for C1/C2
author rkennke
Wed, 18 Sep 2019 20:56:20 +0200
changeset 58219 bc0648405d67
parent 58218 0d7877278adf
child 58220 2c4185d7276a
8231087: Shenandoah: Self-fixing load reference barriers for C1/C2 Reviewed-by: shade
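
In short: the load reference barrier slow path now also receives the address the reference was loaded from, and after obtaining the to-space copy it CASes the healed reference back into that slot, so later loads from the same location no longer take the slow path. Below is a minimal standalone sketch of that healing step, loosely modelled on ShenandoahBarrierSet::load_reference_barrier_mutator_work() in the hunks further down; the oop typedef, forwarding map and driver are illustrative stand-ins, not HotSpot code.

#include <atomic>
#include <cstdio>
#include <map>

using oop = int;                                  // stand-in for a heap reference
static std::map<oop, oop> forwarding = {{1, 2}};  // from-space 1 forwards to to-space 2

static oop resolve_forwarded(oop obj) {
  auto it = forwarding.find(obj);
  return it == forwarding.end() ? obj : it->second;
}

// Sketch of the mutator barrier with the new self-fixing step.
oop load_reference_barrier_mutator(oop obj, std::atomic<oop>* load_addr) {
  oop fwd = resolve_forwarded(obj);               // follow (or install) forwarding
  if (load_addr != nullptr && fwd != obj) {
    // Self-fixing: heal the originating slot. Losing the CAS to a concurrent
    // update is harmless; the barrier still returns the forwarded reference.
    oop expected = obj;
    load_addr->compare_exchange_strong(expected, fwd);
  }
  return fwd;
}

int main() {
  std::atomic<oop> slot{1};
  oop v = load_reference_barrier_mutator(slot.load(), &slot);
  std::printf("returned %d, slot healed to %d\n", v, slot.load());  // 2, 2
}
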
src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp
src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp
src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp
src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.hpp
src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp
src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp
src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.hpp
src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.cpp
src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.hpp
src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp
src/hotspot/share/gc/shenandoah/shenandoahRuntime.hpp
src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp	Wed Sep 18 20:56:19 2019 +0200
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp	Wed Sep 18 20:56:20 2019 +0200
@@ -99,7 +99,7 @@
   __ xchg(access.resolved_addr(), value_opr, result, tmp);
 
   if (access.is_oop()) {
-    result = load_reference_barrier(access.gen(), result);
+    result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0));
     LIR_Opr tmp = gen->new_register(type);
     __ move(result, tmp);
     result = tmp;
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp	Wed Sep 18 20:56:19 2019 +0200
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp	Wed Sep 18 20:56:20 2019 +0200
@@ -501,6 +501,7 @@
 
   Register obj = stub->obj()->as_register();
   Register res = stub->result()->as_register();
+  Register addr = stub->addr()->as_register_lo();
   Register tmp1 = stub->tmp1()->as_register();
   Register tmp2 = stub->tmp2()->as_register();
 
@@ -533,6 +534,7 @@
 
   __ bind(slow_path);
   ce->store_parameter(res, 0);
+  ce->store_parameter(addr, 1);
   __ far_call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin()));
 
   __ b(*stub->continuation());
@@ -594,7 +596,12 @@
 
   __ push_call_clobbered_registers();
   __ load_parameter(0, r0);
-  __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier));
+  __ load_parameter(1, r1);
+  if (UseCompressedOops) {
+    __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup_narrow));
+  } else {
+    __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup));
+  }
   __ blr(lr);
   __ mov(rscratch1, r0);
   __ pop_call_clobbered_registers();
--- a/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp	Wed Sep 18 20:56:19 2019 +0200
+++ b/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp	Wed Sep 18 20:56:20 2019 +0200
@@ -110,7 +110,7 @@
   __ xchg(access.resolved_addr(), result, result, LIR_OprFact::illegalOpr);
 
   if (access.is_oop()) {
-    result = load_reference_barrier(access.gen(), result);
+    result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0));
     LIR_Opr tmp = gen->new_register(type);
     __ move(result, tmp);
     result = tmp;
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp	Wed Sep 18 20:56:19 2019 +0200
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp	Wed Sep 18 20:56:20 2019 +0200
@@ -715,8 +715,10 @@
 
   Register obj = stub->obj()->as_register();
   Register res = stub->result()->as_register();
+  Register addr = stub->addr()->as_register();
   Register tmp1 = stub->tmp1()->as_register();
   Register tmp2 = stub->tmp2()->as_register();
+  assert_different_registers(obj, res, addr, tmp1, tmp2);
 
   Label slow_path;
 
@@ -745,29 +747,9 @@
 #endif
   __ jcc(Assembler::zero, *stub->continuation());
 
-  // Test if object is resolved.
-  __ movptr(tmp1, Address(res, oopDesc::mark_offset_in_bytes()));
-  // Test if both lowest bits are set. We trick it by negating the bits
-  // then test for both bits clear.
-  __ notptr(tmp1);
-#ifdef _LP64
-  __ testb(tmp1, markWord::marked_value);
-#else
-  // On x86_32, C1 register allocator can give us the register without 8-bit support.
-  // Do the full-register access and test to avoid compilation failures.
-  __ testptr(tmp1, markWord::marked_value);
-#endif
-  __ jccb(Assembler::notZero, slow_path);
-  // Clear both lower bits. It's still inverted, so set them, and then invert back.
-  __ orptr(tmp1, markWord::marked_value);
-  __ notptr(tmp1);
-  // At this point, tmp1 contains the decoded forwarding pointer.
-  __ mov(res, tmp1);
-
-  __ jmp(*stub->continuation());
-
   __ bind(slow_path);
   ce->store_parameter(res, 0);
+  ce->store_parameter(addr, 1);
   __ call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin()));
 
   __ jmp(*stub->continuation());
@@ -838,8 +820,21 @@
   // arg0 : object to be resolved
 
   __ save_live_registers_no_oop_map(true);
-  __ load_parameter(0, LP64_ONLY(c_rarg0) NOT_LP64(rax));
-  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), LP64_ONLY(c_rarg0) NOT_LP64(rax));
+
+#ifdef _LP64
+  __ load_parameter(0, c_rarg0);
+  __ load_parameter(1, c_rarg1);
+  if (UseCompressedOops) {
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup_narrow), c_rarg0, c_rarg1);
+  } else {
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup), c_rarg0, c_rarg1);
+  }
+#else
+  __ load_parameter(0, rax);
+  __ load_parameter(1, rbx);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup), rax, rbx);
+#endif
+
   __ restore_live_registers_except_rax(true);
 
   __ epilogue();
--- a/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp	Wed Sep 18 20:56:19 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.cpp	Wed Sep 18 20:56:20 2019 +0200
@@ -105,19 +105,21 @@
   __ branch_destination(slow->continuation());
 }
 
-LIR_Opr ShenandoahBarrierSetC1::load_reference_barrier(LIRGenerator* gen, LIR_Opr obj) {
+LIR_Opr ShenandoahBarrierSetC1::load_reference_barrier(LIRGenerator* gen, LIR_Opr obj, LIR_Opr addr) {
   if (ShenandoahLoadRefBarrier) {
-    return load_reference_barrier_impl(gen, obj);
+    return load_reference_barrier_impl(gen, obj, addr);
   } else {
     return obj;
   }
 }
 
-LIR_Opr ShenandoahBarrierSetC1::load_reference_barrier_impl(LIRGenerator* gen, LIR_Opr obj) {
+LIR_Opr ShenandoahBarrierSetC1::load_reference_barrier_impl(LIRGenerator* gen, LIR_Opr obj, LIR_Opr addr) {
   assert(ShenandoahLoadRefBarrier, "Should be enabled");
 
   obj = ensure_in_register(gen, obj);
   assert(obj->is_register(), "must be a register at this point");
+  addr = ensure_in_register(gen, addr);
+  assert(addr->is_register(), "must be a register at this point");
   LIR_Opr result = gen->result_register_for(obj->value_type());
   __ move(obj, result);
   LIR_Opr tmp1 = gen->new_register(T_OBJECT);
@@ -146,7 +148,7 @@
   }
   __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0));
 
-  CodeStub* slow = new ShenandoahLoadReferenceBarrierStub(obj, result, tmp1, tmp2);
+  CodeStub* slow = new ShenandoahLoadReferenceBarrierStub(obj, addr, result, tmp1, tmp2);
   __ branch(lir_cond_notEqual, T_INT, slow);
   __ branch_destination(slow->continuation());
 
@@ -155,10 +157,18 @@
 
 LIR_Opr ShenandoahBarrierSetC1::ensure_in_register(LIRGenerator* gen, LIR_Opr obj) {
   if (!obj->is_register()) {
-    LIR_Opr obj_reg = gen->new_register(T_OBJECT);
+    LIR_Opr obj_reg;
     if (obj->is_constant()) {
+      obj_reg = gen->new_register(T_OBJECT);
       __ move(obj, obj_reg);
     } else {
+#ifdef AARCH64
+      // AArch64 expects double-size register.
+      obj_reg = gen->new_pointer_register();
+#else
+      // x86 expects single-size register.
+      obj_reg = gen->new_register(T_OBJECT);
+#endif
       __ leal(obj, obj_reg);
     }
     obj = obj_reg;
@@ -184,6 +194,14 @@
   BarrierSetC1::store_at_resolved(access, value);
 }
 
+LIR_Opr ShenandoahBarrierSetC1::resolve_address(LIRAccess& access, bool resolve_in_register) {
+  // We must resolve in register when patching. This is to avoid
+  // having a patch area in the load barrier stub, since the call
+  // into the runtime to patch will not have the proper oop map.
+  const bool patch_before_barrier = access.is_oop() && (access.decorators() & C1_NEEDS_PATCHING) != 0;
+  return BarrierSetC1::resolve_address(access, resolve_in_register || patch_before_barrier);
+}
+
 void ShenandoahBarrierSetC1::load_at_resolved(LIRAccess& access, LIR_Opr result) {
   if (!access.is_oop()) {
     BarrierSetC1::load_at_resolved(access, result);
@@ -210,7 +228,7 @@
   if (ShenandoahLoadRefBarrier) {
     LIR_Opr tmp = gen->new_register(T_OBJECT);
     BarrierSetC1::load_at_resolved(access, tmp);
-    tmp = load_reference_barrier(access.gen(), tmp);
+    tmp = load_reference_barrier(access.gen(), tmp, access.resolved_addr());
     __ move(tmp, result);
   } else {
     BarrierSetC1::load_at_resolved(access, result);
--- a/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.hpp	Wed Sep 18 20:56:19 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.hpp	Wed Sep 18 20:56:20 2019 +0200
@@ -89,21 +89,24 @@
   friend class ShenandoahBarrierSetC1;
 private:
   LIR_Opr _obj;
+  LIR_Opr _addr;
   LIR_Opr _result;
   LIR_Opr _tmp1;
   LIR_Opr _tmp2;
 
 public:
-  ShenandoahLoadReferenceBarrierStub(LIR_Opr obj, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2) :
-    _obj(obj), _result(result), _tmp1(tmp1), _tmp2(tmp2)
+  ShenandoahLoadReferenceBarrierStub(LIR_Opr obj, LIR_Opr addr, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2) :
+    _obj(obj), _addr(addr), _result(result), _tmp1(tmp1), _tmp2(tmp2)
   {
     assert(_obj->is_register(), "should be register");
+    assert(_addr->is_register(), "should be register");
     assert(_result->is_register(), "should be register");
     assert(_tmp1->is_register(), "should be register");
     assert(_tmp2->is_register(), "should be register");
   }
 
   LIR_Opr obj() const { return _obj; }
+  LIR_Opr addr() const { return _addr; }
   LIR_Opr result() const { return _result; }
   LIR_Opr tmp1() const { return _tmp1; }
   LIR_Opr tmp2() const { return _tmp2; }
@@ -112,6 +115,9 @@
   virtual void visit(LIR_OpVisitState* visitor) {
     visitor->do_slow_case();
     visitor->do_input(_obj);
+    visitor->do_temp(_obj);
+    visitor->do_input(_addr);
+    visitor->do_temp(_addr);
     visitor->do_temp(_result);
     visitor->do_temp(_tmp1);
     visitor->do_temp(_tmp2);
@@ -186,10 +192,10 @@
 
   void pre_barrier(LIRGenerator* gen, CodeEmitInfo* info, DecoratorSet decorators, LIR_Opr addr_opr, LIR_Opr pre_val);
 
-  LIR_Opr load_reference_barrier(LIRGenerator* gen, LIR_Opr obj);
+  LIR_Opr load_reference_barrier(LIRGenerator* gen, LIR_Opr obj, LIR_Opr addr);
   LIR_Opr storeval_barrier(LIRGenerator* gen, LIR_Opr obj, CodeEmitInfo* info, DecoratorSet decorators);
 
-  LIR_Opr load_reference_barrier_impl(LIRGenerator* gen, LIR_Opr obj);
+  LIR_Opr load_reference_barrier_impl(LIRGenerator* gen, LIR_Opr obj, LIR_Opr addr);
 
   LIR_Opr ensure_in_register(LIRGenerator* gen, LIR_Opr obj);
 
@@ -209,6 +215,7 @@
 protected:
 
   virtual void store_at_resolved(LIRAccess& access, LIR_Opr value);
+  virtual LIR_Opr resolve_address(LIRAccess& access, bool resolve_in_register);
   virtual void load_at_resolved(LIRAccess& access, LIR_Opr result);
 
   virtual LIR_Opr atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value);
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp	Wed Sep 18 20:56:19 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp	Wed Sep 18 20:56:20 2019 +0200
@@ -475,9 +475,11 @@
 }
 
 const TypeFunc* ShenandoahBarrierSetC2::shenandoah_load_reference_barrier_Type() {
-  const Type **fields = TypeTuple::fields(1);
+  const Type **fields = TypeTuple::fields(2);
   fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value
-  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
+  fields[TypeFunc::Parms+1] = TypeRawPtr::BOTTOM;   // original load address
+
+  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
 
   // create result type (range)
   fields = TypeTuple::fields(1);
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp	Wed Sep 18 20:56:19 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp	Wed Sep 18 20:56:20 2019 +0200
@@ -1016,7 +1016,7 @@
   phase->register_control(ctrl, loop, in_cset_fast_test_iff);
 }
 
-void ShenandoahBarrierC2Support::call_lrb_stub(Node*& ctrl, Node*& val, Node*& result_mem, Node* raw_mem, bool is_native, PhaseIdealLoop* phase) {
+void ShenandoahBarrierC2Support::call_lrb_stub(Node*& ctrl, Node*& val, Node* load_addr, Node*& result_mem, Node* raw_mem, bool is_native, PhaseIdealLoop* phase) {
   IdealLoopTree*loop = phase->get_loop(ctrl);
   const TypePtr* obj_type = phase->igvn().type(val)->is_oopptr()->cast_to_nonconst();
 
@@ -1027,16 +1027,22 @@
   mm->set_memory_at(Compile::AliasIdxRaw, raw_mem);
   phase->register_new_node(mm, ctrl);
 
+  address target = LP64_ONLY(UseCompressedOops) NOT_LP64(false) ?
+          CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup_narrow) :
+          CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup);
+
   address calladdr = is_native ? CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native)
-                               : CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier);
+                               : target;
   const char* name = is_native ? "oop_load_from_native_barrier" : "load_reference_barrier";
   Node* call = new CallLeafNode(ShenandoahBarrierSetC2::shenandoah_load_reference_barrier_Type(), calladdr, name, TypeRawPtr::BOTTOM);
+
   call->init_req(TypeFunc::Control, ctrl);
   call->init_req(TypeFunc::I_O, phase->C->top());
   call->init_req(TypeFunc::Memory, mm);
   call->init_req(TypeFunc::FramePtr, phase->C->top());
   call->init_req(TypeFunc::ReturnAdr, phase->C->top());
   call->init_req(TypeFunc::Parms, val);
+  call->init_req(TypeFunc::Parms+1, load_addr);
   phase->register_control(call, loop, ctrl);
   ctrl = new ProjNode(call, TypeFunc::Control);
   phase->register_control(ctrl, loop, call);
@@ -1401,7 +1407,7 @@
     assert(val->bottom_type()->make_oopptr(), "need oop");
     assert(val->bottom_type()->make_oopptr()->const_oop() == NULL, "expect non-constant");
 
-    enum { _heap_stable = 1, _not_cset, _fwded, _evac_path, _null_path, PATH_LIMIT };
+    enum { _heap_stable = 1, _not_cset, _evac_path, _null_path, PATH_LIMIT };
     Node* region = new RegionNode(PATH_LIMIT);
     Node* val_phi = new PhiNode(region, uncasted_val->bottom_type()->is_oopptr());
     Node* raw_mem_phi = PhiNode::make(region, raw_mem, Type::MEMORY, TypeRawPtr::BOTTOM);
@@ -1451,49 +1457,44 @@
       IfNode* iff = unc_ctrl->in(0)->as_If();
       phase->igvn().replace_input_of(iff, 1, phase->igvn().intcon(1));
     }
-    Node* addr = new AddPNode(new_val, uncasted_val, phase->igvn().MakeConX(oopDesc::mark_offset_in_bytes()));
-    phase->register_new_node(addr, ctrl);
-    assert(new_val->bottom_type()->isa_oopptr(), "what else?");
-    Node* markword = new LoadXNode(ctrl, raw_mem, addr, TypeRawPtr::BOTTOM, TypeX_X, MemNode::unordered);
-    phase->register_new_node(markword, ctrl);
-
-    // Test if object is forwarded. This is the case if lowest two bits are set.
-    Node* masked = new AndXNode(markword, phase->igvn().MakeConX(markWord::lock_mask_in_place));
-    phase->register_new_node(masked, ctrl);
-    Node* cmp = new CmpXNode(masked, phase->igvn().MakeConX(markWord::marked_value));
-    phase->register_new_node(cmp, ctrl);
-
-    // Only branch to LRB stub if object is not forwarded; otherwise reply with fwd ptr
-    Node* bol = new BoolNode(cmp, BoolTest::eq); // Equals 3 means it's forwarded
-    phase->register_new_node(bol, ctrl);
-
-    IfNode* iff = new IfNode(ctrl, bol, PROB_LIKELY(0.999), COUNT_UNKNOWN);
-    phase->register_control(iff, loop, ctrl);
-    Node* if_fwd = new IfTrueNode(iff);
-    phase->register_control(if_fwd, loop, iff);
-    Node* if_not_fwd = new IfFalseNode(iff);
-    phase->register_control(if_not_fwd, loop, iff);
-
-    // Decode forward pointer: since we already have the lowest bits, we can just subtract them
-    // from the mark word without the need for large immediate mask.
-    Node* masked2 = new SubXNode(markword, masked);
-    phase->register_new_node(masked2, if_fwd);
-    Node* fwdraw = new CastX2PNode(masked2);
-    fwdraw->init_req(0, if_fwd);
-    phase->register_new_node(fwdraw, if_fwd);
-    Node* fwd = new CheckCastPPNode(NULL, fwdraw, val->bottom_type());
-    phase->register_new_node(fwd, if_fwd);
-
-    // Wire up not-equal-path in slots 3.
-    region->init_req(_fwded, if_fwd);
-    val_phi->init_req(_fwded, fwd);
-    raw_mem_phi->init_req(_fwded, raw_mem);
 
     // Call lrb-stub and wire up that path in slots 4
     Node* result_mem = NULL;
-    ctrl = if_not_fwd;
-    fwd = new_val;
-    call_lrb_stub(ctrl, fwd, result_mem, raw_mem, lrb->is_native(), phase);
+
+    Node* fwd = new_val;
+    Node* addr;
+    if (ShenandoahSelfFixing) {
+      VectorSet visited(Thread::current()->resource_area());
+      addr = get_load_addr(phase, visited, lrb);
+    } else {
+      addr = phase->igvn().zerocon(T_OBJECT);
+    }
+    if (addr->Opcode() == Op_AddP) {
+      Node* orig_base = addr->in(AddPNode::Base);
+      Node* base = new CheckCastPPNode(ctrl, orig_base, orig_base->bottom_type(), true);
+      phase->register_new_node(base, ctrl);
+      if (addr->in(AddPNode::Base) == addr->in((AddPNode::Address))) {
+        // Field access
+        addr = addr->clone();
+        addr->set_req(AddPNode::Base, base);
+        addr->set_req(AddPNode::Address, base);
+        phase->register_new_node(addr, ctrl);
+      } else {
+        Node* addr2 = addr->in(AddPNode::Address);
+        if (addr2->Opcode() == Op_AddP && addr2->in(AddPNode::Base) == addr2->in(AddPNode::Address) &&
+              addr2->in(AddPNode::Base) == orig_base) {
+          addr2 = addr2->clone();
+          addr2->set_req(AddPNode::Base, base);
+          addr2->set_req(AddPNode::Address, base);
+          phase->register_new_node(addr2, ctrl);
+          addr = addr->clone();
+          addr->set_req(AddPNode::Base, base);
+          addr->set_req(AddPNode::Address, addr2);
+          phase->register_new_node(addr, ctrl);
+        }
+      }
+    }
+    call_lrb_stub(ctrl, fwd, addr, result_mem, raw_mem, lrb->is_native(), phase);
     region->init_req(_evac_path, ctrl);
     val_phi->init_req(_evac_path, fwd);
     raw_mem_phi->init_req(_evac_path, result_mem);
@@ -1696,6 +1697,74 @@
 
 }
 
+Node* ShenandoahBarrierC2Support::get_load_addr(PhaseIdealLoop* phase, VectorSet& visited, Node* in) {
+  if (visited.test_set(in->_idx)) {
+    return NULL;
+  }
+  switch (in->Opcode()) {
+    case Op_Proj:
+      return get_load_addr(phase, visited, in->in(0));
+    case Op_CastPP:
+    case Op_CheckCastPP:
+    case Op_DecodeN:
+    case Op_EncodeP:
+      return get_load_addr(phase, visited, in->in(1));
+    case Op_LoadN:
+    case Op_LoadP:
+      return in->in(MemNode::Address);
+    case Op_CompareAndExchangeN:
+    case Op_CompareAndExchangeP:
+    case Op_GetAndSetN:
+    case Op_GetAndSetP:
+    case Op_ShenandoahCompareAndExchangeP:
+    case Op_ShenandoahCompareAndExchangeN:
+      // Those instructions would just have stored a different
+      // value into the field. No use to attempt to fix it at this point.
+      return phase->igvn().zerocon(T_OBJECT);
+    case Op_CMoveP:
+    case Op_CMoveN: {
+      Node* t = get_load_addr(phase, visited, in->in(CMoveNode::IfTrue));
+      Node* f = get_load_addr(phase, visited, in->in(CMoveNode::IfFalse));
+      // Handle unambiguous cases: single address reported on both branches.
+      if (t != NULL && f == NULL) return t;
+      if (t == NULL && f != NULL) return f;
+      if (t != NULL && t == f)    return t;
+      // Ambiguity.
+      return phase->igvn().zerocon(T_OBJECT);
+    }
+    case Op_Phi: {
+      Node* addr = NULL;
+      for (uint i = 1; i < in->req(); i++) {
+        Node* addr1 = get_load_addr(phase, visited, in->in(i));
+        if (addr == NULL) {
+          addr = addr1;
+        }
+        if (addr != addr1) {
+          return phase->igvn().zerocon(T_OBJECT);
+        }
+      }
+      return addr;
+    }
+    case Op_ShenandoahLoadReferenceBarrier:
+      return get_load_addr(phase, visited, in->in(ShenandoahLoadReferenceBarrierNode::ValueIn));
+    case Op_ShenandoahEnqueueBarrier:
+      return get_load_addr(phase, visited, in->in(1));
+    case Op_CallDynamicJava:
+    case Op_CallLeaf:
+    case Op_CallStaticJava:
+    case Op_ConN:
+    case Op_ConP:
+    case Op_Parm:
+      return phase->igvn().zerocon(T_OBJECT);
+    default:
+#ifdef ASSERT
+      fatal("Unknown node in get_load_addr: %s", NodeClassNames[in->Opcode()]);
+#endif
+      return phase->igvn().zerocon(T_OBJECT);
+  }
+
+}
+
 void ShenandoahBarrierC2Support::move_heap_stable_test_out_of_loop(IfNode* iff, PhaseIdealLoop* phase) {
   IdealLoopTree *loop = phase->get_loop(iff);
   Node* loop_head = loop->_head;
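
On the C2 side, get_load_addr() above walks back from the barrier's value input to the slot the value was loaded from, and falls back to a null address (so the runtime simply skips the healing CAS) whenever the address cannot be determined unambiguously. The standalone toy below illustrates the disambiguation rule used in the CMove case above: a single concrete address wins, unknown inputs are tolerated, and disagreement means no address; names and the vector-based driver are illustrative, not HotSpot code (the Phi case in the hunk is stricter and bails on any mismatch).

#include <cstdio>
#include <vector>

using Addr = const void*;
static const Addr NO_ADDR = nullptr;

// Merge the load addresses reported by several value producers.
Addr merge_load_addrs(const std::vector<Addr>& inputs) {
  Addr addr = NO_ADDR;
  for (Addr a : inputs) {
    if (a == NO_ADDR) continue;   // unknown producer: tolerated
    if (addr == NO_ADDR) {
      addr = a;                   // first concrete candidate
    } else if (a != addr) {
      return NO_ADDR;             // producers disagree: give up, skip healing
    }
  }
  return addr;
}

int main() {
  int slot = 0, other = 0;
  std::printf("%d\n", merge_load_addrs({&slot, &slot}) == (Addr) &slot);  // 1: unambiguous
  std::printf("%d\n", merge_load_addrs({&slot, &other}) == NO_ADDR);      // 1: ambiguous
}
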
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.hpp	Wed Sep 18 20:56:19 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.hpp	Wed Sep 18 20:56:20 2019 +0200
@@ -60,7 +60,7 @@
   static void test_null(Node*& ctrl, Node* val, Node*& null_ctrl, PhaseIdealLoop* phase);
   static void test_heap_stable(Node*& ctrl, Node* raw_mem, Node*& heap_stable_ctrl,
                                PhaseIdealLoop* phase);
-  static void call_lrb_stub(Node*& ctrl, Node*& val, Node*& result_mem, Node* raw_mem, bool is_native, PhaseIdealLoop* phase);
+  static void call_lrb_stub(Node*& ctrl, Node*& val, Node* load_addr, Node*& result_mem, Node* raw_mem, bool is_native, PhaseIdealLoop* phase);
   static Node* clone_null_check(Node*& c, Node* val, Node* unc_ctrl, PhaseIdealLoop* phase);
   static void fix_null_check(Node* unc, Node* unc_ctrl, Node* new_unc_ctrl, Unique_Node_List& uses,
                              PhaseIdealLoop* phase);
@@ -71,6 +71,7 @@
   static void fix_ctrl(Node* barrier, Node* region, const MemoryGraphFixer& fixer, Unique_Node_List& uses, Unique_Node_List& uses_to_ignore, uint last, PhaseIdealLoop* phase);
   static IfNode* find_unswitching_candidate(const IdealLoopTree *loop, PhaseIdealLoop* phase);
 
+  static Node* get_load_addr(PhaseIdealLoop* phase, VectorSet& visited, Node* lrb);
 public:
   static bool is_dominator(Node* d_c, Node* n_c, Node* d, Node* n, PhaseIdealLoop* phase);
   static bool is_dominator_same_ctrl(Node* c, Node* d, Node* n, PhaseIdealLoop* phase);
--- a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.cpp	Wed Sep 18 20:56:19 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.cpp	Wed Sep 18 20:56:20 2019 +0200
@@ -116,14 +116,24 @@
   }
 }
 
+oop ShenandoahBarrierSet::load_reference_barrier_mutator(oop obj, oop* load_addr) {
+  return load_reference_barrier_mutator_work(obj, load_addr);
+}
 
-oop ShenandoahBarrierSet::load_reference_barrier_mutator(oop obj) {
+oop ShenandoahBarrierSet::load_reference_barrier_mutator(oop obj, narrowOop* load_addr) {
+  return load_reference_barrier_mutator_work(obj, load_addr);
+}
+
+template <class T>
+oop ShenandoahBarrierSet::load_reference_barrier_mutator_work(oop obj, T* load_addr) {
   assert(ShenandoahLoadRefBarrier, "should be enabled");
-  assert(_heap->is_gc_in_progress_mask(ShenandoahHeap::EVACUATION | ShenandoahHeap::TRAVERSAL), "evac should be in progress");
-  shenandoah_assert_in_cset(NULL, obj);
+  shenandoah_assert_in_cset(load_addr, obj);
 
   oop fwd = resolve_forwarded_not_null(obj);
   if (obj == fwd) {
+    assert(_heap->is_gc_in_progress_mask(ShenandoahHeap::EVACUATION | ShenandoahHeap::TRAVERSAL),
+           "evac should be in progress");
+
     ShenandoahEvacOOMScope oom_evac_scope;
 
     Thread* thread = Thread::current();
@@ -159,8 +169,14 @@
       }
     }
 
-    return res_oop;
+    fwd = res_oop;
   }
+
+  if (load_addr != NULL && fwd != obj) {
+    // Since we are here and we know the load address, update the reference.
+    ShenandoahHeap::cas_oop(fwd, load_addr, obj);
+  }
+
   return fwd;
 }
 
--- a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.hpp	Wed Sep 18 20:56:19 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.hpp	Wed Sep 18 20:56:20 2019 +0200
@@ -97,9 +97,14 @@
   void keep_alive_barrier(oop obj);
 
   oop load_reference_barrier(oop obj);
-  oop load_reference_barrier_mutator(oop obj);
   oop load_reference_barrier_not_null(oop obj);
 
+  oop load_reference_barrier_mutator(oop obj, oop* load_addr);
+  oop load_reference_barrier_mutator(oop obj, narrowOop* load_addr);
+
+  template <class T>
+  oop load_reference_barrier_mutator_work(oop obj, T* load_addr);
+
   void enqueue(oop obj);
 
 private:
--- a/src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp	Wed Sep 18 20:56:19 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp	Wed Sep 18 20:56:20 2019 +0200
@@ -60,9 +60,16 @@
   ShenandoahThreadLocalData::satb_mark_queue(thread).enqueue_known_active(orig);
 JRT_END
 
-JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier(oopDesc * src))
-  oop result = ShenandoahBarrierSet::barrier_set()->load_reference_barrier_mutator(src);
-  return (oopDesc*) result;
+JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier(oopDesc* src))
+  return ShenandoahBarrierSet::barrier_set()->load_reference_barrier_mutator(src, (oop*)NULL);
+JRT_END
+
+JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier_fixup(oopDesc* src, oop* load_addr))
+  return ShenandoahBarrierSet::barrier_set()->load_reference_barrier_mutator(src, load_addr);
+JRT_END
+
+JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier_fixup_narrow(oopDesc* src, narrowOop* load_addr))
+  return ShenandoahBarrierSet::barrier_set()->load_reference_barrier_mutator(src, load_addr);
 JRT_END
 
 // Shenandoah clone barrier: makes sure that references point to to-space
--- a/src/hotspot/share/gc/shenandoah/shenandoahRuntime.hpp	Wed Sep 18 20:56:19 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/shenandoahRuntime.hpp	Wed Sep 18 20:56:20 2019 +0200
@@ -38,8 +38,11 @@
   static void write_ref_array_pre_duinit_narrow_oop_entry(narrowOop* src, narrowOop* dst, size_t length);
   static void write_ref_field_pre_entry(oopDesc* orig, JavaThread* thread);
 
-  static oopDesc* load_reference_barrier(oopDesc *src);
-  static oopDesc* load_reference_barrier_native(oopDesc *src);
+  static oopDesc* load_reference_barrier(oopDesc* src);
+  static oopDesc* load_reference_barrier_fixup(oopDesc* src, oop* load_addr);
+  static oopDesc* load_reference_barrier_fixup_narrow(oopDesc* src, narrowOop* load_addr);
+
+  static oopDesc* load_reference_barrier_native(oopDesc* src);
 
   static void shenandoah_clone_barrier(oopDesc* s, oopDesc* d, size_t length);
 };
--- a/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp	Wed Sep 18 20:56:19 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp	Wed Sep 18 20:56:20 2019 +0200
@@ -391,5 +391,10 @@
                                                                             \
   experimental(bool, ShenandoahLoopOptsAfterExpansion, true,                \
           "Attempt more loop opts after barrier expansion")                 \
+                                                                            \
+  diagnostic(bool, ShenandoahSelfFixing, true,                              \
+          "Fix references with load reference barrier. Disabling this "     \
+          "might degrade performance.")                                     \
+
 
 #endif // SHARE_GC_SHENANDOAH_SHENANDOAH_GLOBALS_HPP