8231086: Shenandoah: Stronger invariant for object-arraycopy
authorrkennke
Wed, 18 Sep 2019 20:56:19 +0200
changeset 58218 0d7877278adf
parent 58217 b1a394e15ae9
child 58219 bc0648405d67
8231086: Shenandoah: Stronger invariant for object-arraycopy Reviewed-by: shade
src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp
src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp
src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp
src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp
src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.cpp
src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.hpp
src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.inline.hpp
src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp
src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp
src/hotspot/share/gc/shenandoah/shenandoahRuntime.hpp
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp	Wed Sep 18 20:56:18 2019 +0200
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp	Wed Sep 18 20:56:19 2019 +0200
@@ -47,7 +47,7 @@
                                                        Register src, Register dst, Register count, RegSet saved_regs) {
   if (is_oop) {
     bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
-    if (ShenandoahSATBBarrier && !dest_uninitialized) {
+    if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
 
       Label done;
 
@@ -57,27 +57,27 @@
       // Is marking active?
       Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
       __ ldrb(rscratch1, gc_state);
-      __ tbz(rscratch1, ShenandoahHeap::MARKING_BITPOS, done);
+      if (dest_uninitialized) {
+        __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, done);
+      } else {
+        __ mov(rscratch2, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING);
+        __ tst(rscratch1, rscratch2);
+        __ br(Assembler::EQ, done);
+      }
 
       __ push(saved_regs, sp);
-      if (count == c_rarg0) {
-        if (dst == c_rarg1) {
-          // exactly backwards!!
-          __ mov(rscratch1, c_rarg0);
-          __ mov(c_rarg0, c_rarg1);
-          __ mov(c_rarg1, rscratch1);
+      if (UseCompressedOops) {
+        if (dest_uninitialized) {
+          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_narrow_oop_entry), src, dst, count);
         } else {
-          __ mov(c_rarg1, count);
-          __ mov(c_rarg0, dst);
+          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_narrow_oop_entry), src, dst, count);
         }
       } else {
-        __ mov(c_rarg0, dst);
-        __ mov(c_rarg1, count);
-      }
-      if (UseCompressedOops) {
-        __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_narrow_oop_entry), 2);
-      } else {
-        __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry), 2);
+        if (dest_uninitialized) {
+          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_oop_entry), src, dst, count);
+        } else {
+          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry), src, dst, count);
+        }
       }
       __ pop(saved_regs, sp);
       __ bind(done);
@@ -85,31 +85,6 @@
   }
 }
 
-void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
-                                                       Register start, Register count, Register scratch, RegSet saved_regs) {
-  if (is_oop) {
-      Label done;
-
-      // Avoid calling runtime if count == 0
-      __ cbz(count, done);
-
-      // Is updating references?
-      Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
-      __ ldrb(rscratch1, gc_state);
-      __ tbz(rscratch1, ShenandoahHeap::UPDATEREFS_BITPOS, done);
-
-    __ push(saved_regs, sp);
-    assert_different_registers(start, count, scratch);
-    assert_different_registers(c_rarg0, count);
-    __ mov(c_rarg0, start);
-    __ mov(c_rarg1, count);
-    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_post_entry), 2);
-    __ pop(saved_regs, sp);
-
-    __ bind(done);
-  }
-}
-
 void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm,
                                                                  Register obj,
                                                                  Register pre_val,
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp	Wed Sep 18 20:56:18 2019 +0200
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp	Wed Sep 18 20:56:19 2019 +0200
@@ -76,8 +76,6 @@
 
   virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
                                   Register src, Register dst, Register count, RegSet saved_regs);
-  virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
-                                  Register start, Register count, Register tmp, RegSet saved_regs);
   virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                        Register dst, Address src, Register tmp1, Register tmp_thread);
   virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp	Wed Sep 18 20:56:18 2019 +0200
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp	Wed Sep 18 20:56:19 2019 +0200
@@ -47,35 +47,28 @@
 void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                                        Register src, Register dst, Register count) {
 
-  bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
-  bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0;
-  bool obj_int = type == T_OBJECT LP64_ONLY(&& UseCompressedOops);
   bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;
 
   if (type == T_OBJECT || type == T_ARRAY) {
+
+    if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) {
 #ifdef _LP64
-    if (!checkcast) {
-      if (!obj_int) {
-        // Save count for barrier
-        __ movptr(r11, count);
-      } else if (disjoint) {
-        // Save dst in r11 in the disjoint case
-        __ movq(r11, dst);
+      Register thread = r15_thread;
+#else
+      Register thread = rax;
+      if (thread == src || thread == dst || thread == count) {
+        thread = rbx;
       }
-    }
-#else
-    if (disjoint) {
-      __ mov(rdx, dst);          // save 'to'
-    }
-#endif
-
-    if (ShenandoahSATBBarrier && !dest_uninitialized) {
-      Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
-      assert_different_registers(dst, count, thread); // we don't care about src here?
-#ifndef _LP64
+      if (thread == src || thread == dst || thread == count) {
+        thread = rcx;
+      }
+      if (thread == src || thread == dst || thread == count) {
+        thread = rdx;
+      }
       __ push(thread);
       __ get_thread(thread);
 #endif
+      assert_different_registers(src, dst, count, thread);
 
       Label done;
       // Short-circuit if count == 0.
@@ -84,32 +77,33 @@
 
       // Avoid runtime call when not marking.
       Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
-      __ testb(gc_state, ShenandoahHeap::MARKING);
+      int flags = ShenandoahHeap::HAS_FORWARDED;
+      if (!dest_uninitialized) {
+        flags |= ShenandoahHeap::MARKING;
+      }
+      __ testb(gc_state, flags);
       __ jcc(Assembler::zero, done);
 
       __ pusha();                      // push registers
 #ifdef _LP64
-      if (count == c_rarg0) {
-        if (dst == c_rarg1) {
-          // exactly backwards!!
-          __ xchgptr(c_rarg1, c_rarg0);
+      assert(src == rdi, "expected");
+      assert(dst == rsi, "expected");
+      assert(count == rdx, "expected");
+      if (UseCompressedOops) {
+        if (dest_uninitialized) {
+          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_narrow_oop_entry), src, dst, count);
         } else {
-          __ movptr(c_rarg1, count);
-          __ movptr(c_rarg0, dst);
+          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_narrow_oop_entry), src, dst, count);
         }
-      } else {
-        __ movptr(c_rarg0, dst);
-        __ movptr(c_rarg1, count);
+      } else
+#endif
+      {
+        if (dest_uninitialized) {
+          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_duinit_oop_entry), src, dst, count);
+        } else {
+          __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry), src, dst, count);
+        }
       }
-      if (UseCompressedOops) {
-        __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_narrow_oop_entry), 2);
-      } else {
-        __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry), 2);
-      }
-#else
-      __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_pre_oop_entry),
-                      dst, count);
-#endif
       __ popa();
       __ bind(done);
       NOT_LP64(__ pop(thread);)
@@ -118,73 +112,6 @@
 
 }
 
-void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
-                                                       Register src, Register dst, Register count) {
-  bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0;
-  bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0;
-  bool obj_int = type == T_OBJECT LP64_ONLY(&& UseCompressedOops);
-  Register tmp = rax;
-
-  if (type == T_OBJECT || type == T_ARRAY) {
-#ifdef _LP64
-    if (!checkcast) {
-      if (!obj_int) {
-        // Save count for barrier
-        count = r11;
-      } else if (disjoint && obj_int) {
-        // Use the saved dst in the disjoint case
-        dst = r11;
-      }
-    } else {
-      tmp = rscratch1;
-    }
-#else
-    if (disjoint) {
-      __ mov(dst, rdx); // restore 'to'
-    }
-#endif
-
-    Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
-    assert_different_registers(dst, thread); // do we care about src at all here?
-
-#ifndef _LP64
-    __ push(thread);
-    __ get_thread(thread);
-#endif
-
-    // Short-circuit if count == 0.
-    Label done;
-    __ testptr(count, count);
-    __ jcc(Assembler::zero, done);
-
-    // Skip runtime call if no forwarded objects.
-    Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
-    __ testb(gc_state, ShenandoahHeap::UPDATEREFS);
-    __ jcc(Assembler::zero, done);
-
-    __ pusha();             // push registers (overkill)
-#ifdef _LP64
-    if (c_rarg0 == count) { // On win64 c_rarg0 == rcx
-      assert_different_registers(c_rarg1, dst);
-      __ mov(c_rarg1, count);
-      __ mov(c_rarg0, dst);
-    } else {
-      assert_different_registers(c_rarg0, count);
-      __ mov(c_rarg0, dst);
-      __ mov(c_rarg1, count);
-    }
-    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_post_entry), 2);
-#else
-    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_array_post_entry),
-                    dst, count);
-#endif
-    __ popa();
-
-    __ bind(done);
-    NOT_LP64(__ pop(thread);)
-  }
-}
-
 void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm,
                                                                  Register obj,
                                                                  Register pre_val,
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp	Wed Sep 18 20:56:18 2019 +0200
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp	Wed Sep 18 20:56:19 2019 +0200
@@ -83,8 +83,6 @@
                    bool exchange, Register tmp1, Register tmp2);
   virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                                   Register src, Register dst, Register count);
-  virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
-                                  Register src, Register dst, Register count);
   virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                        Register dst, Address src, Register tmp1, Register tmp_thread);
   virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp	Wed Sep 18 20:56:18 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp	Wed Sep 18 20:56:19 2019 +0200
@@ -461,9 +461,11 @@
 }
 
 const TypeFunc* ShenandoahBarrierSetC2::shenandoah_clone_barrier_Type() {
-  const Type **fields = TypeTuple::fields(1);
-  fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value
-  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
+  const Type **fields = TypeTuple::fields(3);
+  fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // src
+  fields[TypeFunc::Parms+1] = TypeInstPtr::NOTNULL; // dst
+  fields[TypeFunc::Parms+2] = TypeInt::INT; // length
+  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+3, fields);
 
   // create result type (range)
   fields = TypeTuple::fields(0);
@@ -705,11 +707,6 @@
   return result;
 }
 
-void ShenandoahBarrierSetC2::clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const {
-  assert(!src->is_AddP(), "unexpected input");
-  BarrierSetC2::clone(kit, src, dst, size, is_array);
-}
-
 // Support for GC barriers emitted during parsing
 bool ShenandoahBarrierSetC2::is_gc_barrier_node(Node* node) const {
   if (node->Opcode() == Op_ShenandoahLoadReferenceBarrier) return true;
@@ -771,9 +768,8 @@
   return true;
 }
 
-bool ShenandoahBarrierSetC2::clone_needs_postbarrier(ArrayCopyNode *ac, PhaseIterGVN& igvn) {
-  Node* src = ac->in(ArrayCopyNode::Src);
-  const TypeOopPtr* src_type = igvn.type(src)->is_oopptr();
+bool ShenandoahBarrierSetC2::clone_needs_barrier(Node* src, PhaseGVN& gvn) {
+  const TypeOopPtr* src_type = gvn.type(src)->is_oopptr();
   if (src_type->isa_instptr() != NULL) {
     ciInstanceKlass* ik = src_type->klass()->as_instance_klass();
     if ((src_type->klass_is_exact() || (!ik->is_interface() && !ik->has_subklass())) && !ik->has_injected_fields()) {
@@ -781,7 +777,7 @@
         return true;
       } else {
         if (!src_type->klass_is_exact()) {
-          igvn.C->dependencies()->assert_leaf_type(ik);
+          Compile::current()->dependencies()->assert_leaf_type(ik);
         }
       }
     } else {
@@ -798,42 +794,29 @@
   return false;
 }
 
-void ShenandoahBarrierSetC2::clone_barrier_at_expansion(ArrayCopyNode* ac, Node* call, PhaseIterGVN& igvn) const {
-  assert(ac->is_clonebasic(), "no other kind of arraycopy here");
+#define XTOP LP64_ONLY(COMMA phase->top())
 
-  if (!clone_needs_postbarrier(ac, igvn)) {
-    BarrierSetC2::clone_barrier_at_expansion(ac, call, igvn);
-    return;
-  }
-
-  const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
-  Node* c = new ProjNode(call,TypeFunc::Control);
-  c = igvn.transform(c);
-  Node* m = new ProjNode(call, TypeFunc::Memory);
-  m = igvn.transform(m);
-
+void ShenandoahBarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const {
+  Node* ctrl = ac->in(TypeFunc::Control);
+  Node* mem = ac->in(TypeFunc::Memory);
+  Node* src = ac->in(ArrayCopyNode::Src);
+  Node* src_offset = ac->in(ArrayCopyNode::SrcPos);
   Node* dest = ac->in(ArrayCopyNode::Dest);
-  assert(dest->is_AddP(), "bad input");
-  Node* barrier_call = new CallLeafNode(ShenandoahBarrierSetC2::shenandoah_clone_barrier_Type(),
-                                        CAST_FROM_FN_PTR(address, ShenandoahRuntime::shenandoah_clone_barrier),
-                                        "shenandoah_clone_barrier", raw_adr_type);
-  barrier_call->init_req(TypeFunc::Control, c);
-  barrier_call->init_req(TypeFunc::I_O    , igvn.C->top());
-  barrier_call->init_req(TypeFunc::Memory , m);
-  barrier_call->init_req(TypeFunc::ReturnAdr, igvn.C->top());
-  barrier_call->init_req(TypeFunc::FramePtr, igvn.C->top());
-  barrier_call->init_req(TypeFunc::Parms+0, dest->in(AddPNode::Base));
-
-  barrier_call = igvn.transform(barrier_call);
-  c = new ProjNode(barrier_call,TypeFunc::Control);
-  c = igvn.transform(c);
-  m = new ProjNode(barrier_call, TypeFunc::Memory);
-  m = igvn.transform(m);
-
-  Node* out_c = ac->proj_out(TypeFunc::Control);
-  Node* out_m = ac->proj_out(TypeFunc::Memory);
-  igvn.replace_node(out_c, c);
-  igvn.replace_node(out_m, m);
+  Node* dest_offset = ac->in(ArrayCopyNode::DestPos);
+  Node* length = ac->in(ArrayCopyNode::Length);
+  assert (src_offset == NULL && dest_offset == NULL, "for clone offsets should be null");
+  if (ShenandoahCloneBarrier && clone_needs_barrier(src, phase->igvn())) {
+    Node* call = phase->make_leaf_call(ctrl, mem,
+                    ShenandoahBarrierSetC2::shenandoah_clone_barrier_Type(),
+                    CAST_FROM_FN_PTR(address, ShenandoahRuntime::shenandoah_clone_barrier),
+                    "shenandoah_clone",
+                    TypeRawPtr::BOTTOM,
+                    src, dest, length);
+    call = phase->transform_later(call);
+    phase->igvn().replace_node(ac, call);
+  } else {
+    BarrierSetC2::clone_at_expansion(phase, ac);
+  }
 }
 
 
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp	Wed Sep 18 20:56:18 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp	Wed Sep 18 20:56:19 2019 +0200
@@ -78,7 +78,7 @@
   void insert_pre_barrier(GraphKit* kit, Node* base_oop, Node* offset,
                           Node* pre_val, bool need_mem_bar) const;
 
-  static bool clone_needs_postbarrier(ArrayCopyNode *ac, PhaseIterGVN& igvn);
+  static bool clone_needs_barrier(Node* src, PhaseGVN& gvn);
 
 protected:
   virtual Node* load_at_resolved(C2Access& access, const Type* val_type) const;
@@ -106,11 +106,10 @@
   virtual bool has_load_barriers() const { return true; }
 
   // This is the entry-point for the backend to perform accesses through the Access API.
-  virtual void clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const;
+  virtual void clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const;
 
   // These are general helper methods used by C2
   virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, ArrayCopyPhase phase) const;
-  virtual void clone_barrier_at_expansion(ArrayCopyNode* ac, Node* call, PhaseIterGVN& igvn) const;
 
   // Support for GC barriers emitted during parsing
   virtual bool is_gc_barrier_node(Node* node) const;
--- a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.cpp	Wed Sep 18 20:56:18 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.cpp	Wed Sep 18 20:56:19 2019 +0200
@@ -41,33 +41,6 @@
 class ShenandoahBarrierSetC1;
 class ShenandoahBarrierSetC2;
 
-template <bool STOREVAL_EVAC_BARRIER>
-class ShenandoahUpdateRefsForOopClosure: public BasicOopIterateClosure {
-private:
-  ShenandoahHeap* _heap;
-  ShenandoahBarrierSet* _bs;
-
-  template <class T>
-  inline void do_oop_work(T* p) {
-    oop o;
-    if (STOREVAL_EVAC_BARRIER) {
-      o = _heap->evac_update_with_forwarded(p);
-      if (!CompressedOops::is_null(o)) {
-        _bs->enqueue(o);
-      }
-    } else {
-      _heap->maybe_update_with_forwarded(p);
-    }
-  }
-public:
-  ShenandoahUpdateRefsForOopClosure() : _heap(ShenandoahHeap::heap()), _bs(ShenandoahBarrierSet::barrier_set()) {
-    assert(UseShenandoahGC && ShenandoahCloneBarrier, "should be enabled");
-  }
-
-  virtual void do_oop(oop* p)       { do_oop_work(p); }
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-};
-
 ShenandoahBarrierSet::ShenandoahBarrierSet(ShenandoahHeap* heap) :
   BarrierSet(make_barrier_set_assembler<ShenandoahBarrierSetAssembler>(),
              make_barrier_set_c1<ShenandoahBarrierSetC1>(),
@@ -97,73 +70,6 @@
   return true;
 }
 
-template <class T, bool STOREVAL_EVAC_BARRIER>
-void ShenandoahBarrierSet::write_ref_array_loop(HeapWord* start, size_t count) {
-  assert(UseShenandoahGC && ShenandoahCloneBarrier, "should be enabled");
-  ShenandoahUpdateRefsForOopClosure<STOREVAL_EVAC_BARRIER> cl;
-  T* dst = (T*) start;
-  for (size_t i = 0; i < count; i++) {
-    cl.do_oop(dst++);
-  }
-}
-
-void ShenandoahBarrierSet::write_ref_array(HeapWord* start, size_t count) {
-  assert(_heap->is_update_refs_in_progress(), "should not be here otherwise");
-  assert(count > 0, "Should have been filtered before");
-
-  if (_heap->is_concurrent_traversal_in_progress()) {
-    ShenandoahEvacOOMScope oom_evac_scope;
-    if (UseCompressedOops) {
-      write_ref_array_loop<narrowOop, /* evac = */ true>(start, count);
-    } else {
-      write_ref_array_loop<oop,       /* evac = */ true>(start, count);
-    }
-  } else {
-    if (UseCompressedOops) {
-      write_ref_array_loop<narrowOop, /* evac = */ false>(start, count);
-    } else {
-      write_ref_array_loop<oop,       /* evac = */ false>(start, count);
-    }
-  }
-}
-
-template <class T>
-void ShenandoahBarrierSet::write_ref_array_pre_work(T* dst, size_t count) {
-  shenandoah_assert_not_in_cset_loc_except(dst, _heap->cancelled_gc());
-  assert(ShenandoahThreadLocalData::satb_mark_queue(Thread::current()).is_active(), "Shouldn't be here otherwise");
-  assert(ShenandoahSATBBarrier, "Shouldn't be here otherwise");
-  assert(count > 0, "Should have been filtered before");
-
-  Thread* thread = Thread::current();
-  ShenandoahMarkingContext* ctx = _heap->marking_context();
-  bool has_forwarded = _heap->has_forwarded_objects();
-  T* elem_ptr = dst;
-  for (size_t i = 0; i < count; i++, elem_ptr++) {
-    T heap_oop = RawAccess<>::oop_load(elem_ptr);
-    if (!CompressedOops::is_null(heap_oop)) {
-      oop obj = CompressedOops::decode_not_null(heap_oop);
-      if (has_forwarded) {
-        obj = resolve_forwarded_not_null(obj);
-      }
-      if (!ctx->is_marked(obj)) {
-        ShenandoahThreadLocalData::satb_mark_queue(thread).enqueue_known_active(obj);
-      }
-    }
-  }
-}
-
-void ShenandoahBarrierSet::write_ref_array_pre(oop* dst, size_t count, bool dest_uninitialized) {
-  if (! dest_uninitialized) {
-    write_ref_array_pre_work(dst, count);
-  }
-}
-
-void ShenandoahBarrierSet::write_ref_array_pre(narrowOop* dst, size_t count, bool dest_uninitialized) {
-  if (! dest_uninitialized) {
-    write_ref_array_pre_work(dst, count);
-  }
-}
-
 template <class T>
 inline void ShenandoahBarrierSet::inline_write_ref_field_pre(T* field, oop new_val) {
   shenandoah_assert_not_in_cset_loc_except(field, _heap->cancelled_gc());
@@ -194,27 +100,6 @@
   shenandoah_assert_not_in_cset_except    (v, o, o == NULL || _heap->cancelled_gc() || !_heap->is_concurrent_mark_in_progress());
 }
 
-void ShenandoahBarrierSet::write_region(MemRegion mr) {
-  if (!ShenandoahCloneBarrier) return;
-  if (!_heap->is_update_refs_in_progress()) return;
-
-  // This is called for cloning an object (see jvm.cpp) after the clone
-  // has been made. We are not interested in any 'previous value' because
-  // it would be NULL in any case. But we *are* interested in any oop*
-  // that potentially need to be updated.
-
-  oop obj = oop(mr.start());
-  shenandoah_assert_correct(NULL, obj);
-  if (_heap->is_concurrent_traversal_in_progress()) {
-    ShenandoahEvacOOMScope oom_evac_scope;
-    ShenandoahUpdateRefsForOopClosure</* evac = */ true> cl;
-    obj->oop_iterate(&cl);
-  } else {
-    ShenandoahUpdateRefsForOopClosure</* evac = */ false> cl;
-    obj->oop_iterate(&cl);
-  }
-}
-
 oop ShenandoahBarrierSet::load_reference_barrier_not_null(oop obj) {
   if (ShenandoahLoadRefBarrier && _heap->has_forwarded_objects()) {
     return load_reference_barrier_impl(obj);
--- a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.hpp	Wed Sep 18 20:56:18 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.hpp	Wed Sep 18 20:56:19 2019 +0200
@@ -63,14 +63,14 @@
 
   bool is_aligned(HeapWord* hw);
 
-  void write_ref_array(HeapWord* start, size_t count);
+  template <class T> void
+  write_ref_array_pre_work(T* src, T* dst, size_t count, bool dest_uninitialized);
 
-  template <class T> void
-  write_ref_array_pre_work(T* dst, size_t count);
-
-  void write_ref_array_pre(oop* dst, size_t count, bool dest_uninitialized);
-
-  void write_ref_array_pre(narrowOop* dst, size_t count, bool dest_uninitialized);
+  inline void arraycopy_pre(oop* src, oop* dst, size_t count);
+  inline void arraycopy_pre(narrowOop* src, narrowOop* dst, size_t count);
+  inline void arraycopy_update(oop* src, size_t count);
+  inline void arraycopy_update(narrowOop* src, size_t count);
+  inline void clone_barrier(oop src);
 
   // We export this to make it available in cases where the static
   // type of the barrier set is known.  Note that it is non-virtual.
@@ -82,7 +82,6 @@
   void write_ref_field_pre_work(void* field, oop new_val);
 
   void write_ref_field_work(void* v, oop o, bool release = false);
-  void write_region(MemRegion mr);
 
   oop oop_load_from_native_barrier(oop obj);
 
@@ -104,8 +103,12 @@
   void enqueue(oop obj);
 
 private:
-  template <class T, bool STOREVAL_WRITE_BARRIER>
-  void write_ref_array_loop(HeapWord* start, size_t count);
+  template <class T>
+  inline void arraycopy_pre_work(T* src, T* dst, size_t count);
+  template <class T, bool HAS_FWD, bool EVAC, bool ENQUEUE>
+  inline void arraycopy_work(T* src, size_t count);
+  template <class T>
+  inline void arraycopy_update_impl(T* src, size_t count);
 
   oop load_reference_barrier_impl(oop obj);
 
@@ -118,24 +121,6 @@
     }
   }
 
-  template <typename T>
-  bool arraycopy_loop_1(T* src, T* dst, size_t length, Klass* bound,
-                        bool checkcast, bool satb, bool disjoint, ShenandoahBarrierSet::ArrayCopyStoreValMode storeval_mode);
-
-  template <typename T, bool CHECKCAST>
-  bool arraycopy_loop_2(T* src, T* dst, size_t length, Klass* bound,
-                        bool satb, bool disjoint, ShenandoahBarrierSet::ArrayCopyStoreValMode storeval_mode);
-
-  template <typename T, bool CHECKCAST, bool SATB>
-  bool arraycopy_loop_3(T* src, T* dst, size_t length, Klass* bound,
-                        bool disjoint, ShenandoahBarrierSet::ArrayCopyStoreValMode storeval_mode);
-
-  template <typename T, bool CHECKCAST, bool SATB, ShenandoahBarrierSet::ArrayCopyStoreValMode STOREVAL_MODE>
-  bool arraycopy_loop(T* src, T* dst, size_t length, Klass* bound, bool disjoint);
-
-  template <typename T, bool CHECKCAST, bool SATB, ShenandoahBarrierSet::ArrayCopyStoreValMode STOREVAL_MODE>
-  bool arraycopy_element(T* cur_src, T* cur_dst, Klass* bound, Thread* const thread, ShenandoahMarkingContext* const ctx);
-
 public:
   // Callbacks for runtime accesses.
   template <DecoratorSet decorators, typename BarrierSetT = ShenandoahBarrierSet>
--- a/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.inline.hpp	Wed Sep 18 20:56:18 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.inline.hpp	Wed Sep 18 20:56:19 2019 +0200
@@ -27,11 +27,14 @@
 #include "gc/shared/barrierSet.hpp"
 #include "gc/shenandoah/shenandoahAsserts.hpp"
 #include "gc/shenandoah/shenandoahBarrierSet.hpp"
+#include "gc/shenandoah/shenandoahCollectionSet.inline.hpp"
 #include "gc/shenandoah/shenandoahForwarding.inline.hpp"
 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
 #include "gc/shenandoah/shenandoahMarkingContext.inline.hpp"
 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
+#include "memory/iterator.inline.hpp"
+#include "oops/oop.inline.hpp"
 
 inline oop ShenandoahBarrierSet::resolve_forwarded_not_null(oop p) {
   return ShenandoahForwarding::get_forwardee(p);
@@ -179,158 +182,13 @@
   return result;
 }
 
-template <typename T>
-bool ShenandoahBarrierSet::arraycopy_loop_1(T* src, T* dst, size_t length, Klass* bound,
-                                            bool checkcast, bool satb, bool disjoint,
-                                            ShenandoahBarrierSet::ArrayCopyStoreValMode storeval_mode) {
-  if (checkcast) {
-    return arraycopy_loop_2<T, true>(src, dst, length, bound, satb, disjoint, storeval_mode);
-  } else {
-    return arraycopy_loop_2<T, false>(src, dst, length, bound, satb, disjoint, storeval_mode);
-  }
-}
-
-template <typename T, bool CHECKCAST>
-bool ShenandoahBarrierSet::arraycopy_loop_2(T* src, T* dst, size_t length, Klass* bound,
-                                            bool satb, bool disjoint,
-                                            ShenandoahBarrierSet::ArrayCopyStoreValMode storeval_mode) {
-  if (satb) {
-    return arraycopy_loop_3<T, CHECKCAST, true>(src, dst, length, bound, disjoint, storeval_mode);
-  } else {
-    return arraycopy_loop_3<T, CHECKCAST, false>(src, dst, length, bound, disjoint, storeval_mode);
-  }
-}
-
-template <typename T, bool CHECKCAST, bool SATB>
-bool ShenandoahBarrierSet::arraycopy_loop_3(T* src, T* dst, size_t length, Klass* bound, bool disjoint,
-                                            ShenandoahBarrierSet::ArrayCopyStoreValMode storeval_mode) {
-  switch (storeval_mode) {
-    case NONE:
-      return arraycopy_loop<T, CHECKCAST, SATB, NONE>(src, dst, length, bound, disjoint);
-    case RESOLVE_BARRIER:
-      return arraycopy_loop<T, CHECKCAST, SATB, RESOLVE_BARRIER>(src, dst, length, bound, disjoint);
-    case EVAC_BARRIER:
-      return arraycopy_loop<T, CHECKCAST, SATB, EVAC_BARRIER>(src, dst, length, bound, disjoint);
-    default:
-      ShouldNotReachHere();
-      return true; // happy compiler
-  }
-}
-
-template <typename T, bool CHECKCAST, bool SATB, ShenandoahBarrierSet::ArrayCopyStoreValMode STOREVAL_MODE>
-bool ShenandoahBarrierSet::arraycopy_loop(T* src, T* dst, size_t length, Klass* bound, bool disjoint) {
-  Thread* thread = Thread::current();
-  ShenandoahMarkingContext* ctx = _heap->marking_context();
-  ShenandoahEvacOOMScope oom_evac_scope;
-
-  // We need to handle four cases:
-  //
-  // a) src < dst, conjoint, can only copy backward only
-  //   [...src...]
-  //         [...dst...]
-  //
-  // b) src < dst, disjoint, can only copy forward, because types may mismatch
-  //   [...src...]
-  //              [...dst...]
-  //
-  // c) src > dst, conjoint, can copy forward only
-  //         [...src...]
-  //   [...dst...]
-  //
-  // d) src > dst, disjoint, can only copy forward, because types may mismatch
-  //              [...src...]
-  //   [...dst...]
-  //
-  if (src > dst || disjoint) {
-    // copy forward:
-    T* cur_src = src;
-    T* cur_dst = dst;
-    T* src_end = src + length;
-    for (; cur_src < src_end; cur_src++, cur_dst++) {
-      if (!arraycopy_element<T, CHECKCAST, SATB, STOREVAL_MODE>(cur_src, cur_dst, bound, thread, ctx)) {
-        return false;
-      }
-    }
-  } else {
-    // copy backward:
-    T* cur_src = src + length - 1;
-    T* cur_dst = dst + length - 1;
-    for (; cur_src >= src; cur_src--, cur_dst--) {
-      if (!arraycopy_element<T, CHECKCAST, SATB, STOREVAL_MODE>(cur_src, cur_dst, bound, thread, ctx)) {
-        return false;
-      }
-    }
-  }
-  return true;
-}
-
-template <typename T, bool CHECKCAST, bool SATB, ShenandoahBarrierSet::ArrayCopyStoreValMode STOREVAL_MODE>
-bool ShenandoahBarrierSet::arraycopy_element(T* cur_src, T* cur_dst, Klass* bound, Thread* const thread, ShenandoahMarkingContext* const ctx) {
-  T o = RawAccess<>::oop_load(cur_src);
-
-  if (SATB) {
-    assert(ShenandoahThreadLocalData::satb_mark_queue(thread).is_active(), "Shouldn't be here otherwise");
-    T prev = RawAccess<>::oop_load(cur_dst);
-    if (!CompressedOops::is_null(prev)) {
-      oop prev_obj = CompressedOops::decode_not_null(prev);
-      switch (STOREVAL_MODE) {
-      case NONE:
-        break;
-      case RESOLVE_BARRIER:
-      case EVAC_BARRIER:
-        // The evac-barrier case cannot really happen. It's traversal-only and traversal
-        // doesn't currently use SATB. And even if it did, it would not be fatal to just do the normal resolve here.
-        prev_obj = ShenandoahBarrierSet::resolve_forwarded_not_null(prev_obj);
-      }
-      if (!ctx->is_marked(prev_obj)) {
-        ShenandoahThreadLocalData::satb_mark_queue(thread).enqueue_known_active(prev_obj);
-      }
-    }
-  }
-
-  if (!CompressedOops::is_null(o)) {
-    oop obj = CompressedOops::decode_not_null(o);
-
-    if (CHECKCAST) {
-      assert(bound != NULL, "need element klass for checkcast");
-      if (!oopDesc::is_instanceof_or_null(obj, bound)) {
-        return false;
-      }
-    }
-
-    switch (STOREVAL_MODE) {
-    case NONE:
-      break;
-    case RESOLVE_BARRIER:
-      obj = ShenandoahBarrierSet::resolve_forwarded_not_null(obj);
-      break;
-    case EVAC_BARRIER:
-      if (_heap->in_collection_set(obj)) {
-        oop forw = ShenandoahBarrierSet::resolve_forwarded_not_null(obj);
-        if (forw == obj) {
-          forw = _heap->evacuate_object(forw, thread);
-        }
-        obj = forw;
-      }
-      enqueue(obj);
-      break;
-    default:
-      ShouldNotReachHere();
-    }
-
-    RawAccess<IS_NOT_NULL>::oop_store(cur_dst, obj);
-  } else {
-    // Store null.
-    RawAccess<>::oop_store(cur_dst, o);
-  }
-  return true;
-}
-
 // Clone barrier support
 template <DecoratorSet decorators, typename BarrierSetT>
 void ShenandoahBarrierSet::AccessBarrier<decorators, BarrierSetT>::clone_in_heap(oop src, oop dst, size_t size) {
+  if (ShenandoahCloneBarrier) {
+    ShenandoahBarrierSet::barrier_set()->clone_barrier(src);
+  }
   Raw::clone(src, dst, size);
-  ShenandoahBarrierSet::barrier_set()->write_region(MemRegion((HeapWord*) dst, size));
 }
 
 template <DecoratorSet decorators, typename BarrierSetT>
@@ -338,36 +196,144 @@
 bool ShenandoahBarrierSet::AccessBarrier<decorators, BarrierSetT>::oop_arraycopy_in_heap(arrayOop src_obj, size_t src_offset_in_bytes, T* src_raw,
                                                                                          arrayOop dst_obj, size_t dst_offset_in_bytes, T* dst_raw,
                                                                                          size_t length) {
-  ShenandoahHeap* heap = ShenandoahHeap::heap();
-  bool satb = ShenandoahSATBBarrier && heap->is_concurrent_mark_in_progress();
-  bool checkcast = HasDecorator<decorators, ARRAYCOPY_CHECKCAST>::value;
-  bool disjoint = HasDecorator<decorators, ARRAYCOPY_DISJOINT>::value;
-  ArrayCopyStoreValMode storeval_mode;
-  if (heap->has_forwarded_objects()) {
-    if (heap->is_concurrent_traversal_in_progress()) {
-      storeval_mode = EVAC_BARRIER;
-    } else if (heap->is_update_refs_in_progress()) {
-      storeval_mode = RESOLVE_BARRIER;
+  ShenandoahBarrierSet* bs = ShenandoahBarrierSet::barrier_set();
+  bs->arraycopy_pre(arrayOopDesc::obj_offset_to_raw(src_obj, src_offset_in_bytes, src_raw),
+                    arrayOopDesc::obj_offset_to_raw(dst_obj, dst_offset_in_bytes, dst_raw),
+                    length);
+  return Raw::oop_arraycopy_in_heap(src_obj, src_offset_in_bytes, src_raw, dst_obj, dst_offset_in_bytes, dst_raw, length);
+}
+
+template <class T, bool HAS_FWD, bool EVAC, bool ENQUEUE>
+void ShenandoahBarrierSet::arraycopy_work(T* src, size_t count) {
+  Thread* thread = Thread::current();
+  SATBMarkQueue& queue = ShenandoahThreadLocalData::satb_mark_queue(thread);
+  ShenandoahMarkingContext* ctx = _heap->marking_context();
+  const ShenandoahCollectionSet* const cset = _heap->collection_set();
+  T* end = src + count;
+  for (T* elem_ptr = src; elem_ptr < end; elem_ptr++) {
+    T o = RawAccess<>::oop_load(elem_ptr);
+    if (!CompressedOops::is_null(o)) {
+      oop obj = CompressedOops::decode_not_null(o);
+      if (HAS_FWD && cset->is_in((HeapWord *) obj)) {
+        assert(_heap->has_forwarded_objects(), "only get here with forwarded objects");
+        oop fwd = resolve_forwarded_not_null(obj);
+        if (EVAC && obj == fwd) {
+          fwd = _heap->evacuate_object(obj, thread);
+        }
+        assert(obj != fwd || _heap->cancelled_gc(), "must be forwarded");
+        oop witness = ShenandoahHeap::cas_oop(fwd, elem_ptr, o);
+        obj = fwd;
+      }
+      if (ENQUEUE && !ctx->is_marked(obj)) {
+        queue.enqueue_known_active(obj);
+      }
+    }
+  }
+}
+
+template <class T>
+void ShenandoahBarrierSet::arraycopy_pre_work(T* src, T* dst, size_t count) {
+  if (_heap->is_concurrent_mark_in_progress()) {
+    if (_heap->has_forwarded_objects()) {
+      arraycopy_work<T, true, false, true>(dst, count);
     } else {
-      assert(heap->is_idle() || heap->is_evacuation_in_progress(), "must not have anything in progress");
-      storeval_mode = NONE; // E.g. during evac or outside cycle
+      arraycopy_work<T, false, false, true>(dst, count);
     }
-  } else {
-    assert(heap->is_stable() || heap->is_concurrent_mark_in_progress(), "must not have anything in progress");
-    storeval_mode = NONE;
   }
 
-  if (!satb && !checkcast && storeval_mode == NONE) {
-    // Short-circuit to bulk copy.
-    return Raw::oop_arraycopy(src_obj, src_offset_in_bytes, src_raw, dst_obj, dst_offset_in_bytes, dst_raw, length);
+  arraycopy_update_impl(src, count);
+}
+
+void ShenandoahBarrierSet::arraycopy_pre(oop* src, oop* dst, size_t count) {
+  arraycopy_pre_work(src, dst, count);
+}
+
+void ShenandoahBarrierSet::arraycopy_pre(narrowOop* src, narrowOop* dst, size_t count) {
+  arraycopy_pre_work(src, dst, count);
+}
+
+template <class T>
+void ShenandoahBarrierSet::arraycopy_update_impl(T* src, size_t count) {
+  if (_heap->is_evacuation_in_progress()) {
+    ShenandoahEvacOOMScope oom_evac;
+    arraycopy_work<T, true, true, false>(src, count);
+  } else if (_heap->is_concurrent_traversal_in_progress()){
+    ShenandoahEvacOOMScope oom_evac;
+    arraycopy_work<T, true, true, true>(src, count);
+  } else if (_heap->has_forwarded_objects()) {
+    arraycopy_work<T, true, false, false>(src, count);
+  }
+}
+
+void ShenandoahBarrierSet::arraycopy_update(oop* src, size_t count) {
+  arraycopy_update_impl(src, count);
+}
+
+void ShenandoahBarrierSet::arraycopy_update(narrowOop* src, size_t count) {
+  arraycopy_update_impl(src, count);
+}
+
+template <bool EVAC, bool ENQUEUE>
+class ShenandoahUpdateRefsForOopClosure: public BasicOopIterateClosure {
+private:
+  ShenandoahHeap* const _heap;
+  ShenandoahBarrierSet* const _bs;
+  const ShenandoahCollectionSet* const _cset;
+  Thread* const _thread;
+
+  template <class T>
+  inline void do_oop_work(T* p) {
+    T o = RawAccess<>::oop_load(p);
+    if (!CompressedOops::is_null(o)) {
+      oop obj = CompressedOops::decode_not_null(o);
+      if (_cset->is_in((HeapWord *)obj)) {
+        oop fwd = _bs->resolve_forwarded_not_null(obj);
+        if (EVAC && obj == fwd) {
+          fwd = _heap->evacuate_object(obj, _thread);
+        }
+        if (ENQUEUE) {
+          _bs->enqueue(fwd);
+        }
+        assert(obj != fwd || _heap->cancelled_gc(), "must be forwarded");
+        ShenandoahHeap::cas_oop(fwd, p, o);
+      }
+
+    }
+  }
+public:
+  ShenandoahUpdateRefsForOopClosure() :
+          _heap(ShenandoahHeap::heap()),
+          _bs(ShenandoahBarrierSet::barrier_set()),
+          _cset(_heap->collection_set()),
+          _thread(Thread::current()) {
   }
 
-  src_raw = arrayOopDesc::obj_offset_to_raw(src_obj, src_offset_in_bytes, src_raw);
-  dst_raw = arrayOopDesc::obj_offset_to_raw(dst_obj, dst_offset_in_bytes, dst_raw);
+  virtual void do_oop(oop* p)       { do_oop_work(p); }
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+};
+
+void ShenandoahBarrierSet::clone_barrier(oop obj) {
+  assert(ShenandoahCloneBarrier, "only get here with clone barriers enabled");
+  if (!_heap->has_forwarded_objects()) return;
+
+  // This is called for cloning an object (see jvm.cpp) after the clone
+  // has been made. We are not interested in any 'previous value' because
+  // it would be NULL in any case. But we *are* interested in any oop*
+  // that potentially need to be updated.
 
-  Klass* bound = objArrayOop(dst_obj)->element_klass();
-  ShenandoahBarrierSet* bs = ShenandoahBarrierSet::barrier_set();
-  return bs->arraycopy_loop_1(src_raw, dst_raw, length, bound, checkcast, satb, disjoint, storeval_mode);
+  shenandoah_assert_correct(NULL, obj);
+  if (_heap->is_evacuation_in_progress()) {
+    ShenandoahEvacOOMScope evac_scope;
+    ShenandoahUpdateRefsForOopClosure</* evac = */ true, /* enqueue */ false> cl;
+    obj->oop_iterate(&cl);
+  } else if (_heap->is_concurrent_traversal_in_progress()) {
+    ShenandoahEvacOOMScope evac_scope;
+    ShenandoahUpdateRefsForOopClosure</* evac = */ true, /* enqueue */ true> cl;
+    obj->oop_iterate(&cl);
+  } else {
+    ShenandoahUpdateRefsForOopClosure</* evac = */ false, /* enqueue */ false> cl;
+    obj->oop_iterate(&cl);
+  }
 }
 
 #endif // SHARE_GC_SHENANDOAH_SHENANDOAHBARRIERSET_INLINE_HPP
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp	Wed Sep 18 20:56:18 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp	Wed Sep 18 20:56:19 2019 +0200
@@ -707,6 +707,7 @@
 
   static inline oop cas_oop(oop n, narrowOop* addr, oop c);
   static inline oop cas_oop(oop n, oop* addr, oop c);
+  static inline oop cas_oop(oop n, narrowOop* addr, narrowOop c);
 
   void trash_humongous_region_at(ShenandoahHeapRegion *r);
 
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp	Wed Sep 18 20:56:18 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp	Wed Sep 18 20:56:19 2019 +0200
@@ -133,6 +133,11 @@
   return (oop) Atomic::cmpxchg(n, addr, c);
 }
 
+inline oop ShenandoahHeap::cas_oop(oop n, narrowOop* addr, narrowOop c) {
+  narrowOop val = CompressedOops::encode(n);
+  return CompressedOops::decode((narrowOop) Atomic::cmpxchg(val, addr, c));
+}
+
 inline oop ShenandoahHeap::cas_oop(oop n, narrowOop* addr, oop c) {
   narrowOop cmp = CompressedOops::encode(c);
   narrowOop val = CompressedOops::encode(n);
--- a/src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp	Wed Sep 18 20:56:18 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp	Wed Sep 18 20:56:19 2019 +0200
@@ -22,25 +22,30 @@
  */
 
 #include "precompiled.hpp"
-#include "gc/shenandoah/shenandoahBarrierSet.hpp"
+#include "gc/shenandoah/shenandoahBarrierSet.inline.hpp"
 #include "gc/shenandoah/shenandoahRuntime.hpp"
 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
 #include "runtime/interfaceSupport.inline.hpp"
 #include "oops/oop.inline.hpp"
 
-void ShenandoahRuntime::write_ref_array_pre_oop_entry(oop* dst, size_t length) {
+void ShenandoahRuntime::write_ref_array_pre_oop_entry(oop* src, oop* dst, size_t length) {
   ShenandoahBarrierSet *bs = ShenandoahBarrierSet::barrier_set();
-  bs->write_ref_array_pre(dst, length, false);
+  bs->arraycopy_pre(src, dst, length);
 }
 
-void ShenandoahRuntime::write_ref_array_pre_narrow_oop_entry(narrowOop* dst, size_t length) {
+void ShenandoahRuntime::write_ref_array_pre_narrow_oop_entry(narrowOop* src, narrowOop* dst, size_t length) {
   ShenandoahBarrierSet *bs = ShenandoahBarrierSet::barrier_set();
-  bs->write_ref_array_pre(dst, length, false);
+  bs->arraycopy_pre(src, dst, length);
 }
 
-void ShenandoahRuntime::write_ref_array_post_entry(HeapWord* dst, size_t length) {
+void ShenandoahRuntime::write_ref_array_pre_duinit_oop_entry(oop* src, oop* dst, size_t length) {
   ShenandoahBarrierSet *bs = ShenandoahBarrierSet::barrier_set();
-  bs->ShenandoahBarrierSet::write_ref_array(dst, length);
+  bs->arraycopy_update(src, length);
+}
+
+void ShenandoahRuntime::write_ref_array_pre_duinit_narrow_oop_entry(narrowOop* src, narrowOop* dst, size_t length) {
+  ShenandoahBarrierSet *bs = ShenandoahBarrierSet::barrier_set();
+  bs->arraycopy_update(src, length);
 }
 
 // Shenandoah pre write barrier slowpath
@@ -62,8 +67,13 @@
 
 // Shenandoah clone barrier: makes sure that references point to to-space
 // in cloned objects.
-JRT_LEAF(void, ShenandoahRuntime::shenandoah_clone_barrier(oopDesc* obj))
-  ShenandoahBarrierSet::barrier_set()->write_region(MemRegion((HeapWord*) obj, obj->size()));
+JRT_LEAF(void, ShenandoahRuntime::shenandoah_clone_barrier(oopDesc* s, oopDesc* d, size_t length))
+  oop src = oop(s);
+  oop dst = oop(d);
+  shenandoah_assert_correct(NULL, src);
+  shenandoah_assert_correct(NULL, dst);
+  ShenandoahBarrierSet::barrier_set()->clone_barrier(src);
+  RawAccessBarrier<IS_NOT_NULL>::clone(src, dst, length);
 JRT_END
 
 JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier_native(oopDesc * src))
--- a/src/hotspot/share/gc/shenandoah/shenandoahRuntime.hpp	Wed Sep 18 20:56:18 2019 +0200
+++ b/src/hotspot/share/gc/shenandoah/shenandoahRuntime.hpp	Wed Sep 18 20:56:19 2019 +0200
@@ -32,15 +32,16 @@
 
 class ShenandoahRuntime : public AllStatic {
 public:
-  static void write_ref_array_pre_oop_entry(oop* dst, size_t length);
-  static void write_ref_array_pre_narrow_oop_entry(narrowOop* dst, size_t length);
-  static void write_ref_array_post_entry(HeapWord* dst, size_t length);
+  static void write_ref_array_pre_oop_entry(oop* src, oop* dst, size_t length);
+  static void write_ref_array_pre_narrow_oop_entry(narrowOop* src, narrowOop* dst, size_t length);
+  static void write_ref_array_pre_duinit_oop_entry(oop* src, oop* dst, size_t length);
+  static void write_ref_array_pre_duinit_narrow_oop_entry(narrowOop* src, narrowOop* dst, size_t length);
   static void write_ref_field_pre_entry(oopDesc* orig, JavaThread* thread);
 
   static oopDesc* load_reference_barrier(oopDesc *src);
   static oopDesc* load_reference_barrier_native(oopDesc *src);
 
-  static void shenandoah_clone_barrier(oopDesc* obj);
+  static void shenandoah_clone_barrier(oopDesc* s, oopDesc* d, size_t length);
 };
 
 #endif // SHARE_GC_SHENANDOAH_SHENANDOAHRUNTIME_HPP